#SPSP Introduction to R
#Video 7 Script: t Tests
#Set working directory if needed----
# NOTE(review): the "..." in this path looks like a placeholder -- replace it
# with the folder that holds gradeData_2020.csv, since the read.csv() call
# later in this script uses a relative file name.
setwd("C:/Users/.../Desktop/LearningR/IntroR")
#
#Installing and attaching packages----
# PacMan: install any packages in `pkg` that are not yet installed, then
# attach every package in `pkg`.
#   pkg      character vector of package names
#   returns  the sapply() result of calling library() on each name
PacMan <- function(pkg) {
  installed_names <- installed.packages()[, "Package"]
  is_missing <- !(pkg %in% installed_names)
  to_install <- pkg[is_missing]
  if (length(to_install) > 0) {
    install.packages(to_install, dependencies = TRUE)
  }
  sapply(pkg, library, character.only = TRUE)
}
# Packages this script relies on; PacMan() installs the missing ones and
# attaches all of them.
pkgs <- c(
  "ggplot2",  # ggplot() figures
  "nortest",  # lillie.test()
  "apa",      # t_apa()
  "MKinfer",  # boot.t.test()
  "cowplot",  # plot_grid()
  "car",      # leveneTest()
  "dplyr",    # filter(), group_by(), summarize()
  "coin"      # wilcox_test()
)
PacMan(pkgs)
#
#Import data----
# Cells containing "NA", a single space, or nothing at all are read as
# missing values.
grade_DF <- read.csv(file = "gradeData_2020.csv",
                     header = TRUE,
                     na.strings = c("NA", " ", ""))
View(grade_DF)
#
#One-sample t test----
##Checking Assumptions----
#Univariate normality and outliers
#Histogram of SAT
# hist() is called with plot = FALSE only to compute the Sturges bin edges;
# ggplot then draws the histogram using those edges.
breaks_SAT <- hist(x = grade_DF$SAT, breaks = "Sturges", plot = FALSE)
ggplot(data = grade_DF, mapping = aes(x = SAT)) +
  geom_histogram(breaks = breaks_SAT$breaks,
                 colour = "white",
                 alpha = 0.8) +
  theme_classic() +
  labs(title = "Histogram SAT")
#
#Boxplot of SAT
# The boxplot's own outlier markers are hidden (outlier.shape = NA) because
# every observation is drawn as a jittered open circle on top of the box.
ggplot(data = grade_DF, mapping = aes(x = "", y = SAT)) +
  geom_boxplot(outlier.shape = NA, width = 0.4) +
  geom_jitter(shape = 1, size = 1, colour = "blue", width = 0.1) +
  theme_classic() +
  labs(title = "SAT Boxplot", x = "", y = "SAT (Mdn)")
#Boxplot with outliers noted
# Same plot without the jitter layer; outliers are drawn in red.
ggplot(data = grade_DF, mapping = aes(x = "", y = SAT)) +
  geom_boxplot(outlier.colour = "red", width = 0.4) +
  theme_classic() +
  labs(title = "SAT Boxplot", x = "", y = "SAT (Mdn)")
#
#Q-Q plot
qqnorm(y = grade_DF$SAT)
qqline(y = grade_DF$SAT, col = "blue")
#Match other sources (e.g., SPSS)
# datax = TRUE puts the data values on the x-axis.
qqnorm(y = grade_DF$SAT, datax = TRUE)
#Check normality quantitatively
library(nortest)
lillie.test(x = grade_DF$SAT)
shapiro.test(x = grade_DF$SAT)
##Performing one-sample t test----
# Population mean that the sample mean of SAT is tested against.
popMean <- 531
SAT_t <- t.test(x = grade_DF$SAT,
                alternative = "two.sided",
                mu = popMean,
                conf.level = 0.95)
SAT_t
#One-tailed (directional) t test
# Alternative hypothesis: the true mean is greater than popMean.
t.test(x = grade_DF$SAT, alternative = "greater", mu = popMean)
#
##Reporting Results----
library(apa)
t_apa(SAT_t, format = "doc")
#
# The mean stored in the test object, then the sample mean and SD computed
# directly from the data.
SAT_t[["estimate"]]
mean(x = grade_DF$SAT)
sd(x = grade_DF$SAT)
#Visual for results
# Bar = sample mean of SAT; error bar = mean +/- 1.96 standard errors;
# red point and label = the population mean (popMean) being tested against.
ggplot(grade_DF, aes(x = "", y = SAT)) +
  stat_summary(fun = "mean", geom = "bar", alpha = .8) +
  # fun.args must be a list of arguments for fun.data; naming `mult` makes
  # the 1.96 multiplier explicit instead of relying on positional matching.
  geom_errorbar(stat = "summary",
                fun.data = "mean_se",
                fun.args = list(mult = 1.96),
                width = .3) +
  # Zoom the y-axis without dropping data from the summaries.
  coord_cartesian(ylim = c(500, 550)) +
  # annotate() draws a single point; geom_point() with constant x/y would
  # draw one overplotted point for every row of grade_DF.
  annotate(geom = "point",
           x = 1, y = popMean,
           colour = "red") +
  annotate(geom = "text",
           x = 1.03, y = popMean - 2,
           label = expression(mu),
           col = "red", size = 5) +
  labs(x = "SAT", y = "mean")
#
##Considering Alternatives----
#Wilcoxon Test
wilcox.test(x = grade_DF$SAT,
            alternative = "two.sided",
            mu = popMean)
# Print the t-test result again.
SAT_t
#
#Bootstrap 
# Install MKinfer only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("MKinfer", quietly = TRUE)) {
  install.packages("MKinfer",
                   dependencies = TRUE)
}
library(MKinfer)
# R = number of bootstrap replicates.
boot.t.test(grade_DF$SAT,
            mu = popMean,
            alternative = "two.sided",
            R = 5000)
#
#
#Ind-sample t test----
#Create separate object with students 
#from private and religious schools
# dplyr::filter is namespaced so stats::filter can never be picked up by
# mistake.
grade_DF_priv <- dplyr::filter(
  grade_DF,
  schoolType == "private" |
    schoolType == "religious")
table(grade_DF_priv$schoolType)
# Make schoolType a factor holding only the groups that are present.
# droplevels() has no method for character vectors, and read.csv() returns
# character columns by default since R 4.0. factor() works for both
# character and factor input and also drops unused levels.
grade_DF_priv$schoolType <-
  factor(grade_DF_priv$schoolType)
table(grade_DF_priv$schoolType)
#Independent Private
grade_DF_privIND <- dplyr::filter(
  grade_DF,
  schoolType == "private")
#Religious Private
grade_DF_privREL <- dplyr::filter(
  grade_DF,
  schoolType == "religious")
##Checking Assumptions----
#Univariate normality and outliers
#Histogram of SAT by School Type
# Both groups are drawn over each other (position = "identity"); the bin
# width is one third of the SD of SAT in the combined subset.
ggplot(data = grade_DF_priv,
       mapping = aes(x = SAT, fill = schoolType)) +
  geom_histogram(binwidth = (sd(grade_DF_priv$SAT) / 3),
                 position = "identity",
                 alpha = 0.8) +
  scale_fill_brewer(palette = "Set1") +
  theme_classic() +
  labs(title = "Histogram SAT")
#Histogram SAT for Independent Private School
# Vertical line marks the group mean; both histograms share the same axis
# limits so they can be compared side by side.
Ind_SAT_Hist <- ggplot(data = grade_DF_privIND, mapping = aes(x = SAT)) +
  geom_histogram(binwidth = 20,
                 colour = "white",
                 alpha = 0.8,
                 position = "identity") +
  geom_vline(xintercept = mean(grade_DF_privIND$SAT)) +
  coord_cartesian(xlim = c(500, 675), ylim = c(0, 10)) +
  theme_classic() +
  labs(title = "Histogram SAT", subtitle = "Independent Private")
Ind_SAT_Hist
#Histogram SAT for Religious Private School
Rel_SAT_Hist <- ggplot(data = grade_DF_privREL, mapping = aes(x = SAT)) +
  geom_histogram(binwidth = 20,
                 colour = "white",
                 alpha = 0.8,
                 position = "identity") +
  geom_vline(xintercept = mean(grade_DF_privREL$SAT)) +
  coord_cartesian(xlim = c(500, 675), ylim = c(0, 10)) +
  theme_classic() +
  labs(title = "Histogram SAT", subtitle = "Religious Private")
Rel_SAT_Hist
#Combine Histograms into One Plot
library(cowplot)
plot_grid(Ind_SAT_Hist, Rel_SAT_Hist,
          label_size = 10,
          labels = c("Ind", "Rel"))
#
#Boxplot of SAT
# Outlier markers are hidden here because all observations are overlaid as
# jittered blue circles.
ggplot(data = grade_DF_priv,
       mapping = aes(x = schoolType, y = SAT, color = schoolType)) +
  geom_boxplot(outlier.shape = NA, width = 0.4) +
  geom_jitter(shape = 1, size = 1, colour = "blue", width = 0.1) +
  theme_classic() +
  labs(title = "SAT Boxplot", x = "School Type", y = "SAT (Mdn)")
#Boxplot with outliers noted
ggplot(data = grade_DF_priv,
       mapping = aes(x = schoolType, y = SAT, color = schoolType)) +
  geom_boxplot(outlier.colour = "red", width = 0.4) +
  scale_color_manual(values = c("blue", "purple")) +
  theme_classic() +
  theme(legend.position = "none") +
  labs(title = "SAT Boxplot", x = "School Type", y = "SAT (Mdn)")
#Change outlier criteria to 1.25*IQR
# coef sets the whisker length as a multiple of the IQR.
ggplot(data = grade_DF_priv,
       mapping = aes(x = schoolType, y = SAT, color = schoolType)) +
  geom_boxplot(coef = 1.25, outlier.colour = "red", width = 0.4) +
  scale_color_manual(values = c("blue", "purple")) +
  theme_classic() +
  theme(legend.position = "none") +
  labs(title = "SAT Boxplot", x = "School Type", y = "SAT (Mdn)")
#
#Check normality quantitatively
# Each test is run separately for the two school types.
library(nortest)
lillie.test(x = grade_DF_privIND$SAT)
lillie.test(x = grade_DF_privREL$SAT)
#
shapiro.test(x = grade_DF_privIND$SAT)
shapiro.test(x = grade_DF_privREL$SAT)
#
#Check homogeneity of variance
library(car)
leveneTest(SAT ~ schoolType,
           data = grade_DF_priv)
#
##Performing ind-sample t test----
# Student's t test with pooled variance (var.equal = TRUE). `paired` is not
# passed: the formula method of t.test() rejects that argument from R 4.4.0
# onward, and an unpaired test is the default.
SAT_priv_t <- t.test(SAT ~ schoolType,
                     data = grade_DF_priv,
                     alternative = "two.sided",
                     conf.level = .95,
                     var.equal = TRUE)
SAT_priv_t
#One-tailed (directional) t test
# NOTE(review): var.equal is not set here, so this call uses t.test()'s
# default (Welch, unequal variances) rather than the pooled-variance test
# above -- confirm that is intended.
t.test(SAT ~ schoolType,
       data = grade_DF_priv,
       alternative = "greater")
#
##Reporting Results----
library(apa)
t_apa(SAT_priv_t, format = "doc")
#
#Descriptives by group
# Per school type: n, mean, SD, standard error, and the limits of a
# t-based 95% confidence interval for the mean; numeric columns are rounded
# to 3 decimals.
library(dplyr)
SATbySchoolTab <- grade_DF_priv %>%
  group_by(schoolType) %>%
  summarise(
    Grp_n = n(),
    SATmean = mean(SAT),
    SATsd = sd(SAT),
    SATse = SATsd / sqrt(Grp_n),
    LL95 = SATmean - (qt(.975, Grp_n - 1) * SATse),
    UL95 = SATmean + (qt(.975, Grp_n - 1) * SATse)
  ) %>%
  mutate_if(is.numeric, round, digits = 3)
View(SATbySchoolTab)
#Visual for results
# Bars = group means of SAT; error bars = mean +/- 1.96 standard errors.
ggplot(grade_DF_priv, aes(x = schoolType, y = SAT)) +
  stat_summary(fun = "mean", geom = "bar") +
  # fun.args must be a list of arguments for fun.data; naming `mult` makes
  # the 1.96 multiplier explicit instead of relying on positional matching.
  geom_errorbar(stat = "summary",
                fun.data = "mean_se",
                fun.args = list(mult = 1.96),
                width = .3) +
  # Zoom the y-axis without dropping data from the summaries.
  coord_cartesian(ylim = c(400, 600))
#
##Considering Alternatives----
#Welch's t
# var.equal = FALSE: the group variances are not pooled.
t.test(SAT ~ schoolType,
       data = grade_DF_priv,
       var.equal = FALSE,
       alternative = "two.sided",
       conf.level = 0.95)
#
#Wilcoxon-Mann-Whitney Test
library(coin)
wilcox_test(SAT ~ schoolType,
            alternative = "two.sided",
            data = grade_DF_priv)
#
#Bootstrap 
# R = number of bootstrap replicates.
library(MKinfer)
boot.t.test(SAT ~ schoolType,
            data = grade_DF_priv,
            R = 5000,
            alternative = "two.sided")
#