# SPSP Introduction to R
# Video 7 Script: t Tests
#
# Walks through a one-sample t test (SAT vs. a reference mean) and an
# independent-samples t test (SAT by private vs. religious school type):
# assumption checks, the test itself, APA-style reporting, a summary plot,
# and nonparametric / bootstrap alternatives.

# Set working directory if needed ----
# The path below is a placeholder ("..." is not a real folder). Replace it
# with the folder that holds gradeData_2020.csv and uncomment the line.
# setwd("C:/Users/.../Desktop/LearningR/IntroR")

# Installing and attaching packages ----

# Install any packages in `pkg` that are not yet installed, then attach all
# of them.
#   pkg: character vector of package names.
# Returns the result of sapply(pkg, library, character.only = TRUE).
PacMan <- function(pkg) {
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  sapply(pkg, library, character.only = TRUE)
}

pkgs <- c(
  "ggplot2", "nortest", "apa", "MKinfer",
  "cowplot", "car", "dplyr", "coin"
)
PacMan(pkgs)

# Import data ----
# "NA", a single space, and the empty string are all read as missing.
grade_DF <- read.csv(
  "gradeData_2020.csv",
  header = TRUE,
  na.strings = c("NA", " ", "")
)
View(grade_DF)

# One-sample t test ----

## Checking Assumptions ----
# Univariate normality and outliers

# Histogram of SAT
# hist() is called only to compute Sturges break points (plot = FALSE);
# ggplot2 draws the histogram using those breaks.
breaks_SAT <- hist(grade_DF$SAT, breaks = "Sturges", plot = FALSE)
ggplot(data = grade_DF, aes(SAT)) +
  geom_histogram(alpha = 0.8, breaks = breaks_SAT$breaks, col = "white") +
  labs(title = "Histogram SAT") +
  theme_classic()

# Boxplot of SAT
# Outlier markers are hidden because every observation is drawn by the
# jitter layer.
ggplot(data = grade_DF, aes(x = "", y = SAT)) +
  geom_boxplot(width = .4, outlier.shape = NA) +
  geom_jitter(size = 1, shape = 1, width = .1, color = "blue") +
  labs(title = "SAT Boxplot", x = "", y = "SAT (Mdn)") +
  theme_classic()

# Boxplot with outliers noted
ggplot(data = grade_DF, aes(x = "", y = SAT)) +
  geom_boxplot(width = .4, outlier.color = "red") +
  labs(title = "SAT Boxplot", x = "", y = "SAT (Mdn)") +
  theme_classic()

# Q-Q plot
qqnorm(grade_DF$SAT)
qqline(grade_DF$SAT, col = "blue")
# Match other sources (e.g., SPSS): datax = TRUE puts the data on the x axis
qqnorm(grade_DF$SAT, datax = TRUE)

# Check normality quantitatively
library(nortest)
lillie.test(grade_DF$SAT)
shapiro.test(grade_DF$SAT)

## Performing one-sample t test ----
popMean <- 531
SAT_t <- t.test(
  grade_DF$SAT,
  mu = popMean,
  alternative = "two.sided",
  conf.level = .95
)
SAT_t

# One-tailed (directional) t test
t.test(grade_DF$SAT, mu = popMean, alternative = "greater")

## Reporting Results ----
library(apa)
t_apa(SAT_t, format = "doc")

SAT_t$estimate
mean(grade_DF$SAT)
sd(grade_DF$SAT)

# Visual for results
# Bar = sample mean; error bar = mean +/- 1.96 * SE. fun.args must be a
# list of arguments for fun.data, so the multiplier is passed by name.
# The red point and mu label mark the reference mean (popMean).
ggplot(grade_DF, aes(x = "", y = SAT)) +
  stat_summary(fun = "mean", geom = "bar", alpha = .8) +
  geom_errorbar(
    stat = "summary",
    fun.data = "mean_se",
    fun.args = list(mult = 1.96),
    width = .3
  ) +
  coord_cartesian(ylim = c(500, 550)) +
  geom_point(x = 1, y = popMean, col = "red") +
  annotate(
    geom = "text", x = 1.03, y = popMean - 2,
    label = expression(mu), col = "red", size = 5
  ) +
  labs(x = "SAT", y = "mean")

## Considering Alternatives ----
# Wilcoxon Test
wilcox.test(grade_DF$SAT, mu = popMean, alternative = "two.sided")
# Re-print the t test for side-by-side comparison
SAT_t

# Bootstrap
# MKinfer is already installed and attached by PacMan() above, so it is not
# reinstalled here.
library(MKinfer)
boot.t.test(
  grade_DF$SAT,
  mu = popMean,
  alternative = "two.sided",
  R = 5000
)

# Ind-sample t test ----
# Create separate object with students
# from private and religious schools
grade_DF_priv <- filter(
  grade_DF,
  schoolType == "private" | schoolType == "religious"
)
table(grade_DF_priv$schoolType)
# read.csv() returns character columns by default in R >= 4.0 and
# droplevels() has no method for character vectors, so convert to a factor
# first. A factor grouping variable is also what leveneTest() and
# coin::wilcox_test() expect further down.
grade_DF_priv$schoolType <- droplevels(as.factor(grade_DF_priv$schoolType))
table(grade_DF_priv$schoolType)

# Independent Private
grade_DF_privIND <- filter(grade_DF, schoolType == "private")
# Religious Private
grade_DF_privREL <- filter(grade_DF, schoolType == "religious")

## Checking Assumptions ----
# Univariate normality and outliers

# Histogram of SAT by School Type (bin width = one third of the SAT SD)
ggplot(data = grade_DF_priv, aes(SAT, fill = schoolType)) +
  geom_histogram(
    position = "identity",
    alpha = 0.8,
    binwidth = (sd(grade_DF_priv$SAT) / 3)
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "Histogram SAT") +
  theme_classic()

# Histogram SAT for Independent Private School
# The vertical line marks the group mean. Both group histograms share the
# same axis limits so they can be compared side by side.
Ind_SAT_Hist <- ggplot(data = grade_DF_privIND, aes(SAT)) +
  geom_histogram(
    position = "identity", alpha = 0.8, binwidth = 20, col = "white"
  ) +
  labs(title = "Histogram SAT", subtitle = "Independent Private") +
  geom_vline(xintercept = mean(grade_DF_privIND$SAT)) +
  theme_classic() +
  coord_cartesian(xlim = c(500, 675), ylim = c(0, 10))
Ind_SAT_Hist

# Histogram SAT for Religious Private School
Rel_SAT_Hist <- ggplot(data = grade_DF_privREL, aes(SAT)) +
  geom_histogram(
    position = "identity", alpha = 0.8, binwidth = 20, col = "white"
  ) +
  labs(title = "Histogram SAT", subtitle = "Religious Private") +
  geom_vline(xintercept = mean(grade_DF_privREL$SAT)) +
  theme_classic() +
  coord_cartesian(xlim = c(500, 675), ylim = c(0, 10))
Rel_SAT_Hist

# Combine Histograms into One Plot
library(cowplot)
plot_grid(
  Ind_SAT_Hist, Rel_SAT_Hist,
  labels = c("Ind", "Rel"),
  label_size = 10
)

# Boxplot of SAT
ggplot(
  data = grade_DF_priv,
  aes(x = schoolType, y = SAT, color = schoolType)
) +
  geom_boxplot(width = .4, outlier.shape = NA) +
  geom_jitter(size = 1, shape = 1, width = .1, color = "blue") +
  labs(title = "SAT Boxplot", x = "School Type", y = "SAT (Mdn)") +
  theme_classic()

# Boxplot with outliers noted
ggplot(
  data = grade_DF_priv,
  aes(x = schoolType, y = SAT, color = schoolType)
) +
  geom_boxplot(width = .4, outlier.color = "red") +
  labs(title = "SAT Boxplot", x = "School Type", y = "SAT (Mdn)") +
  theme_classic() +
  theme(legend.position = "none") +
  scale_color_manual(values = c("blue", "purple"))

# Change outlier criteria to 1.25*IQR
ggplot(
  data = grade_DF_priv,
  aes(x = schoolType, y = SAT, color = schoolType)
) +
  geom_boxplot(width = .4, outlier.color = "red", coef = 1.25) +
  labs(title = "SAT Boxplot", x = "School Type", y = "SAT (Mdn)") +
  theme_classic() +
  theme(legend.position = "none") +
  scale_color_manual(values = c("blue", "purple"))

# Check normality quantitatively
library(nortest)
lillie.test(grade_DF_privIND$SAT)
lillie.test(grade_DF_privREL$SAT)

shapiro.test(grade_DF_privIND$SAT)
shapiro.test(grade_DF_privREL$SAT)

# Check homogeneity of variance
library(car)
leveneTest(SAT ~ schoolType, data = grade_DF_priv)

## Performing ind-sample t test ----
# Student's t test (equal variances assumed). `paired` is not passed: the
# formula interface of t.test() rejects it in recent R versions, and an
# independent-samples test is the default anyway.
SAT_priv_t <- t.test(
  SAT ~ schoolType,
  data = grade_DF_priv,
  alternative = "two.sided",
  conf.level = .95,
  var.equal = TRUE
)
SAT_priv_t

# One-tailed (directional) t test
# var.equal is left at its default here, so this is a Welch test.
t.test(SAT ~ schoolType, data = grade_DF_priv, alternative = "greater")

## Reporting Results ----
library(apa)
t_apa(SAT_priv_t, format = "doc")

# Descriptives by group
# n, mean, SD, SE, and t-based 95% confidence limits for each school type,
# with all numeric columns rounded to 3 decimals.
library(dplyr)
SATbySchoolTab <- grade_DF_priv %>%
  group_by(schoolType) %>%
  summarize(
    Grp_n = n(),
    SATmean = mean(SAT),
    SATsd = sd(SAT),
    SATse = SATsd / sqrt(Grp_n),
    LL95 = SATmean - (SATse * qt(.975, Grp_n - 1)),
    UL95 = SATmean + (SATse * qt(.975, Grp_n - 1))
  ) %>%
  mutate(across(where(is.numeric), function(x) round(x, 3)))
View(SATbySchoolTab)

# Visual for results
# Bars = group means; error bars = mean +/- 1.96 * SE.
ggplot(grade_DF_priv, aes(x = schoolType, y = SAT)) +
  stat_summary(fun = "mean", geom = "bar") +
  geom_errorbar(
    stat = "summary",
    fun.data = "mean_se",
    fun.args = list(mult = 1.96),
    width = .3
  ) +
  coord_cartesian(ylim = c(400, 600))

## Considering Alternatives ----
# Welch's t
t.test(
  SAT ~ schoolType,
  data = grade_DF_priv,
  alternative = "two.sided",
  conf.level = .95,
  var.equal = FALSE
)

# Wilcoxon-Mann-Whitney Test
library(coin)
wilcox_test(SAT ~ schoolType, data = grade_DF_priv, alternative = "two.sided")

# Bootstrap
library(MKinfer)
boot.t.test(
  SAT ~ schoolType,
  data = grade_DF_priv,
  alternative = "two.sided",
  R = 5000
)