#SPSP Introduction to R #Video 8 Script: ANOVAs #Set working directory if needed---- setwd("C:/Users/.../Desktop/LearningR/IntroR") # #Installing and attaching packages---- PacMan <- function(pkg){new.pkg <- pkg[!(pkg %in% installed.packages() [,"Package"])] if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE) sapply(pkg, library, character.only = TRUE)} pkgs <- c("ggplot2","dplyr", "nortest", "apa", "apaTables", "car", "ez", "multcompView", "multcomp", "reshape2", "additivityTests", "PMCMRplus") PacMan(pkgs) # #Import data---- grade_DF <- read.csv( "gradeData_2020_v2.csv", header=TRUE, na.strings=c("NA", " ", "")) View(grade_DF) # #Between-Subject ANOVA---- ##Checking Assumptions---- #Univariate normality and outliers #Histogram of SAT by School Type ggplot(data=grade_DF, aes(x=SAT)) + geom_histogram(position="identity", alpha=0.8, binwidth = 10) + theme_classic() + facet_wrap(~schoolType) # #Boxplot: SAT by School Type with outliers ggplot(data=grade_DF, aes(x=schoolType, y=SAT, color=schoolType)) + geom_boxplot(width=.4, outlier.color = "red") + labs(title="SAT Boxplot", x="School Type", y="SAT") + theme_classic() + theme(legend.position="none") + scale_color_brewer(palette="Dark2") # #Check normality quantitatively #KS with Lilliefors correction library(dplyr) library(nortest) # LillTestObj <- lillie.test(grade_DF$SAT) View(LillTestObj) LillTestObj$statistic LillTestObj$p.value # LillieBySchool <- grade_DF %>% group_by(schoolType) %>% summarize( Lillie_D=lillie.test(SAT)$statistic, Lillie_pVal=lillie.test(SAT)$p.value) %>% mutate_if(is.numeric, round, 3) View(LillieBySchool) # LillieBySchool2 <- grade_DF %>% group_by(schoolType) %>% summarize( Lillie_D=lillie.test(SAT)[[1]], Lillie_pVal=lillie.test(SAT)[[2]]) %>% mutate_if(is.numeric, round, 3) View(LillieBySchool2) # LillieBySchool3 <- grade_DF %>% group_by(schoolType) %>% summarize( Lillie_D=lillie.test(SAT)[1][[1]], Lillie_pVal=lillie.test(SAT)[[2]]) %>% mutate_if(is.numeric, round, 3) View(LillieBySchool3) # #Shapiro ShapiroTestObj <- shapiro.test(grade_DF$SAT) View(ShapiroTestObj) View(LillTestObj) # ShapiroBySchool <- grade_DF %>% group_by(schoolType) %>% summarize( Shapiro_W=shapiro.test(SAT)$statistic, Shapiro_pVal=shapiro.test(SAT)$p.value) %>% mutate_if(is.numeric, round, 3) View(ShapiroBySchool) # #To learn more about extracting look into #Extract function from base package help(Extract) # # #Check homogeneity of variance library(car) leveneTest(SAT~schoolType, data=grade_DF) # leveneTest(SAT~schoolType, data=grade_DF, center="mean") # ##Performing Between-Subject ANOVA---- contrasts(grade_DF$schoolType) <- contr.sum SATaov <- aov(SAT~schoolType, data=grade_DF) summary.aov(SATaov) #Type III Sum of Squares Anova(SATaov, type=3) #Performing test with ezANOVA library(ez) ezSATaov <- ezANOVA(data=grade_DF, dv=SAT, wid=Part_ID, between=schoolType, type=3, return_aov=T, detailed=F) grade_DF$ID <- rownames(grade_DF) View(grade_DF) ezSATaov #Convert Part_ID to factor grade_DF$Part_ID <- factor(grade_DF$Part_ID) #Conduct between-sub ANOVA with ezANOVA ezSATaov2 <- ezANOVA(data=grade_DF, dv=SAT, wid=Part_ID, between=schoolType, type=3, return_aov=T, detailed=T) #Look at output from ezANOVA ezSATaov ezSATaov2 View(Anova(ezSATaov$aov, type=3)) # #Post-Hoc Tests TukeyHSD(ezSATaov$aov) # ##Reporting Results---- #Omnibus test library(apa) anova_apa(ezSATaov$aov, format = "doc") # library(apaTables) #Omnibus test in table apa.ezANOVA.table(ezSATaov, filename="ezSAT.doc") #Omnibus test table with CI apa.aov.table(ezSATaov$aov, filename="aovSAT.doc", type=3, conf.level = .95) #Descriptives by Group #Mean with CI and SD apa.1way.table(iv=schoolType, dv=SAT, data=grade_DF, filename="OneWayTab.doc", show.conf.interval = T) #Report Post-Hoc PostAOV <- TukeyHSD(ezSATaov$aov) View(round(PostAOV$schoolType, 3)) write.csv(round(PostAOV$schoolType, 3), file = "PostHoc.csv") # #Descriptive by group library(dplyr) SATbySchoolTab <- grade_DF %>% group_by(schoolType) %>% summarize(Grp_n=n(), SATmean=mean(SAT), SATsd=sd(SAT), SATse=SATsd/sqrt(Grp_n), LL95=SATmean- (SATse*qt(.975,Grp_n-1)), UL95=SATmean+ (SATse*qt(.975,Grp_n-1))) %>% mutate_if(is.numeric, round, 3) View(SATbySchoolTab) #Visual for results ggplot(grade_DF, aes(x=schoolType, y=SAT)) + stat_summary(fun="mean",geom="bar") + geom_errorbar(stat="summary", fun.data="mean_se", fun.args=1.96, width=.3) + coord_cartesian(ylim=c(400,600)) # #Visual with letter for significance install.packages("multcompView", dependencies = T) library(multcompView) TukLett <- multcompLetters( PostAOV$schoolType[,"p adj"]) View(TukLett$Letters) TukLett_DF <- data.frame(TukLett$Letters) TukLett_DF$schoolType <- factor( rownames(TukLett_DF)) TukLett_DF2 <- merge(SATbySchoolTab, TukLett_DF) View(TukLett_DF2) # library(ggplot2) ggplot(data=grade_DF, aes(x=schoolType, y=SAT)) + geom_boxplot(width=.4, outlier.color = "red") + labs(title="SAT by School Type", x="School Type", y="SAT") + theme_classic() + theme(legend.position="none") + scale_color_brewer(palette="Set1") + #Use TukLett_DF2 to add letter(s) based on Tukey HSD test geom_text(data=TukLett_DF2, aes(x=schoolType, y=(SATmean+2*SATsd), label=TukLett.Letters, color=TukLett.Letters), nudge_x = .1, size=6) #Bar Graph ggplot(data=grade_DF, aes(x=schoolType, y=SAT)) + stat_summary(fun="mean",geom="bar")+ geom_errorbar(stat="summary", fun.data="mean_se", fun.args=1.96, width=.3) + labs(title="SAT by School Type", x="School Type", y="SAT") + theme_classic() + theme(legend.position="none", plot.title=element_text(hjust=.5)) + scale_color_brewer(palette="Set1") + #Use TukLett_DF2 to add letter(s) based on Tukey HSD test geom_text(data=TukLett_DF2, aes(x=schoolType, y=SATmean+50, label=TukLett.Letters, color=TukLett.Letters), size=8, nudge_x = .12) #Bar Graph using one data frame #instead of grade_DF and TukLett_DF2 both View(TukLett_DF2) ggplot(data=TukLett_DF2, aes(x=schoolType, y=SATmean)) + geom_bar(stat="identity", width=.6) + geom_errorbar(aes(ymin=LL95, ymax=UL95), width=.3) + labs(title="SAT by School Type", x="School Type", y="SAT") + theme_classic() + theme(legend.position="none", plot.title=element_text(hjust=.5)) + scale_color_brewer(palette="Set1") + #Use TukLett_DF2 to add letter(s) based on Tukey HSD test geom_text(aes(y=SATmean+70, label=TukLett.Letters, color=TukLett.Letters), size=8, nudge_x = .12) #Other package options for visuals library(ggpubr) library(ggsignif) # ##Considering Alternatives---- kruskal.test(SAT~schoolType, data=grade_DF) pairWilcox <- pairwise.wilcox.test( x=grade_DF$SAT, g=grade_DF$schoolType, p.adj = "bonferroni", paired=F) View(round(pairWilcox$p.value, 4)) # #Bootstrapped results library(ez) bootAOV <- ezBoot(data=grade_DF, wid=Part_ID, dv=SAT, between=schoolType, iterations=500, resample_within=FALSE) ezPlot2(pred=bootAOV, x=schoolType) boot_DF <- data.frame(bootAOV$boots) View(boot_DF) BootTab <- boot_DF %>% group_by(schoolType) %>% summarize(N=n(),Mean=mean(value), LL= round(quantile( value, prob=.025), 3), UL= round(quantile( value, prob=.975), 3)) View(BootTab) #Visualize Bootstrapped Results ggplot(data=BootTab, aes(x=schoolType, y=Mean, fill=schoolType)) + geom_bar(stat="identity") + geom_errorbar(aes(ymin=LL, ymax=UL), width=.2) + scale_fill_brewer(palette="Set1") + labs(title="SAT", subtitle="by School Type", x="School Type", y="SAT") + theme_classic() + theme(axis.title = element_text(size = 12), plot.title=element_text(hjust=.5), plot.subtitle=element_text(hjust=.5), axis.text.x=element_text(size=12)) # # #Within-Subject ANOVA---- #Reshape Data from Wide Format to Long library(reshape2) Long_DF <- melt(grade_DF, id.vars=c("Part_ID","gender", "schoolType"), measure.vars=c("Exam1", "Exam2", "Exam3")) View(Long_DF) colnames(Long_DF)[ colnames(Long_DF)=="variable"] <- "ExamNum" View(Long_DF) ##Check Assumptions---- #Create Difference Scores #Check for Normality, Outliers grade_DF$ExamDiff_12 <- grade_DF$Exam1-grade_DF$Exam2 grade_DF$ExamDiff_13 <- grade_DF$Exam1-grade_DF$Exam3 grade_DF$ExamDiff_23 <- grade_DF$Exam2-grade_DF$Exam3 View(grade_DF) # hist(grade_DF$ExamDiff_12) boxplot(grade_DF$ExamDiff_12) # #DVs are associated DepVars <- as.matrix(subset.data.frame( grade_DF, select=c("Exam1", "Exam2", "Exam3"))) corTab <- round(cor(DepVars), 3) corTab corTab[upper.tri(corTab)] <- "" View(corTab) # library(additivityTests) tukey.test(DepVars) # #Sphericity ExamWithinAOVEZ <- ezANOVA( data=Long_DF, wid=Part_ID, dv=value, within=ExamNum, type=3, return_aov=TRUE) ExamWithinAOVEZ$`Mauchly's Test for Sphericity` # ##Conduct Within-Subjects Test---- ExamWithinAOVEZ <- ezANOVA( data=Long_DF, wid=Part_ID, dv=value, within=ExamNum, type=3, return_aov=TRUE) ExamWithinAOVEZ$ANOVA # #Post-Hoc Analyses ExamWithinAOV <- aov(value~ExamNum, data=Long_DF) library(multcomp) AOVmodel <- glht(ExamWithinAOV, linfct=mcp(ExamNum="Tukey"), test=adjusted(type="bonferroni")) summary(AOVmodel) confint(AOVmodel, level=.95) # #Planned contrasts t.test(grade_DF$Exam1, grade_DF$Exam2, paired=T) t.test(grade_DF$ExamDiff_12) # ##Report Findings---- model.tables(ExamWithinAOVEZ$aov, "means") #Descriptive by Group AOVsummTab<- Long_DF %>% group_by(ExamNum) %>% summarize(N=n(), mean=mean(value), sd=sd(value), se=sd/sqrt(N), ci=se*(qt(.975, N-1)), LL=mean-ci, UL=mean+ci) View(AOVsummTab) #Visualize Results ggplot(AOVsummTab, aes(x=ExamNum, y=mean)) + geom_bar(stat="identity", width=.6) + geom_errorbar(aes(ymin=LL, ymax=UL), width=.3)+ theme_classic()+ labs(title="Exam Scores", subtitle="with 95% CI", y="%")+ theme(axis.ticks.x=element_blank(), plot.title=element_text(hjust=.5), plot.subtitle=element_text(hjust=.5)) ##Consider Alternatives---- #Sphericity Corrections ExamWithinAOVEZ <- ezANOVA( data=Long_DF, wid=Part_ID, dv=value, within=ExamNum, type=3, return_aov=TRUE) ExamWithinAOVEZ$`Sphericity Corrections` # #Non-parametric test friedman.test(DepVars) #Post-hoc analyses library(PMCMRplus) friedmanTest(DepVars) FrdExact <- frdAllPairsExactTest(DepVars, p.adj="bonferroni") FrdExact$statistic # FrdConover <- frdAllPairsConoverTest(DepVars, p.adj="bonferroni") FrdConover$statistic FrdConover$p.value # #Introduction to Mixed ANOVA---- MixedAOV <- ezANOVA( data=Long_DF, wid=Part_ID, dv=value, within=ExamNum, between=schoolType, type=3, return_aov=TRUE) MixedAOV$`Mauchly's Test for Sphericity` MixedAOV$ANOVA # #Multivariate Test MixedAOV_LMfxn <- lm( cbind(Exam1, Exam2, Exam3)~schoolType, data=grade_DF) idata2 <- data.frame(ExamNum=ordered(1:3)) library(car) ModSum <- summary(Anova(MixedAOV_LMfxn, idata=idata2, idesign=~ExamNum), multivariate=TRUE, univariate=TRUE) # ModSum$multivariate.tests$ `schoolType:ExamNum`