#SPSP Introduction to R #Video 5 Script: Basic Associative Tests #Correlation and Chi Square #Set working directory if needed---- setwd("C:/Users/.../Desktop/LearningR/IntroR") # #Installing and attaching packages---- PacMan <- function(pkg){new.pkg <- pkg[!(pkg %in% installed.packages() [,"Package"])] if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE) sapply(pkg, library, character.only = TRUE)} pkgs <- c("ggplot2", "dplyr", "apa", "apaTables", "nortest", "MVN") PacMan(pkgs) # #Import data---- grade_DF <- read.csv( "gradeData_2020.csv", header=TRUE, na.strings=c("NA", " ", "")) # #Converting Final Exam Variable to Categorical---- grade_DF$Letter <- 0 grade_DF$Letter[grade_DF$FinalExam< 59.5] <- "F" grade_DF$Letter[grade_DF$FinalExam>=59.5 & grade_DF$FinalExam<69.5] <- "D" grade_DF$Letter[grade_DF$FinalExam>=69.5 & grade_DF$FinalExam<79.5] <- "C" grade_DF$Letter[grade_DF$FinalExam>=79.5 & grade_DF$FinalExam<89.5] <- "B" grade_DF$Letter[grade_DF$FinalExam>=89.5] <- "A" grade_DF$Letter <- factor(grade_DF$Letter) levels(grade_DF$Letter) View(grade_DF) # #Chi Square Test---- # #Data Screening and Assumption Testing---- GrdSchoolType <- chisq.test( grade_DF$Letter, grade_DF$schoolType, correct=FALSE) round(GrdSchoolType$expected, 2) LowExp <- sum(ifelse( GrdSchoolType$expected<=5,1,0)) Cells <- nlevels(grade_DF$Letter)* nlevels(grade_DF$schoolType) LowExp Cells #Percent of expectancies at or below 5 LowExp/Cells*100 # #Create combined letter grade for D and F levels(grade_DF$Letter) <- c(levels(grade_DF$Letter),"D or F") grade_DF$Letter[grade_DF$FinalExam<69.5] <- "D or F" grade_DF$Letter <- droplevels(grade_DF$Letter) View(grade_DF) levels(grade_DF$Letter) # #Screen new levels GrdSchoolType2 <- chisq.test( grade_DF$Letter, grade_DF$schoolType, correct=FALSE) round(GrdSchoolType2$expected, 2) LowExp <- sum(ifelse( GrdSchoolType2$expected<=5,1,0)) Cells <- nlevels(grade_DF$Letter)* nlevels(grade_DF$schoolType) LowExp Cells #Percent of expectancies at or below 5 LowExp/Cells*100 # #Performing Chi Square Test---- GrdSchoolType2 <- chisq.test( grade_DF$Letter, grade_DF$schoolType, correct=FALSE) GrdSchoolType2 # #Effect Size: Cramer's V CramerV <- function(x, y){ chiOut <- chisq.test(x, y, correct=FALSE) kx <- nlevels(x) ky <- nlevels(y) N <- sum(chiOut$observed [1:(kx*ky)]) V <- sqrt(chiOut$statistic/ (N*min((kx-1), (ky-1)))) names(V) <- "Cramer's V" return(c(chiOut$statistic, V)) } # CramerV(grade_DF$Letter, grade_DF$schoolType) # #Presenting Results---- #Letter & schoolType Tables GrdSchoolType$observed round(GrdSchoolType$expected, 2) # chisq_apa(GrdSchoolType2, format="docx") # #Considering Alternatives---- #Chi Square Output with Alt. tests X2_Out <- function(x, y){ chiOut <- chisq.test(x, y, correct=FALSE) df1 <- length(unique(x))-1 df2 <- length(unique(y))-1 N <- sum(chiOut$observed[ 1:((df1+1)*(df2+1))]) V <- round(sqrt(chiOut$statistic/ (N*min(df1, df2))), 3) ChiVal <- round(chiOut$statistic, 3) Chidf <- round(chiOut$parameter, 0) pVal <- format.pval(chiOut$p.value, eps=.001, digits=3) ChiResult <- data.frame(ChiVal, Chidf, pVal, V) # #Yates' Correction Yates <- chisq.test(x, y, correct=TRUE) ChiVal <- round(Yates$statistic, 3) Chidf <- round(Yates$parameter, 0) pVal <- format.pval(Yates$p.value, eps=.001, digits=3) V <- "" YatesCorrect <- data.frame(ChiVal, Chidf, pVal, V) # #Combine outputs ChiTab <- rbind.data.frame(ChiResult, YatesCorrect) names(ChiTab) <- c("Value", "df", "p Value", "Cramer's V") row.names(ChiTab) <- c( "Pearson's Chi Sq", "Yates' Correction") ChiTab } X2Result <- X2_Out(grade_DF$Letter, grade_DF$schoolType) View(X2Result) # #Use Monte Carlo simulation for p value chisq.test(grade_DF$Letter, grade_DF$schoolType, simulate.p.value = TRUE, B=5000) # #Bivariate Correlation---- # #Checking assumptions---- #Visually assess scatterplot ggplot(grade_DF, aes(x=SAT, y=FinalExam)) + geom_point(size=1, alpha=.2) + theme_classic() + geom_smooth(method=lm, color="black", fill="blue") #Univariate normality library(nortest) lillie.test(grade_DF$SAT) lillie.test(grade_DF$FinalExam) #Bivariate Normality library(MVN) names(grade_DF) NormTest <- mvn(grade_DF[,c(3,6)], mvnTest = "royston", univariateTest = "Lillie", multivariatePlot = "qq", univariatePlot = "qq") NormTest$multivariateNormality NormTest$univariateNormality # #Perform correlation---- cor.test(x=grade_DF$SAT, y=grade_DF$FinalExam, method="pearson") #APA correlation output CorrSATexam <- cor.test(x=grade_DF$SAT, y=grade_DF$FinalExam, method="pearson") cor_apa(CorrSATexam, format="docx") #Add confidence interval CorrSATexam CorrSATexam$conf.int #Table with confidence interval names(grade_DF) apa.cor.table(grade_DF[,c(3,6)], filename="CorOut.doc") # #Considering alternatives---- cor.test(x=grade_DF$SAT, y=grade_DF$FinalExam, method="kendall")