# SPSP Introduction to R
# Video 4 Script: Visuals with ggplot2
#
# Walks through frequency charts, bar graphs of group means with error bars,
# boxplots, histograms/density plots, and scatterplots built with ggplot2,
# using the data in "gradeData_2020.csv".

# Set working directory if needed ----
# Replace the placeholder path with your own project folder before running.
setwd("C:/Users/.../Desktop/LearningR/IntroR")

# Import data ----
# "NA", a single space, and empty strings are all read as missing values.
grade_DF <- read.csv(
  "gradeData_2020.csv",
  header = TRUE,
  na.strings = c("NA", " ", "")
)

# Check data imported correctly
names(grade_DF)
View(grade_DF)
str(grade_DF)

# Attach ggplot2 package
library(ggplot2)

# Frequency charts ----
# Create base object (reused by the frequency charts below)
SchoolFreq <- ggplot(data = grade_DF, aes(x = schoolType))

# Simple frequency chart
SchoolFreq +
  geom_bar()

# Frequency chart by gender
SchoolFreq +
  geom_bar(aes(fill = gender), position = "dodge")

# Check frequencies schoolType by gender
table(grade_DF$gender, grade_DF$schoolType)

# Change fonts (size, face),
# colors (scale_fill_brewer),
# and titles (labs)
SchoolFreq +
  geom_bar(aes(fill = gender), position = "dodge") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Frequency School Type by Gender",
    x = "School Type",
    y = "Freq (n)",
    fill = "Gender"
  ) +
  theme_classic() +
  theme(plot.title = element_text(size = 11, face = "bold", hjust = .5))

# Bar Graph: Continuous by Group ----
# Base object (reused by the SAT bar charts below)
SATbySchool <- ggplot(grade_DF, aes(x = schoolType, y = SAT))

# Create bar chart with mean instead of count
SATbySchool +
  stat_summary(aes(fill = schoolType), fun = "mean", geom = "bar")

# Focus on 400 to 600 for y axis (coord_cartesian)
SATbySchool +
  stat_summary(aes(fill = schoolType), fun = "mean", geom = "bar") +
  coord_cartesian(ylim = c(400, 600))

# Add error bars (95% CI)
# mean_se() returns mean +/- mult * SE; mult = 1.96 gives the normal-based
# 95% interval. fun.args must be a list of arguments for fun.data.
SATbySchool +
  stat_summary(aes(fill = schoolType), fun = "mean", geom = "bar") +
  geom_errorbar(
    stat = "summary",
    fun.data = "mean_se",
    fun.args = list(mult = 1.96)
  ) +
  coord_cartesian(ylim = c(400, 600))

# Improve appeal
# Narrow the width of the error bars (width)
# Add title and labels for x and y axes (labs)
# Set font for title (size, face)
# Adjust the horizontal placement of title (hjust)
SATbySchool +
  stat_summary(aes(fill = schoolType), fun = "mean", geom = "bar") +
  geom_errorbar(
    stat = "summary",
    fun.data = "mean_se",
    fun.args = list(mult = 1.96),
    width = .6
  ) +
  coord_cartesian(ylim = c(400, 600)) +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "SAT by School Type", x = "School Type", y = "SAT (M)") +
  theme_classic() +
  theme(plot.title = element_text(size = 11, face = "bold", hjust = .5))

# Bar graph using dplyr table from Video 3
library(dplyr)

# Per-school summary: group size, mean, SD, SE, and t-based 95% CI limits
SATbySchoolTab <- grade_DF %>%
  group_by(schoolType) %>%
  summarize(
    Grp_n = n(),
    SATmean = mean(SAT),
    SATsd = sd(SAT),
    SATse = SATsd / sqrt(Grp_n),
    LL95 = SATmean - (SATse * qt(.975, Grp_n - 1)),
    UL95 = SATmean + (SATse * qt(.975, Grp_n - 1))
  )
View(SATbySchoolTab)

# Chart from dplyr table
# stat = "identity" plots the precomputed means as bar heights.
ggplot(data = SATbySchoolTab, aes(x = schoolType, y = SATmean)) +
  geom_bar(aes(fill = schoolType), stat = "identity") +
  geom_errorbar(aes(ymin = LL95, ymax = UL95), width = .6) +
  coord_cartesian(ylim = c(400, 600)) +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "SAT by School Type", x = "School Type", y = "SAT (M)") +
  theme_classic() +
  theme(plot.title = element_text(size = 11, face = "bold", hjust = .5))

# Boxplot ----
# SAT overall (single boxplot)
ggplot(grade_DF, aes(x = "", y = SAT)) +
  geom_boxplot() +
  scale_y_continuous(
    breaks = seq(420, 660, 30),
    minor_breaks = seq(420, 660, 10)
  )
median(grade_DF$SAT)
IQR(grade_DF$SAT)

# SAT by School Type
ggplot(grade_DF, aes(schoolType, SAT, fill = schoolType)) +
  geom_boxplot()

# Improve appeal
# Narrow width of boxplot (width)
# Change color of outliers (outlier.color)
# Remove legend (legend.position = "none")
# Format y axis with breaks every 50 points
# from 425 to 625
ggplot(grade_DF, aes(schoolType, SAT, fill = schoolType)) +
  geom_boxplot(width = .4, outlier.color = "red") +
  theme_classic() +
  theme(legend.position = "none") +
  scale_y_continuous(breaks = seq(425, 625, 50))

# Visualizing Distributions ----
# Histogram for SAT
ggplot(grade_DF, aes(SAT)) +
  geom_histogram(binwidth = 20, color = "white")

# Histogram with normal curve (dnorm)
SATmean <- mean(grade_DF$SAT)
SATsd <- sd(grade_DF$SAT)
SATn <- length(grade_DF$SAT)
SATbinwid <- 20

# The normal density is multiplied by n * binwidth so the curve is on the
# same count scale as the histogram bars.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# line thickness -- confirm the installed version before switching.
ggplot(grade_DF, aes(SAT)) +
  geom_histogram(binwidth = SATbinwid, color = "white") +
  stat_function(
    fun = function(x) {
      dnorm(x, mean = SATmean, sd = SATsd) * SATn * SATbinwid
    },
    color = "blue",
    size = 1.2
  )

# Histogram by school type (fill)
# Alpha controls transparency of bars
ggplot(grade_DF, aes(SAT, fill = schoolType)) +
  geom_histogram(
    binwidth = 20,
    position = "identity",
    color = "white",
    alpha = .4
  )

# Use bar color (color) instead of fill
# Make fill transparent (fill = NA)
# Use palette for color (scale_color_brewer)
ggplot(grade_DF, aes(SAT, color = schoolType)) +
  geom_histogram(binwidth = 20, position = "identity", fill = NA) +
  scale_color_brewer(palette = "Set1")

# Density plot by school type
# Adjust controls smoothing
ggplot(grade_DF, aes(SAT, fill = schoolType)) +
  geom_density(position = "identity", alpha = .4, adjust = .8) +
  scale_fill_brewer(palette = "Dark2") +
  theme_classic()

# Scatterplots ----
# Simple Scatterplot (geom_point)
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam)) +
  geom_point()

# Scatterplot by group (color)
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam, color = schoolType)) +
  geom_point()

# Use different shapes (shape)
# to represent schoolType
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam, color = schoolType)) +
  geom_point(aes(shape = schoolType))

# Account for overlap
# by making size smaller (size)
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam, color = schoolType)) +
  geom_point(size = .8) +
  theme_classic()

# Avoid overlap with position_jitter
# Use shape without fill (shape = 1)
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam, color = schoolType)) +
  geom_point(
    shape = 1,
    position = position_jitter(width = .8, height = .3)
  )

# Use geom_jitter for jitter
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam, color = schoolType)) +
  geom_jitter(shape = 1)

# Add fit lines (geom_smooth)
# Simple Scatterplot
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam)) +
  geom_point() +
  geom_smooth(method = lm)

# Simple Scatterplot without CI (se = FALSE)
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)

# Scatterplot by group
ggplot(data = grade_DF, aes(x = SAT, y = FinalExam, color = schoolType)) +
  geom_point(size = .8) +
  geom_smooth(method = lm, se = FALSE)

# Exploring ggplot2 with GUI ----
# Install ggplotgui only when it is not already available, so re-running the
# script does not reinstall the package every time.
if (!requireNamespace("ggplotgui", quietly = TRUE)) {
  install.packages("ggplotgui", dependencies = TRUE)
}
library(ggplotgui)

# Launch the GUI on columns 3 and 6 of grade_DF (selected by position).
ggplot_shiny(grade_DF[, c(3, 6)])