# 1-factor ANOVA
#
# Conducting a 1-factor ANOVA from our survey data is easy using the 'aov' function.
# Interpreting and plotting the results will require two libraries, 'ggplot' for
# plotting and 'broom' for cleaning up the output of 'aov'.
# install.packages("ggplot2")
library(ggplot2)
# Here we'll run the ANOVA from the 'one_factor_ANOVA_tutorial' which looked at the mean
# preferred temperature across the levels of how much weather affects your mood.
rm(list = ls())
# load in the data. Note the new option 'na.strings = ""'. This converts missing
# responses in nominal data to 'NA's.
survey <-read.csv("http://www.courses.washington.edu/psy315/datasets/Psych315W21survey.csv",
na.strings = "")
# To run a one-factor ANOVA using the function 'lm' which stands for 'linear model'. It's
# beyond the scope of this class, but if you're interested, an ANOVA can be run as a fit
# of a 'linear model' to the data. 'lm' is a powerful function in R that can do much more
# than ANOVA.
# We give 'lm' a dependent (ratio scale) variable and tell it to sort this variable across
# 'levels' of a nominal scale variable. For this example, the ratio scale variable is
# 'temperature' and the nominal scale variable is 'weather'.
#
# The second input is the data itself, which is a data frame that should at least have variables
# that match the ratio and nominal scale names. The third input tells 'lm' what to do
# with 'NA's. Here we'll use 'na.action = na.omit' which removes data with NA's in either
# the ratio scale or nominal scale variables.
out <- lm(temperature ~ weather,data = survey,na.action = na.omit)
# It's hard to find the p-value and other statistics in this output, but there's
# a function called 'anova' that takes the output of 'lm' and gives you a standard
# anova table:
anova.out <- anova(out)
# Hopefully these numbers match the values in the tutorial!
# Useful fields in anovalout are 'Df', 'F value', and 'Pr(>F)' which is the p-value.
# We can use this output and 'sprintf' to present the results in APA format:
sprintf('F(%g,%g) = %0.2f, p = %0.4f',anova.out$Df[1],anova.out$Df[2],
anova.out$`F value`[1],anova.out$`Pr(>F)`[1])
# Effect size: eta-squared is the ratio of SS between and SS total
# The output 'anova.out' contains a field 'Sum Sq' that contains SS bet and SS within:
anova.out$`Sum Sq`
# This is how you can calculate eta-squared:
SS <- anova.out$`Sum Sq`
# Remember, SS total is SS between + SS within, so:
eta.squared <- SS[1]/(SS[1]+SS[2])
# Displaying the result:
sprintf('Eta-squared = %0.4f',eta.squared)
# Next we'll make a bar plot of means across levels with error bars representing the
# standard error of the mean. This will use 'ggplot', much like we did in
# 'TwoSampleIndependentTTest.R'.
# This generates a data frame containing statistics for each level of 'weather'. Note
# the way we're removing NA's
summary <- data.frame(
mean <- tapply(survey$temperature,survey$weather,mean,na.rm = TRUE),
n <- tapply(survey$temperature,survey$weather,function(x) sum(!is.na(x))),
sd <- tapply(survey$temperature,survey$weather,sd,na.rm = TRUE))
summary$sem <- summary$sd/sqrt(summary$n)
colnames(summary) = c("mean","n","sd","sem")
summary
# The levels in 'weather' come out in alphabetical order. To re-order them
# from 'Not at all' to 'Very much' we'll define a list in right order
# and use this list in ggplot
levels <- row.names(summary)
levels <- levels[c(3,2,1,4)]
# Bar graph:
# Define y limits for the bar graph
ylimit <- c(min(summary$mean-1.5*summary$sem),
max(summary$mean+1.5*summary$sem))
# Plot bar graph with error bar as one standard error (standard error of the mean/SEM)
ggplot(summary, aes(x = row.names(summary), y = mean)) +
xlab("How does weather affect your mood?") +
geom_bar(position = position_dodge(), stat="identity", fill="blue") +
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem),width = .5) +
scale_y_continuous(name = "Preferred Temperature (F)") +
scale_x_discrete(limits = levels) +
coord_cartesian(ylim=ylimit)