# 1-factor ANOVA
#
# Conducting a 1-factor ANOVA from our survey data is easy using the 'aov' function.
# Interpreting and plotting the results will require two libraries, 'ggplot' for
# plotting and 'broom' for cleaning up the output of 'aov'.
install.packages("ggplot2")
library(ggplot2)
library(broom)
# Here we'll run the ANOVA from the 'one_factor_ANOVA_tutorial' which looked at the mean
# preferred temperature across the levels of how much weather affects your mood.
rm(list = ls())
# load in the data. Note the new option 'na.strings = ""'. This converts missing
# responses in nominal data to 'NA's.
survey <-read.csv("http://www.courses.washington.edu/psy315/datasets/Psych315W19survey.csv",
na.strings = "")
# To run a one-factor ANOVA using 'aov', we give it a dependent (ratio scale) variable
# and tell aov to sort this variable across 'levels' of a nominal scale variable. For
# this example, the ratio scale variable is 'temperature' and the nominal scale variable
# is 'weather'.
#
# The second input is the data itself, which is a data frame that should at least have variables
# that match the ratio and nominal scale names. The third input tells aov what to do
# with 'NA's. Here we'll use 'na.action = na.omit' which removes data with NA's in either
# the ratio scale or nominal scale variables.
out <- aov(temperature ~ weather,data = survey,na.action = na.omit)
# It's hard to find the p-value and other statistics in this output, but there's
# a function 'tidy' that cleans up the output:
tidy.out <- tidy(out)
# The output of 'tidy' gives you that familiar table of results. Hopefully it matches the tutorial.
tidy.out
# Useful fields in tidy.out are 'df', 'statistic' (F value), and 'p.value'
# We can use this output and 'spritnf' to present the results in APA format:
sprintf('F(%g,%g) = %0.2f, p = %0.4f',tidy.out$df[1],tidy.out$df[2],
tidy.out$statistic[1],tidy.out$p.value[1])
# Effect size: eta-squared is the ratio of SS between and SS total
# The output 'tidy.out' contains a field 'sumsq' that contains SS bet and SS within:
tidy.out$sumsq
# For some reason, these numbers aren't 'numeric' so we first have to convert the
# SS values using 'as.numeric'. This is how you can calculate eta-squared:
SS <- as.numeric(tidy.out$sumsq)
# Remember, SS total is SS between + SS within, so:
eta.squared <- SS[1]/(SS[1]+SS[2])
# Displaying the result:
sprintf('Eta-squared = %0.4f',eta.squared)
# Next we'll make a bar plot of means across levels with error bars representing the
# standard error of the mean. This will use 'ggplot', much like we did in
# 'TwoSampleIndependentTTest.R'.
# This generates a data frame containing statistics for each level of 'weather'. Note
# the way we're removing NA's
summary <- data.frame(
mean <- tapply(survey$temperature,survey$weather,mean,na.rm = TRUE),
n <- tapply(survey$temperature,survey$weather,function(x) sum(!is.na(x))),
sd <- tapply(survey$temperature,survey$weather,sd,na.rm = TRUE))
summary$sem <- summary$sd/sqrt(summary$n)
colnames(summary) = c("mean","n","sd","sem")
summary
# The levels in 'weather' come out in alphabetical order. To re-order them
# from 'Not at all' to 'Very much' we'll define a list in right order
# and use this list in ggplot
levels <- row.names(summary)
levels <- levels[c(3,2,1,4)]
# Bar graph:
# Define y limits for the bar graph
ylimit <- c(min(summary$mean-1.5*summary$sem),
max(summary$mean+1.5*summary$sem))
# Plot bar graph with error bar as one standard error (standard error of the mean/SEM)
ggplot(summary, aes(x = row.names(summary), y = mean)) +
xlab("How does weather affect your mood?") +
geom_bar(position = position_dodge(), stat="identity", fill="blue") +
geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem),width = .5) +
scale_y_continuous(name = "Preferred Temperature (F)") +
scale_x_discrete(limits = levels) +
coord_cartesian(ylim=ylimit)