# 1-factor ANOVA # # Conducting a 1-factor ANOVA from our survey data is easy using the 'aov' function. # Interpreting and plotting the results will require two libraries, 'ggplot' for # plotting and 'broom' for cleaning up the output of 'aov'. install.packages("ggplot2") library(ggplot2) library(broom) # Here we'll run the ANOVA from the 'one_factor_ANOVA_tutorial' which looked at the mean # preferred temperature across the levels of how much weather affects your mood. rm(list = ls()) # load in the data. Note the new option 'na.strings = ""'. This converts missing # responses in nominal data to 'NA's. survey <-read.csv("http://www.courses.washington.edu/psy315/datasets/Psych315W19survey.csv", na.strings = "") # To run a one-factor ANOVA using 'aov', we give it a dependent (ratio scale) variable # and tell aov to sort this variable across 'levels' of a nominal scale variable. For # this example, the ratio scale variable is 'temperature' and the nominal scale variable # is 'weather'. # # The second input is the data itself, which is a data frame that should at least have variables # that match the ratio and nominal scale names. The third input tells aov what to do # with 'NA's. Here we'll use 'na.action = na.omit' which removes data with NA's in either # the ratio scale or nominal scale variables. out <- aov(temperature ~ weather,data = survey,na.action = na.omit) # It's hard to find the p-value and other statistics in this output, but there's # a function 'tidy' that cleans up the output: tidy.out <- tidy(out) # The output of 'tidy' gives you that familiar table of results. Hopefully it matches the tutorial. tidy.out # Useful fields in tidy.out are 'df', 'statistic' (F value), and 'p.value' # We can use this output and 'spritnf' to present the results in APA format: sprintf('F(%g,%g) = %0.2f, p = %0.4f',tidy.out\$df[1],tidy.out\$df[2], tidy.out\$statistic[1],tidy.out\$p.value[1]) # Effect size: eta-squared is the ratio of SS between and SS total # The output 'tidy.out' contains a field 'sumsq' that contains SS bet and SS within: tidy.out\$sumsq # For some reason, these numbers aren't 'numeric' so we first have to convert the # SS values using 'as.numeric'. This is how you can calculate eta-squared: SS <- as.numeric(tidy.out\$sumsq) # Remember, SS total is SS between + SS within, so: eta.squared <- SS[1]/(SS[1]+SS[2]) # Displaying the result: sprintf('Eta-squared = %0.4f',eta.squared) # Next we'll make a bar plot of means across levels with error bars representing the # standard error of the mean. This will use 'ggplot', much like we did in # 'TwoSampleIndependentTTest.R'. # This generates a data frame containing statistics for each level of 'weather'. Note # the way we're removing NA's summary <- data.frame( mean <- tapply(survey\$temperature,survey\$weather,mean,na.rm = TRUE), n <- tapply(survey\$temperature,survey\$weather,function(x) sum(!is.na(x))), sd <- tapply(survey\$temperature,survey\$weather,sd,na.rm = TRUE)) summary\$sem <- summary\$sd/sqrt(summary\$n) colnames(summary) = c("mean","n","sd","sem") summary # The levels in 'weather' come out in alphabetical order. To re-order them # from 'Not at all' to 'Very much' we'll define a list in right order # and use this list in ggplot levels <- row.names(summary) levels <- levels[c(3,2,1,4)] # Bar graph: # Define y limits for the bar graph ylimit <- c(min(summary\$mean-1.5*summary\$sem), max(summary\$mean+1.5*summary\$sem)) # Plot bar graph with error bar as one standard error (standard error of the mean/SEM) ggplot(summary, aes(x = row.names(summary), y = mean)) + xlab("How does weather affect your mood?") + geom_bar(position = position_dodge(), stat="identity", fill="blue") + geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem),width = .5) + scale_y_continuous(name = "Preferred Temperature (F)") + scale_x_discrete(limits = levels) + coord_cartesian(ylim=ylimit)