# Example: the t-test for independent means. We compare the heights of
# female students whose mothers were taller than the median mother with the
# heights of female students whose mothers were not.

# Load in the survey data
survey <- read.csv(
  "http://www.courses.washington.edu/psy315/datasets/Psych315W19survey.csv"
)

# Select the female students whose mother's height is known. '%in%' is used
# rather than '==' so that a missing gender gives FALSE instead of NA; an NA
# in a logical index would insert NA entries into the selected heights.
keep <- survey$gender %in% "Female" & !is.na(survey$mheight)

# First find the heights of the mothers of female students, without NA's
mheight <- survey$mheight[keep]

# This is the median of the mothers' heights:
median(mheight)

# Find the heights of the female students whose mothers' heights aren't NA's:
height <- survey$height[keep]

# Find the heights of female students whose mothers are taller than the
# median. Call them 'x'
x <- height[mheight > median(mheight)]

# Find the heights of female students whose mothers' heights are less than
# or equal to the median. Call them 'y'
y <- height[mheight <= median(mheight)]

# A student's own height can still be missing. t.test() drops missing values
# by itself, but mean(), sd() and length() below do not, so remove them here
# to keep every statistic based on the same observations.
x <- x[!is.na(x)]
y <- y[!is.na(y)]

# Run the two-tailed t-test. If you send in both x and y, t.test
# assumes it's a two-sample independent measures t-test. The 'var.equal = TRUE'
# tells R to use the pooled standard deviation to combine the two measures
# of standard deviation.
out <- t.test(x, y, alternative = "two.sided", var.equal = TRUE)

# The p-value is:
out$p.value

# Displaying the result in APA format. The p-value is shown with three
# decimals; a single decimal would round most p-values to 0.0.
sprintf(
  "t(%g) = %4.2f, p = %5.3f",
  out$parameter, out$statistic, out$p.value
)

# Group means, sample sizes and standard deviations
mx <- mean(x)
my <- mean(y)
nx <- length(x)
ny <- length(y)
sx <- sd(x)
sy <- sd(y)

# pooled sd
sp <- sqrt(((nx - 1) * sx^2 + (ny - 1) * sy^2) / (nx - 1 + ny - 1))
sp

# effect size (Cohen's d)
d <- abs(mx - my) / sp
d

# Find observed power from d, alpha and n. power.t.test() solves for
# whichever argument is NULL. Its 'delta' is the difference in means in units
# of 'sd' (which defaults to 1), so the standardized effect size d is passed
# as delta. n is the number of observations per group.
out <- power.t.test(
  n = (nx + ny) / 2,
  delta = d,
  sig.level = .05,
  power = NULL,
  alternative = "two.sided",
  type = "two.sample"
)
out$power

# Find desired n (per group) from d, alpha and power = 0.8
out <- power.t.test(
  n = NULL,
  delta = d,
  sig.level = .05,
  power = 0.8,
  alternative = "two.sided",
  type = "two.sample"
)
out$n

# Making a bar graph with error bars.
# Adding error bars is a bit more complicated than the basic bar graph.
# First, it requires a package called 'ggplot2', which can be installed
# with the 'install.packages' function:
#
# install.packages("ggplot2")
#
# It is commented out here because it only needs to be done once. Once
# you've installed a package, you can tell R that you want to use it with
# the 'library' function:
library(ggplot2)

# Now we'll generate a 'data frame' containing statistics for x and y.
# Columns are named with '=' inside data.frame(); using '<-' there would
# instead create stray variables called mean, n and sd and give the columns
# mangled names.
summary <- data.frame(
  mean = c(mean(x), mean(y)),
  n = c(length(x), length(y)),
  sd = c(sd(x), sd(y))
)

# Standard error of the mean for each group
summary$sem <- summary$sd / sqrt(summary$n)

row.names(summary) <- c("Tall Mothers", "Less Tall Mothers")

# This was a bit of work, but it creates a nice table:
summary

# Once you have this summary table, the rest will give you a nice looking
# bar plot with error bars:

# Define y limits for the bar graph based on means and sem's
ylimit <- c(
  min(summary$mean - 1.5 * summary$sem),
  max(summary$mean + 1.5 * summary$sem)
)

# Plot bar graph with error bars of one standard error of the mean (SEM)
ggplot(summary, aes(x = row.names(summary), y = mean)) +
  xlab("students") +
  geom_bar(position = position_dodge(), stat = "identity", fill = "blue") +
  geom_errorbar(aes(ymin = mean - sem, ymax = mean + sem), width = .5) +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  scale_y_continuous(name = "height") +
  coord_cartesian(ylim = ylimit)