# TwoSampleIndependentTTest.R
#
# The following example is the t-test for independent means, where we compare the
# heights of female students whose mothers were taller or shorter than the median.
# Load in the survey data
survey <- read.csv("http://www.courses.washington.edu/psy315/datasets/Psych315W21survey.csv")
# Select the female students whose mother's height was reported. '%in%' is
# used instead of '==' because '==' gives NA when gender is missing, and an NA
# in a logical index inserts an NA element into the result.
keep <- !is.na(survey$mheight) & survey$gender %in% "Female"
# Heights of the mothers of those students:
mheight <- survey$mheight[keep]
# This is the median of the mothers' heights:
median(mheight)
# Heights of the same students (same rows, so 'height' lines up with 'mheight'):
height <- survey$height[keep]
# A student's own height can still be missing. t.test drops NA's by itself,
# but mean, sd and length do not, so leave them out when forming the groups.
has_height <- !is.na(height)
# Heights of female students whose mothers are taller than the median.
# Call them 'x'
x <- height[has_height & mheight > median(mheight)]
# Heights of female students whose mothers' heights are less than or equal to
# the median. Call them 'y'
y <- height[has_height & mheight <= median(mheight)]
# Two-tailed t-test for independent means. Supplying both x and y makes
# t.test treat them as two independent samples, and 'var.equal = TRUE' asks
# for the version of the test that pools the two groups' variances.
out <- t.test(x = x, y = y, var.equal = TRUE, alternative = "two.sided")
# The p-value is:
out[["p.value"]]
# Displaying the result in APA format:
sprintf('t(%g) = %4.2f, p = %5.4f',
        out[["parameter"]], out[["statistic"]], out[["p.value"]])
# Sample size, mean and standard deviation of each group
nx <- length(x)
ny <- length(y)
mx <- mean(x)
my <- mean(y)
sx <- sd(x)
sy <- sd(y)
# Pooled sd: the two variances weighted by their degrees of freedom
sp <- sqrt(((nx - 1) * sx^2 + (ny - 1) * sy^2) / (nx + ny - 2))
sp
# Standard error of the difference between the two means
sem <- sqrt(1 / nx + 1 / ny) * sp
sem
# Effect size: absolute difference between the means in pooled-sd units
d <- abs(my - mx) / sp
d
# Find observed power from d, alpha and n. power.t.test takes one n that
# applies to each group, so the average of the two group sizes is used here.
# The effect size goes in as 'delta' (spelled out rather than relying on
# partial matching of 'd'); 'sd' is left at its default of 1, so delta is in
# standard-deviation units, the same units as d.
out <- power.t.test(n = (nx + ny) / 2,
                    delta = d,
                    sig.level = .05,
                    power = NULL,
                    alternative = "two.sided",
                    type = "two.sample")
out$power
# Find desired n (per group) from d, alpha and power = 0.8
out <- power.t.test(n = NULL,
                    delta = d,
                    sig.level = .05,
                    power = 0.8,
                    alternative = "two.sided",
                    type = "two.sample")
out$n
# Making a bar graph with error bars. Adding error bars is a bit more
# complicated than the basic bar graph. First, it requires a package called
# 'ggplot2', which is installed with the 'install.packages' function.
# A package only needs to be installed once, so the install is skipped when
# ggplot2 is already available:
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages('ggplot2')
}
# Once a package is installed, you tell R that you want to use it with
# the 'library' function:
library(ggplot2)
# Now we'll generate a 'data frame' containing statistics for x and y.
# Columns are named with '=' inside data.frame(); using '<-' there would
# instead create stray variables called mean, n and sd and leave the columns
# with mangled names.
summary <- data.frame(
  mean = c(mean(x), mean(y)),
  n = c(length(x), length(y)),
  sd = c(sd(x), sd(y)))
# Standard error of the mean for each group
summary$sem <- summary$sd / sqrt(summary$n)
row.names(summary) <- c("Tall Mothers", "Less Tall Mothers")
# This was a bit of work, but it creates a nice table:
summary
# With the summary table in hand, the rest draws a bar plot with error bars.
# y limits for the graph: from the smallest (mean - 1.5 sem) to the largest
# (mean + 1.5 sem) across the groups
ylimit <- with(summary, c(min(mean - 1.5 * sem), max(mean + 1.5 * sem)))
# Bars are the group means; each error bar extends one standard error of the
# mean (SEM) above and below its bar
ggplot(summary, aes(x = row.names(summary), y = mean)) +
  geom_bar(stat = "identity", position = position_dodge(), fill = "blue") +
  geom_errorbar(aes(ymin = mean - sem, ymax = mean + sem), width = .5) +
  scale_y_continuous(name = "Height (in)") +
  xlab("Students") +
  coord_cartesian(ylim = ylimit) +
  theme_bw() +
  theme(panel.grid.major = element_blank())