# OneSampleTTest.R
#
# There are two ways to calculate p-values in R for t-tests,
# depending on whether you're working with raw data or
# with summary statistics (mean and standard deviation)
#
# Read the survey data (a CSV file) from its URL into 'survey'
survey <- read.csv(file = "http://www.courses.washington.edu/psy315/datasets/Psych315W21survey.csv")
# If you have the mean and standard deviation of your sample (and degrees of freedom), then
# you can use R's 'pt' function to calculate the p-value. 'pt' is a lot like the 'pnorm' function
# for the z-distribution.
#
# Example 1 (from the t-test tutorial):
#
# Given a sample of 25 systolic blood pressure scores with a mean of 131.66 and a standard
# deviation of 22.2532 mm Hg, is this mean significantly greater than a normal value of
# 120 mm Hg?
#
# Summary statistics for the sample (size, mean, standard deviation) and the
# mean under the null hypothesis
n <- 25
m <- 131.66
s <- 22.2532
H0 <- 120
# t-statistic: the distance of the sample mean from H0, measured in units of
# the standard error of the mean
sem <- s / sqrt(n)
t <- (m - H0) / sem
# p-value from the t-distribution with n - 1 degrees of freedom.
# 'lower.tail = FALSE' gives the area above t, i.e. a one-tailed test in the
# positive direction. Use 'lower.tail = TRUE' when rejecting for negative
# values of t:
p <- pt(q = t, df = n - 1, lower.tail = FALSE)
p
# Effect size (Cohen's d): the absolute mean difference in standard deviations
d <- abs(m - H0) / s
d
# Example 2: Are the heights of women with fathers 6 feet or taller significantly
# different from 64 inches?
# Mean height under the null hypothesis, in inches
H0 <- 64
# This example runs the t-test on the raw data rather than on summary statistics,
# using R's 't.test' function.
# Read the survey data (a CSV file) from its URL into 'survey'
survey <- read.csv(file = "http://www.courses.washington.edu/psy315/datasets/Psych315W21survey.csv")
# Build a logical mask selecting the female students whose father is 6 feet
# (72 inches) or taller. '%in%' is used instead of '==' so that a missing gender
# gives FALSE rather than NA, and rows with a missing father's height or a
# missing student height are excluded. The mask therefore never contains NA,
# and indexing with it cannot introduce NA values.
female.students <- survey$gender %in% "Female" &
  !is.na(survey$pheight) & survey$pheight >= 72 &
  !is.na(survey$height)
# Heights of the selected students. There are no missing values in this vector,
# so mean() and sd() on it will not return NA because of missing data.
female.height <- survey$height[female.students]
# Run the one-sample t-test of these heights against the null-hypothesis mean
out <- t.test(female.height,
              mu = H0,
              alternative = "two.sided")
# The option 'mu = H0' defines the mean for the null hypothesis (64 in this example)
# The option 'alternative = "two.sided"' is for a two-sided test (of course)
# Other options are "less" or "greater" for one sided tests.
# The object returned by 't.test' stores the pieces of the result by name.
# Here's the t-statistic:
out$statistic
# Here are the degrees of freedom:
out$parameter
# Here's the p-value:
out$p.value
# Effect size (Cohen's d): the absolute difference between the sample mean and
# H0, in standard deviations. 'na.rm = TRUE' leaves out missing heights, so the
# effect size is not NA when the data contain missing values, which 't.test'
# also drops from its calculation:
d <- abs(mean(female.height, na.rm = TRUE) - H0) /
  sd(female.height, na.rm = TRUE)
d
# Using R to print the result in APA format
#
# You can use R's 'sprintf' function to output the results of your hypothesis test
# in APA format. This output can then be copied and pasted into your paper.
# 'sprintf' is an old command from the C programming language that prints out a formatted
# 'string', which is a list of characters. To stick a number into a string you can do this:
sprintf("My favorite number is %g.", pi)
# The '%g' is a placeholder: sprintf replaces it with the number supplied after the
# string. With more than one '%g', the numbers are filled in in the order supplied:
sprintf("My favorite number is %g, and the square root of that number is %g",
        pi, sqrt(pi))
# To insert text instead of a number, use '%s':
sprintf("It is a %s day for statistics!", "great")
# To control the number of decimal places, use '%a.bf', where a is the minimum total
# width of the printed number in characters (counting the decimal point) and b is the
# number of digits to the right of the decimal point.
sprintf("pi to the nearest 2 digits is %3.2f", pi)
# Combining these pieces, the last t-test can be reported in APA format: degrees of
# freedom with '%g', the t-statistic to 2 decimal places and the p-value to 4:
sprintf("t(%g) = %4.2f, p = %5.4f",
        out[["parameter"]], out[["statistic"]], out[["p.value"]])
# Even if you don't understand 'sprintf' well, you can just cut and paste the line above
# with 'out' as the output of the t-test and it'll give you what you want.