# Comparing one correlation to the null hypothesis that rho = 0.
#
# If you have your raw x and y data, R's function 'cor.test' gives you a p-value for testing
# the hypothesis that a correlation was drawn from a population that has a correlation of zero.
# It takes in the x and y variables along with 'alternative', which can be "greater" or "less"
# for a one-tailed test, or "two.sided" for a two-tailed test. Here are the examples from the
# comparing_one_correlation tutorial.
# By default, cor.test converts the correlation of x and y into a t-statistic and then computes
# a t-test.
# Read the survey CSV from the course website into the data frame 'survey'.
survey <- read.csv(
  file = "http://www.courses.washington.edu/psy315/datasets/Psych315W21survey.csv"
)
# Example 1: The correlation between heights of your parents
x <- survey$mheight
y <- survey$pheight
# cor.test drops incomplete (NA) pairs on its own, but if you want to do it
# yourself:
# goodvals <- !is.na(x) & !is.na(y)
# x <- x[goodvals]
# y <- y[goodvals]
# cor.test runs the t-test for you. alternative = "greater" makes this a
# one-tailed test for a correlation greater than zero:
out <- cor.test(x, y, alternative = "greater")
# 'estimate' is the correlation
out$estimate
# 'p.value' is the p-value
out$p.value
# 'statistic' is the t-statistic used in the test:
out$statistic
# 'parameter' is its degrees of freedom (number of complete pairs minus 2):
out$parameter
# Here's how to display your results in APA format:
sprintf('r(%g) = %4.2f, p = %5.4f', out$parameter, out$estimate, out$p.value)
# The t-statistic is calculated directly from r and df, so this should be the
# same thing as out$statistic (see the beginning of
# comparing_one_correlation_tutorial.pdf).
# The df is taken from out$parameter rather than length(x) - 2: x was not
# filtered above, so length(x) still counts any NA entries that cor.test
# dropped and would overstate the number of pairs actually used.
out$estimate / sqrt((1 - out$estimate^2) / out$parameter)
# Example 2: Two-tailed test on the correlation between drinking and Facebook friends
# Keep only the rows where neither of the two variables is NA.
goodvals <- complete.cases(survey$drink, survey$friends)
x <- survey$drink[goodvals]
y <- survey$friends[goodvals]
# Test against the two-sided alternative (correlation not equal to zero).
out <- cor.test(x, y, alternative = "two.sided")
# Report the result in APA format: r(df) = correlation, p = p-value
sprintf('r(%g) = %4.2f, p = %5.4f', out$parameter, out$estimate, out$p.value)
# If you just have r and the sample size but not the raw data, you can calculate
# the p-value by running a t-test by hand:
# The correlation between x and y is:
r <- cor(x, y)
# The degrees of freedom is the number of (x, y) pairs minus 2:
df <- length(x) - 2
# The t-statistic is computed directly from r and df. (This is the r-to-t
# conversion, not Fisher's z transform.)
t <- r / sqrt((1 - r^2) / df)
# The p-value for a two-tailed test of t not equal to 0 should give you the same
# p-value as cor.test. The 'ncp' argument of pt is deliberately left out:
# supplying it, even as 0, makes pt use its non-central algorithm, which is
# less accurate in the tails than the central one.
2 * pt(abs(t), df, lower.tail = FALSE)