# Chi-squared test for independence
#
# Just like for frequencies, R's 'chisq.test' provides a p-value for the chi-squared
# test for independence.
# Here we'll run the two examples in the chi_2_test_independence_tutorial
# NOTE: the workspace is deliberately not cleared here (no 'rm(list = ls())').
# Wiping the caller's global environment is a destructive side effect, and the
# objects used below (survey, fo, out) are all assigned before they are read.
# Load in the survey data (read directly from the course web server)
survey <- read.csv(
  "http://www.courses.washington.edu/psy315/datasets/Psych315W21survey.csv"
)
# Example 1: Computer users by gender
# Cross-tabulate gender (rows) against computer type (columns) to get the
# table of observed frequencies
fo <- table(survey$gender, survey$computer)
# Keep only rows 2-3 and columns 1 and 3 of the table (rows, then columns).
# NOTE(review): this comment used to say "1st (Apple) and 4th (PC) columns",
# but the code selects columns 1 and 3 -- print the full table to confirm
# that these positions are the intended categories.
fo <- fo[c(2, 3), c(1, 3)]
# Run the chi-squared test. The 'correct' parameter is a specific case for a
# 2x2 table. If 'correct = TRUE' then chisq.test will use a 'Yates continuity
# correction', which is essentially the same thing as we did back in the normal
# approximation to the binomial when we added or subtracted .5 from X. To get
# the same p-value as in the tutorial we'll set correct = FALSE:
out <- chisq.test(fo, correct = FALSE)
# The chi-squared statistic is:
out$statistic
# The degrees of freedom is:
out$parameter
# And the p-value is:
out$p.value
# Writing in APA format can be done like this (N is the total count in 'fo'):
sprintf(
  'Chi-Squared(%d,N=%d) = %5.2f, p = %5.4f',
  out$parameter, sum(fo), out$statistic, out$p.value
)
# Plot the results as side-by-side bars, with one legend entry per row of 'fo'.
# 'legend.text' is spelled out in full rather than relying on partial
# matching of 'legend'.
barplot(
  fo,
  beside = TRUE,
  legend.text = rownames(fo)
)
# Example 2: Does where you sit in class depend on gender?
# Cross-tabulate gender (rows) against seating choice (columns)
fo <- table(survey$gender, survey$sit)
# Keep only rows 2 and 3 of the table, with all columns
fo <- fo[c(2, 3), ]
# Run the chi-squared test. Because 'fo' is a two-dimensional table (assuming
# it has at least two columns), chisq.test treats it as a contingency table
# and tests for independence: the expected frequencies are computed from the
# row and column totals. (Equal expected frequencies across categories are
# only the default for the goodness-of-fit test on a single vector of counts.)
# 'correct' only affects 2x2 tables; it is set to FALSE here to match
# Example 1.
out <- chisq.test(fo, correct = FALSE)
# Result in APA format (N is the total count in 'fo'):
sprintf(
  'Chi-Squared(%d,N=%d) = %5.2f, p = %5.4f',
  out$parameter, sum(fo), out$statistic, out$p.value
)
# Plot the results as side-by-side bars, one colour per row of 'fo'.
# 'legend.text' is spelled out in full rather than relying on partial
# matching of 'legend'.
barplot(
  fo,
  beside = TRUE,
  legend.text = rownames(fo),
  col = c("Pink", "Blue")
)