# NerfRegression.R
#
# Demonstrates regression to the mean from Nerf gun shooting data.
#
# Each student took two shots at a dart board during class. The csv file contains two fields:
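# 'shot1' and 'shot2'. In this script we'll load in the data, plot shot 1
# against shot 2, draw the regression line, and then use z-scores to show
# regression to the mean.
#
# Load in the data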
# Read the Nerf dart data set from the course web site and show it
data_url <- "http://www.courses.washington.edu/psy315/datasets/NerfDartRegression.csv"
mydata <- read.csv(data_url)
mydata
# We want to compare shot 1 with shot 2 across students, so pull the two
# columns out into plain vectors:
#   'x' holds the scores for shot 1
#   'y' holds the scores for shot 2
x <- mydata$shot1
y <- mydata$shot2
# Correlation between the two shots (cor() defaults to Pearson's r)
r <- cor(x, y)
r
# Scatterplot of shot 1 (x) against shot 2 (y).
# The aspect-ratio argument is written out in full as 'asp' rather than
# relying on R's partial argument matching to resolve an abbreviation.
plot(x, y,
     xlab = "Shot 1",
     ylab = "Shot 2",
     pch = 19,       # solid filled circles
     col = "blue",
     asp = 1,        # one unit on x is drawn as long as one unit on y
     cex = 2)        # plotting symbols at twice the default size
# Regression line for predicting y from x, built from the correlation r
# slope: r rescaled by the ratio of the standard deviations
m <- r * sd(y) / sd(x)
# intercept: chosen so the line passes through (mean(x), mean(y))
b <- mean(y) - m * mean(x)
sprintf("y = %5.1f X + %5.1f", m, b)
# Add the line to the current plot; abline's 'a' is the intercept (our b)
# and its 'b' is the slope (our m)
abline(a = b, b = m)
# z-scores: subtract the mean and divide by the standard deviation, so each
# set of scores has mean 0 and standard deviation 1
zx <- (x - mean(x)) / sd(x)
zy <- (y - mean(y)) / sd(y)
# Scatterplot of the z-scores ('asp' spelled out in full, not abbreviated)
plot(zx, zy,
     xlab = "zx",
     ylab = "zy",
     pch = 19,       # solid filled circles
     col = "blue",
     asp = 1,        # equal scaling on both axes
     cex = 2)        # plotting symbols at twice the default size
# correlation of the z-scores; standardizing does not change the correlation,
# so this should match r up to rounding error
rz <- cor(zx, zy)
rz
# regression line has intercept 0 and slope r!
abline(0, r)
# Demonstrate regression to the mean.
# Step 1: pick out the students whose first shot was above average
# (a positive z-score on shot 1) and average their shot 1 z-scores
students <- zx > 0
m1 <- mean(zx[students])
m1
# Step 2: average the shot 2 z-scores of those same students
m2 <- mean(zy[students])
m2
# Step 3: show the two group means side by side
barplot(
  height = c(m1, m2),
  names.arg = c("shot 1", "shot 2"),
  las = 1,
  ylab = "mean z score",
  main = "Students that did better than average on shot 1"
)
# Now the students whose first shot was below average (a negative z-score
# on shot 1): average their shot 1 z-scores
students <- zx < 0
m1 <- mean(zx[students])
m1
# Average the shot 2 z-scores of those same students
m2 <- mean(zy[students])
m2
# Show the two group means side by side
barplot(
  height = c(m1, m2),
  names.arg = c("shot 1", "shot 2"),
  las = 1,
  ylab = "mean z score",
  main = "Students that did worse than average on shot 1"
)