# NerfRegression.R
#
# Demonstrates regression to the mean using Nerf gun shooting data.
#
# Each student took two shots at a dart board during class. The csv file
# contains two fields, 'shot1' and 'shot2', holding the score for each shot.
# This script loads the data, fits the regression line of shot 2 on shot 1,
# repeats the analysis on z-scores, and then compares the mean z-scores of
# above-average and below-average first-shot students across the two shots.

# Load the data ----
mydata <- read.csv(
  "http://www.courses.washington.edu/psy315/datasets/NerfDartRegression.csv"
)
mydata

# Let 'x' be the scores for shot 1 and 'y' the scores for shot 2
x <- mydata$shot1
y <- mydata$shot2

# Correlation between the two shots ----
r <- cor(x, y)
r

# Scatterplot of shot 1 vs shot 2 across students ----
# asp = 1 forces equal scaling on both axes.
plot(x, y,
  xlab = "Shot 1",
  ylab = "Shot 2",
  pch = 19,
  col = "blue",
  asp = 1,
  cex = 2
)

# Regression line ----
# slope
m <- r * sd(y) / sd(x)
# intercept
b <- mean(y) - m * mean(x)
sprintf("y = %5.1f X + %5.1f", m, b)

# Draw the regression line (abline takes intercept first, then slope)
abline(b, m)

# z-scores ----
zx <- (x - mean(x)) / sd(x)
zy <- (y - mean(y)) / sd(y)

# Scatterplot of the z-scores
plot(zx, zy,
  xlab = "zx",
  ylab = "zy",
  pch = 19,
  col = "blue",
  asp = 1,
  cex = 2
)

# Correlation of the z-scores
rz <- cor(zx, zy)
rz

# For z-scores the regression line has intercept 0 and slope r
abline(0, r)

# Demonstrate regression to the mean ----

# Mean z-score for students that did better than average on the first shot
students <- zx > 0
m1 <- mean(zx[students])
m1

# Mean z-score for the same students on their second shot
m2 <- mean(zy[students])
m2

barplot(c(m1, m2),
  names.arg = c("shot 1", "shot 2"),
  las = 1,
  ylab = "mean z score",
  main = "Students that did better than average on shot 1"
)

# Mean z-score for students that did worse than average on the first shot
students <- zx < 0
m1 <- mean(zx[students])
m1

# Mean z-score for the same students on their second shot
m2 <- mean(zy[students])
m2

barplot(c(m1, m2),
  names.arg = c("shot 1", "shot 2"),
  las = 1,
  ylab = "mean z score",
  main = "Students that did worse than average on shot 1"
)