# Clear the workspace: remove every object that ls() reports in the
# current environment so the script starts from a clean slate.
# NOTE(review): this also deletes any objects you created before
# running the script -- confirm that is intended.
rm(list = ls())
# The .csv file containing the grades can be found at:
# http://www.courses.washington.edu/psy315/datasets/ExampleGrades.csv
#
# If you open up the .csv file you'll see that it contains a
# single column of numbers with the name 'Grades' as a column
# header.
# Load in the grades from the .csv file on the course website
mydata <- read.csv(
  file = "http://www.courses.washington.edu/psy315/datasets/ExampleGrades.csv"
)
# The command 'mydata <- read.csv' loads the data into a variable
# called 'mydata'.
#
# The grades are in a field defined by the column header, 'Grades'.
# We access fields of a variable with the dollar sign.
# We can use 'head' to show just the first few scores:
head(mydata$Grades)
# Use 'hist' to make a histogram.
# The simplest way is like this (with no 'main' or 'xlab' given,
# 'hist' builds the title and x label from the expression passed in,
# here 'mydata$Grades'):
hist(mydata$Grades)
# By default, R chooses the class interval and axis labels.
#
# Let's choose our own class intervals or 'breaks' using
# R's 'seq' function. 'seq' returns a sequence of numbers
# beginning with the first value, ending with the second
# value, and stepping with the third. To generate our
# class interval boundaries, we can define a new variable
# 'class.interval' like this:
class.interval <- seq(from = 54, to = 80, by = 2)
# Note, we could have called this variable whatever we want.
# You can customize your histogram by defining parameters like:
# 'main' for the title
# 'xlab' for the xlabel
# 'col' for the color
# 'xlim' for the x axis limits and
# 'breaks' for the class intervals:
hist(
  mydata$Grades,
  breaks = class.interval,
  xlim = c(54, 80),
  col = "blue",
  main = "Histogram of Grades",
  xlab = "Score"
)
# I don't like R's choice for the X-axis and y-axis
# ticks. For one thing, frequencies are whole
# numbers, so there's no reason to have 1/2 increments
# in the y-axis.
#
# You can customize the x and y axes by first using
# xaxt = 'n' and yaxt = 'n' in 'hist' to turn off the
# x and y axis labels:
# Draw the histogram with R's own axes switched off, and keep what
# 'hist' returns so the y-axis ticks can be taken from the actual
# frequencies instead of being hard-coded for one particular data set.
grade.hist <- hist(
  mydata$Grades,
  main = "Histogram of Grades",
  xlab = "Score",
  col = "blue",
  xlim = c(54, 80),
  xaxt = "n",
  yaxt = "n",
  breaks = class.interval
)
# and then adding your own axes with the 'axis' function
# Axis 1 is 'x' and 2 is 'y':
axis(1, at = class.interval)
# One tick per whole-number frequency, from 0 up to the tallest bar.
axis(2, at = seq(0, max(grade.hist$counts)), las = 1)
# In the tutorial we made a cumulative percentage curve. We can do
# this in R too.
#
# First, we'll find out how many scores fall into each class
# interval. We already plotted this with 'hist'. 'hist' will
# return these values if we ask it to. Here we'll have 'hist'
# send the information into the variable 'freq', and suppress
# the plotting by using 'plot = FALSE':
freq <- hist(
  mydata$Grades,
  plot = FALSE,
  breaks = class.interval
)
# The per-interval frequencies are stored in the 'counts' field of freq:
print(freq$counts)
# Next we'll accumulate these frequencies like we did in the tutorial
# using R's 'cumsum' function. We'll also scale it by 100 and divide
# by the total number of scores, which can be found with the 'length'
# function:
# We also concatenate a zero to the beginning of the list so that 'y'
# has one value per class boundary. Doing both steps in a single
# assignment means re-running this line cannot prepend a second zero
# and leave 'y' longer than 'class.interval'.
y <- c(0, 100 * cumsum(freq$counts) / length(mydata$Grades))
# And plot:
plot(
  x = class.interval,
  y = y,
  xaxt = "n",
  yaxt = "n",
  xlab = "Score",
  ylab = "Cumulative Frequency (%)"
)
# That just made symbols. To add lines we use:
lines(x = class.interval, y = y)
# Then draw the x and y axis ticks by hand, just as we did for 'hist':
axis(side = 1, at = class.interval)
axis(side = 2, at = seq(from = 0, to = 100, by = 10), las = 1)
# This should look like the cumulative frequency percentage
# curve in the tutorial