# Bret Larget
# August 25, 2011
# R examples for cow data
# Students can copy and paste commands into R to see effects.
# The entire file can be read into R and executed with the command
#
# > source("cows.R")
#
# or from a menu in R.
# Doing this causes all the graphs to be displayed one after another on the graphics device.
# The graphs can be captured into a PDF file by uncommenting the command here
### pdf("cows.pdf")
# and the dev.off() command at the end of the file.
####################################################################################################
# read in the cow data
# make sure that working directory for R contains the file cows.txt
# note that the top row is a header
# each field is separated by white space (spaces, tabs, and so on)
cows = read.table("cows.txt",header=T)
# check the structure to make sure data was read properly
# make sure that the number of observations and type of each observation is correct
str(cows)
# load the lattice library
# this is needed for the dotplot() and densityplot() commands
library(lattice)
# For this data set, it is most interesting to compare quantitative variables by treatment group
# The first argument to all lattice graphing commands is a formula of the form y ~ x
# where y and x are usually variables.
# For dotplot(), however, there is only an x variable so we leave y empty.
# Here there is a | added to the formula followed by a factor (categorical variable).
# The resulting plot will be broken into sections with a separate graph for each portion of the data
# with the data partitioned by levels of the variable treatment.
# The second argument to dotplot() is the name of the data frame that contains the variables used.
plot( dotplot(~age | treatment, data=cows) )
# The previous plot arranged the graphs in a 2 by 2 layout.
# To better compare groups, force the layout to have one column and four rows.
plot( dotplot(~age | treatment, data=cows, layout=c(1,4)) )
# The tretment levels are ordered alphabetically (and graphed low to high).
# Ordering treatment levels by level of additive makes more sense.
# This command replaces the treatment variable in the cows data frame
# with the same data, but with the levels ordered by the variable level.
# Note the use of $ to specify a variable from the data frame.
cows$treatment = reorder(cows$treatment,cows$level)
# Redo the previous plot to see the difference
plot( dotplot(~age | treatment, data=cows, layout=c(1,4)) )
# A more interesting variable is fat, where there looks to be more of a difference among groups
plot( dotplot(~fat | treatment, data=cows, layout=c(1,4)) )
# Density plots overlayed is another effective way to examine this data
# Here, rather than adding `| treatment' to the formula which would produce four graphs,
# adding the argument groups=treatment results in a single graph with different colors for each treatment group.
plot( densityplot(~fat, data=cows, groups=treatment) )
# Add a simple key to the right of the plot to show names of groups.
plot( densityplot(~fat, data=cows, groups=treatment, auto.key=list(space="right")) )
# Compute mean fat for each group
print( with(cows, sapply(split(fat,treatment),mean) ) )
# Looking way ahead, do a quick one-way ANOVA analysis
# See that there is statistical evidence for differences in mean milk fat percentage by weight
# among the groups, and this difference is largely explained by a higher mean in the high treatment group
# with level 0.3.
fit = lm(fat ~ treatment, data=cows)
print( anova(fit) )
print( summary(fit) )
### dev.off()