# Math 322 Biostatistics
# Lecture, April 13, 2009
# R examples for chapter 11.9 (Multiple regression)
# Geir Arne Hjelle (hjelle@math.wustl.edu)

# Read in data about blood pressure, birthweight and age (in days).
# The columns used below are BloodPressure, Birthweight and Age.
bp <- read.table(
  "http://www.math.wustl.edu/~hjelle/m322/r/bloodpressure.txt",
  header = TRUE
)

# One way to get a quick overview of the data is to plot them. If you try
# to plot a data frame containing more than 2 variables, R will make a matrix
# of plots of all the possible pairs of variables.
plot(bp)

# To carry out the least squares method, we specify a linear model. To say
# that we have two explanatory variables we use +. The data frame is passed
# through the data argument instead of being attach()ed, so its columns do
# not end up on the search path where they could mask other objects.
# The model is fitted once and reused in the rest of the script.
bp.fit <- lm(BloodPressure ~ Birthweight + Age, data = bp)
bp.fit

# Note that these partial regression coefficients are different from the
# ones we obtain by doing simple regression on one of the variables.
lm(BloodPressure ~ Birthweight, data = bp)
lm(BloodPressure ~ Age, data = bp)

# To compare the coefficients to find which is most important, we need
# to standardize them, so that they are on the same scale. This is done
# by multiplying by the standard deviation of the explanatory variable and
# dividing by the standard deviation of the response.
coeff.Birthweight <- coefficients(bp.fit)["Birthweight"]
coeff.Birthweight * sd(bp$Birthweight) / sd(bp$BloodPressure)

coeff.Age <- coefficients(bp.fit)["Age"]
coeff.Age * sd(bp$Age) / sd(bp$BloodPressure)

# These can be thought of as correlation coefficients where we have adjusted
# for the other variable(s). They will not equal the correlation coefficients
# from simple regression.
cor(bp$Birthweight, bp$BloodPressure)
cor(bp$Age, bp$BloodPressure)

# Tests for the regression are again done by applying summary to our linear
# model.
summary(bp.fit)