################################################################################
####              Example of Simple Regression Model                        ####
################################################################################
#### By Jimin Ding, 04/14/2017

## Sir Francis Galton (1822-1911) studied a data set of 1078 heights of
## fathers and sons.
## We will build a regression model to analyze the relationship between the
## heights of fathers and sons.
## NOTE(review): the UsingR help page for `father.son` appears to credit this
## data set to Karl Pearson rather than Galton -- confirm the attribution.

######################## Step 1. Load Data ################################
library(UsingR)
data(father.son)

########## Step 2. Regress the heights of sons on those of fathers ##########
regfit <- lm(sheight ~ fheight, data = father.son)
regfit
summary(regfit)
confint(regfit) ## confidence intervals for intercept and slope
attributes(regfit)
attributes(summary(regfit))

## Plot data with regression line
plot(
  sheight ~ fheight,
  data = father.son,
  bty = "l",
  pch = 20,
  xlab = "Father's Height (inches)",
  ylab = "Son's Height (inches)",
  main = "Heights of Fathers and Their Full Grown Sons"
)
abline(regfit, lty = 1, lwd = 2, col = "red")

## Prediction
predict(regfit) ## This is the same as fitted(regfit)

## Predict a son's height if his father's height is 65, 67, or 69.
newdata <- data.frame(fheight = c(65, 67, 69))
predict(regfit, newdata)
## confidence interval for the mean of the response variable
predict(regfit, newdata, interval = "confidence")
## prediction interval
predict(regfit, newdata, interval = "prediction")

## Other functions for lm objects
coef(regfit)
resid(regfit)
fitted(regfit)

################ Step 3. Model Diagnosis ######################
## Draw the four default lm diagnostic plots on one 2 x 2 page, then restore
## the previous graphics settings so later plots are not squeezed into the
## same grid.
old_par <- par(mfrow = c(2, 2))
plot(regfit)
par(old_par)

#### Check homogeneity (constant variance, homoscedasticity) assumption
## Regress the absolute residuals on the fitted values; a significant slope
## would suggest that the residual spread changes with the fitted value.
summary(lm(abs(residuals(regfit)) ~ fitted(regfit)))
## Insignificant. Hence the homogeneity assumption holds reasonably well.

#### Check serial correlation
library(lmtest)
dwtest(regfit)
acfvalues <- acf(residuals(regfit))
## There is a significant positive autocorrelation.
## Usually this is caused by missing important predictors.
## It is possible to remedy this by adding other predictors.
## Otherwise, consider a transformation of the data to detrend.

#### Check normality
shapiro.test(residuals(regfit))

################ Step 4. Regression through Origin ######################
## `- 1` removes the intercept, forcing the fitted line through the origin.
regfito <- lm(sheight ~ fheight - 1, data = father.son)
summary(regfito)