Math 322, Biostatistics HW 9

######################################## Exercise 1
#################
# Input:
#
alanine <- scan()
21.5 14.5 16.0 14.8 12.1 14.4
19.6 17.4 20.3 15.6 11.4 14.7
20.9 15.0 18.5 13.5 12.7 13.8
22.8 17.8 19.3 16.4 14.5 12.0

# The species index cycles fastest: each row above holds one
# observation from each of S1,...,S6.
species <- gl(6, 1, 24, labels=c("S1","S2","S3","S4","S5","S6"))

#(a) One-way ANOVA
anova(lm(alanine ~ species))

#(b) Pairwise t-tests with adjusted p-values
pairwise.t.test(alanine, species)

#(c) Tukey's honest significant differences
hsd <- TukeyHSD(aov(alanine ~ species)); hsd
plot(hsd); abline(v=0, lty=3)
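# Optional sanity check, not part of the assigned output: the one-way
# ANOVA above assumes roughly equal variances across species. A quick
# look at the group summaries and Bartlett's test probes that assumption.
tapply(alanine, species, mean)    # group means
tapply(alanine, species, var)     # group variances
bartlett.test(alanine ~ species)  # H0: all six variances are equal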
######################################## Exercise 2
#
#       Response to Factors A and B
#
#       a1          a2          a3
#   ----------  ----------  ----------
#    b1    b2    b1    b2    b1    b2
#   ----  ----  ----  ----  ----  ----
data <- scan()
34.1 35.6 38.6 40.3 41.0 42.1
36.9 36.3 39.1 41.3 41.4 42.7
33.2 34.7 41.3 42.7 43.0 43.1
35.1 35.8 41.4 41.9 43.4 44.8
34.8 36.0 40.7 40.8 42.2 44.5

a <- 3; b <- 2; n <- 5; N <- a*b*n
A <- gl(a, b, N, labels=c("a1","a2","a3"))   # A changes every b values
B <- gl(b, 1, N, labels=c("b1","b2"))        # B cycles fastest
anova(lm(data ~ A*B))                        # two-way ANOVA with interaction

######################################## Exercise 3
a <- 4; b <- 3; c <- 2; n <- 4; N <- a*b*c*n

# Each row below holds the b1,b2,b3 responses for a1,a2,a3,a4 in turn;
# the first four rows are at level c1, the last four at level c2.
data <- scan()
4.1 4.6 3.7 4.9 5.2 4.7 5.0 6.1 5.5 3.9 4.4 3.7
4.3 4.9 3.9 4.6 5.6 4.7 5.4 6.2 5.9 3.3 4.3 3.9
4.5 4.2 4.1 5.3 5.8 5.0 5.7 6.5 5.6 3.4 4.7 4.0
3.8 4.5 4.5 5.0 5.4 4.5 5.3 5.7 5.0 3.7 4.1 4.4
4.8 5.6 5.0 4.9 5.9 5.0 6.0 6.0 6.1 4.1 4.9 4.3
4.5 5.8 5.2 5.5 5.3 5.4 5.7 6.3 5.3 3.9 4.7 4.1
5.0 5.4 4.6 5.5 5.5 4.7 5.5 5.7 5.5 4.3 4.9 3.8
4.6 6.1 4.9 5.3 5.7 5.1 5.7 5.9 5.8 4.0 5.3 4.7

A <- gl(a, b, N, labels=c("a1","a2","a3","a4"))
B <- gl(b, 1, N, labels=c("b1","b2","b3"))
C <- gl(c, a*b*n, N, labels=c("c1","c2"))
anova(lm(data ~ A*B*C))                      # three-way ANOVA with all interactions
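# Optional visual check for the factorial designs above (a sketch, not
# part of the assigned output; it assumes the objects from Exercise 2 or
# Exercise 3 are still in the workspace): an interaction plot shows
# roughly parallel traces when the interaction term is negligible.
interaction.plot(A, B, data)   # mean response vs. A, one trace per level of B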
######################################## Exercise 4
#################
# Input:
#
data <- scan()
51.4  0.2 17.8 24.6 18.9
72.0  1.9 29.4 20.7  8.0
53.2  0.2 17.0 18.5 22.6
83.2 10.7 30.2 10.6  7.1
57.4  6.8 15.3  8.9 27.3
66.5 10.6 17.6 11.1 20.8
98.3  9.6 35.6 10.6  5.6
74.8  6.3 28.2  8.8 13.1
92.2 10.8 34.7 11.9  5.9
97.9  9.6 35.8 10.8  5.5
88.1 10.5 29.6 11.7  7.8
94.8 20.5 26.3  6.7 10.0
62.8  0.4 22.3 26.5 14.3
58.4  6.6 15.7  8.7 26.3
81.6  2.3 37.9 20.0  0.5

xy0 <- matrix(data, ncol=5, byrow=TRUE); xy0                  # arrange the data in the same format as the input
dimnames(xy0) <- list(NULL, c("Y","X1","X2","X3","X4")); xy0  # label the columns
xy <- data.frame(xy0); xy                                     # package the data for linear modeling

fit <- lm(Y~X1+X2+X3+X4, data=xy); fit                        # 4(a): regression coefficients

summary(fit)$f                                                # 4(b): test H0: b1=b2=b3=b4=0
pf(summary(fit)$f[1], summary(fit)$f[2], summary(fit)$f[3], lower.tail=FALSE)  # p-value

# 4(c): F-tests
anova(lm(Y~X1, data=xy))   # test H0: b1=0
anova(lm(Y~X2, data=xy))   # test H0: b2=0
anova(lm(Y~X3, data=xy))   # test H0: b3=0
anova(lm(Y~X4, data=xy))   # test H0: b4=0

# 4(c): t-tests
summary(lm(Y~X1, data=xy))   # test H0: b1=0
summary(lm(Y~X2, data=xy))   # test H0: b2=0
summary(lm(Y~X3, data=xy))   # test H0: b3=0
summary(lm(Y~X4, data=xy))   # test H0: b4=0

summary(fit)                                                  # 4(d): standard error and R-squared

new <- list(X1=5.4, X2=20.3, X3=18.7, X4=11.2)
predict(fit, newdata=new)                                     # 4(e): predict Y at the given (X1,X2,X3,X4)
predict(fit, newdata=new, interval="confidence", level=0.95)  # 4(f): 95% confidence interval

#########
# Output:
#
> data <- scan()
1: 51.4 0.2 17.8 24.6 18.9
6: 72.0 1.9 29.4 20.7 8.0
11: 53.2 0.2 17.0 18.5 22.6
16: 83.2 10.7 30.2 10.6 7.1
21: 57.4 6.8 15.3 8.9 27.3
26: 66.5 10.6 17.6 11.1 20.8
31: 98.3 9.6 35.6 10.6 5.6
36: 74.8 6.3 28.2 8.8 13.1
41: 92.2 10.8 34.7 11.9 5.9
46: 97.9 9.6 35.8 10.8 5.5
51: 88.1 10.5 29.6 11.7 7.8
56: 94.8 20.5 26.3 6.7 10.0
61: 62.8 0.4 22.3 26.5 14.3
66: 58.4 6.6 15.7 8.7 26.3
71: 81.6 2.3 37.9 20.0 0.5
76:
Read 75 items

> xy0 <- matrix(data, ncol=5, byrow=TRUE); xy0   # arrange the data in the same format as the input
      [,1] [,2] [,3] [,4] [,5]
 [1,] 51.4  0.2 17.8 24.6 18.9
 [2,] 72.0  1.9 29.4 20.7  8.0
 [3,] 53.2  0.2 17.0 18.5 22.6
 [4,] 83.2 10.7 30.2 10.6  7.1
 [5,] 57.4  6.8 15.3  8.9 27.3
 [6,] 66.5 10.6 17.6 11.1 20.8
 [7,] 98.3  9.6 35.6 10.6  5.6
 [8,] 74.8  6.3 28.2  8.8 13.1
 [9,] 92.2 10.8 34.7 11.9  5.9
[10,] 97.9  9.6 35.8 10.8  5.5
[11,] 88.1 10.5 29.6 11.7  7.8
[12,] 94.8 20.5 26.3  6.7 10.0
[13,] 62.8  0.4 22.3 26.5 14.3
[14,] 58.4  6.6 15.7  8.7 26.3
[15,] 81.6  2.3 37.9 20.0  0.5

> dimnames(xy0) <- list(NULL, c("Y","X1","X2","X3","X4")); xy0   # label the columns
         Y   X1   X2   X3   X4
 [1,] 51.4  0.2 17.8 24.6 18.9
 [2,] 72.0  1.9 29.4 20.7  8.0
 [3,] 53.2  0.2 17.0 18.5 22.6
 [4,] 83.2 10.7 30.2 10.6  7.1
 [5,] 57.4  6.8 15.3  8.9 27.3
 [6,] 66.5 10.6 17.6 11.1 20.8
 [7,] 98.3  9.6 35.6 10.6  5.6
 [8,] 74.8  6.3 28.2  8.8 13.1
 [9,] 92.2 10.8 34.7 11.9  5.9
[10,] 97.9  9.6 35.8 10.8  5.5
[11,] 88.1 10.5 29.6 11.7  7.8
[12,] 94.8 20.5 26.3  6.7 10.0
[13,] 62.8  0.4 22.3 26.5 14.3
[14,] 58.4  6.6 15.7  8.7 26.3
[15,] 81.6  2.3 37.9 20.0  0.5

> xy <- data.frame(xy0); xy   # package the data for linear modeling
      Y   X1   X2   X3   X4
1  51.4  0.2 17.8 24.6 18.9
2  72.0  1.9 29.4 20.7  8.0
3  53.2  0.2 17.0 18.5 22.6
4  83.2 10.7 30.2 10.6  7.1
5  57.4  6.8 15.3  8.9 27.3
6  66.5 10.6 17.6 11.1 20.8
7  98.3  9.6 35.6 10.6  5.6
8  74.8  6.3 28.2  8.8 13.1
9  92.2 10.8 34.7 11.9  5.9
10 97.9  9.6 35.8 10.8  5.5
11 88.1 10.5 29.6 11.7  7.8
12 94.8 20.5 26.3  6.7 10.0
13 62.8  0.4 22.3 26.5 14.3
14 58.4  6.6 15.7  8.7 26.3
15 81.6  2.3 37.9 20.0  0.5

> fit <- lm(Y~X1+X2+X3+X4, data=xy); fit   # 4(a): regression coefficients

Call:
lm(formula = Y ~ X1 + X2 + X3 + X4, data = xy)

Coefficients:
(Intercept)           X1           X2           X3           X4
   -30.1423       2.0735       2.5798       0.6407       1.1014

> summary(fit)$f   # 4(b): test H0: b1=b2=b3=b4=0
   value    numdf    dendf
109.1550   4.0000  10.0000
> pf(summary(fit)$f[1], summary(fit)$f[2], summary(fit)$f[3], lower.tail=FALSE)   # p-value
       value
3.313456e-08
# ...this is the F-statistic and its p-value reported by summary(fit)

> ##### 4(c): F-tests
> anova(lm(Y~X1, data=xy))   # test H0: b1=0
Analysis of Variance Table

Response: Y
          Df  Sum Sq Mean Sq F value   Pr(>F)
X1         1 1790.45 1790.45  11.132 0.005359 **
Residuals 13 2090.93  160.84
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(lm(Y~X2, data=xy))   # test H0: b2=0
Analysis of Variance Table

Response: Y
          Df  Sum Sq Mean Sq F value    Pr(>F)
X2         1 2889.57 2889.57  37.874 3.464e-05 ***
Residuals 13  991.81   76.29
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(lm(Y~X3, data=xy))   # test H0: b3=0
Analysis of Variance Table

Response: Y
          Df Sum Sq Mean Sq F value Pr(>F)
X3         1  805.6   805.6  3.4049 0.0879 .
Residuals 13 3075.8   236.6
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(lm(Y~X4, data=xy))   # test H0: b4=0
Analysis of Variance Table

Response: Y
          Df  Sum Sq Mean Sq F value    Pr(>F)
X4         1 2640.63 2640.63  27.667 0.0001542 ***
Residuals 13 1240.76   95.44
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

> ##### 4(c): t-tests
> summary(lm(Y~X1, data=xy))   # test H0: b1=0

Call:
lm(formula = Y ~ X1, data = xy)

Residuals:
     Min       1Q   Median       3Q      Max
-17.4294  -9.1189   0.9747   8.1850  17.7813

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   61.013      5.440  11.215 4.68e-08 ***
X1             2.032      0.609   3.336  0.00536 **
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 12.68 on 13 degrees of freedom
Multiple R-Squared: 0.4613,     Adjusted R-squared: 0.4199
F-statistic: 11.13 on 1 and 13 DF,  p-value: 0.005359

> summary(lm(Y~X2, data=xy))   # test H0: b2=0

Call:
lm(formula = Y ~ X2, data = xy)

Residuals:
    Min      1Q  Median      3Q     Max
-14.767  -5.754   1.419   5.665  19.162

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  28.6402     7.9423   3.606  0.00319 **
X2            1.7870     0.2904   6.154 3.46e-05 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 8.735 on 13 degrees of freedom
Multiple R-Squared: 0.7445,     Adjusted R-squared: 0.7248
F-statistic: 37.87 on 1 and 13 DF,  p-value: 3.464e-05

> summary(lm(Y~X3, data=xy))   # test H0: b3=0

Call:
lm(formula = Y ~ X3, data = xy)

Residuals:
    Min      1Q  Median      3Q     Max
-24.256 -11.929   3.591  11.903  18.691

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  92.3719     9.9655   9.269 4.31e-07 ***
X3           -1.2041     0.6525  -1.845   0.0879 .
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 15.38 on 13 degrees of freedom
Multiple R-Squared: 0.2076,     Adjusted R-squared: 0.1466
F-statistic: 3.405 on 1 and 13 DF,  p-value: 0.0879

> summary(lm(Y~X4, data=xy))   # test H0: b4=0

Call:
lm(formula = Y ~ X4, data = xy)

Residuals:
    Min      1Q  Median      3Q     Max
-14.301  -8.438   3.911   5.332  14.522

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  96.6576     4.7468   20.36 3.03e-11 ***
X4           -1.6379     0.3114   -5.26 0.000154 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 9.77 on 13 degrees of freedom
Multiple R-Squared: 0.6803,     Adjusted R-squared: 0.6557
F-statistic: 27.67 on 1 and 13 DF,  p-value: 0.0001542

> summary(fit)   # 4(d): standard error and R-squared

Call:
lm(formula = Y ~ X1 + X2 + X3 + X4, data = xy)

Residuals:
    Min      1Q  Median      3Q     Max
-4.1683 -1.3696 -0.7218  2.0893  4.0194

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -30.1423    35.6129  -0.846 0.417130
X1            2.0735     0.4304   4.818 0.000704 ***
X2            2.5798     0.7016   3.677 0.004268 **
X3            0.6407     0.4325   1.481 0.169303
X4            1.1014     0.7233   1.523 0.158830
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 2.948 on 10 degrees of freedom
Multiple R-Squared: 0.9776,     Adjusted R-squared: 0.9687
F-statistic: 109.2 on 4 and 10 DF,  p-value: 3.313e-08

> new <- list(X1=5.4, X2=20.3, X3=18.7, X4=11.2)
> predict(fit, newdata=new)   # 4(e): predict Y at the given (X1,X2,X3,X4)
[1] 57.7429
> predict(fit, newdata=new, interval="confidence", level=0.95)   # 4(f): 95% confidence interval
       fit      lwr      upr
[1,] 57.7429 47.99115 67.49465
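# Aside (a sketch, not part of the assigned output): part 4(f) is a
# confidence interval for the *mean* response at the new X values. An
# interval for a single future observation is the wider prediction
# interval:
predict(fit, newdata=new, interval="prediction", level=0.95)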
######################################## Exercise 5

Step 1 is done in Exercise 4:

summary( lm(Y~X1+X2+X3+X4, data=xy) )

Call:
lm(formula = Y ~ X1 + X2 + X3 + X4, data = xy)

Residuals:
    Min      1Q  Median      3Q     Max
-4.1683 -1.3696 -0.7218  2.0893  4.0194

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -30.1423    35.6129  -0.846 0.417130
X1            2.0735     0.4304   4.818 0.000704 ***
X2            2.5798     0.7016   3.677 0.004268 **
X3            0.6407     0.4325   1.481 0.169303
X4            1.1014     0.7233   1.523 0.158830

Step 2: Exclude X3, which has the largest p-value, and refit.

summary( lm(Y~X1+X2+X4, data=xy) )

Call:
lm(formula = Y ~ X1 + X2 + X4, data = xy)

Residuals:
    Min      1Q  Median      3Q     Max
-4.5279 -1.7084 -0.5657  2.3629  4.4519

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  18.1039    15.1715   1.193  0.25786
X1            1.4740     0.1540   9.569 1.15e-06 ***
X2            1.7031     0.3968   4.292  0.00127 **
X4            0.1720     0.3792   0.454  0.65884

Step 3: Exclude X4, which is now far from significant, and refit.

summary( lm(Y~X1+X2, data=xy) )

Call:
lm(formula = Y ~ X1 + X2, data = xy)

Residuals:
    Min      1Q  Median      3Q     Max
-4.6330 -1.3329 -0.6407  2.2510  4.8163

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  24.8652     2.7538   9.029 1.07e-06 ***
X1            1.4752     0.1488   9.912 3.94e-07 ***
X2            1.5297     0.1030  14.846 4.37e-09 ***

All partial regression coefficients are now significant, so backward
elimination stops with the model Y ~ X1 + X2.

######################################## Exercise 6
################# 6(a): Simple correlations among all five variables
cor(xy)
            Y         X1          X2          X3          X4
Y   1.0000000  0.6791849  0.86282623 -0.45558037 -0.82482122
X1  0.6791849  1.0000000  0.25194242 -0.80577226 -0.23873447
X2  0.8628262  0.2519424  1.00000000 -0.09089558 -0.96534792
X3 -0.4555804 -0.8057723 -0.09089558  1.00000000 -0.04742297
X4 -0.8248212 -0.2387345 -0.96534792 -0.04742297  1.00000000

################# 6(b): Regress each variable on the other four
# Input:
#
summary( lm(Y~X1+X2+X3+X4, data=xy) )
summary( lm(X1~Y+X2+X3+X4, data=xy) )
summary( lm(X2~X1+Y+X3+X4, data=xy) )
summary( lm(X3~X1+X2+Y+X4, data=xy) )
summary( lm(X4~X1+X2+X3+Y, data=xy) )

########
# Output:
> summary( lm(Y~X1+X2+X3+X4, data=xy) )
...
Multiple R-Squared: 0.9776,     Adjusted R-squared: 0.9687
F-statistic: 109.2 on 4 and 10 DF,  p-value: 3.313e-08
> summary( lm(X1~Y+X2+X3+X4, data=xy) )
...
Multiple R-Squared: 0.9674,     Adjusted R-squared: 0.9544
F-statistic: 74.25 on 4 and 10 DF,  p-value: 2.141e-07
> summary( lm(X2~X1+Y+X3+X4, data=xy) )
...
Multiple R-Squared: 0.9917,     Adjusted R-squared: 0.9884
F-statistic: 298.9 on 4 and 10 DF,  p-value: 2.340e-10
> summary( lm(X3~X1+X2+Y+X4, data=xy) )
...
Multiple R-Squared: 0.9314,     Adjusted R-squared: 0.904
F-statistic: 33.97 on 4 and 10 DF,  p-value: 8.569e-06
> summary( lm(X4~X1+X2+X3+Y, data=xy) )
...
Multiple R-Squared: 0.9863,     Adjusted R-squared: 0.9808
F-statistic: 180 on 4 and 10 DF,  p-value: 2.861e-09

################# 6(c): Partial correlation coefficients
# Input:
#
sscp <- cov(xy)
d0 <- diag(1/sqrt(diag(sscp)))
r <- d0 %*% sscp %*% d0; r   # simple correlation coefficients
cor(xy)                      # should equal r
c <- solve(r); c             # inverse of the correlation matrix
d1 <- diag(1/sqrt(diag(c)))
# Off-diagonal entries of p are the partial correlations; the diagonal
# is -1 by construction and can be ignored.
p <- -d1 %*% c %*% d1; p     # partial correlation coefficients
-cov2cor(solve(cor(xy)))     # should equal p

########
# Output:
> sscp <- cov(xy)
> d0 <- diag(1/sqrt(diag(sscp)))
> r <- d0 %*% sscp %*% d0; r   # simple correlation coefficients
           [,1]       [,2]        [,3]        [,4]        [,5]
[1,]  1.0000000  0.6791849  0.86282623 -0.45558037 -0.82482122
[2,]  0.6791849  1.0000000  0.25194242 -0.80577226 -0.23873447
[3,]  0.8628262  0.2519424  1.00000000 -0.09089558 -0.96534792
[4,] -0.4555804 -0.8057723 -0.09089558  1.00000000 -0.04742297
[5,] -0.8248212 -0.2387345 -0.96534792 -0.04742297  1.00000000
> cor(xy)   # should equal r
            Y         X1          X2          X3          X4
Y   1.0000000  0.6791849  0.86282623 -0.45558037 -0.82482122
X1  0.6791849  1.0000000  0.25194242 -0.80577226 -0.23873447
X2  0.8628262  0.2519424  1.00000000 -0.09089558 -0.96534792
X3 -0.4555804 -0.8057723 -0.09089558  1.00000000 -0.04742297
X4 -0.8248212 -0.2387345 -0.96534792 -0.04742297  1.00000000
> c <- solve(r); c   # inverse of the correlation matrix
          [,1]      [,2]      [,3]      [,4]      [,5]
[1,]  44.66198 -30.95560 -55.63327 -10.82757 -24.77095
[2,] -30.95560  30.69834  55.48911  17.39292  36.18703
[3,] -55.63327  55.48911 120.55821  34.37288  85.37034
[4,] -10.82757  17.39292  34.37288  14.58601  29.09498
[5,] -24.77095  36.18703  85.37034  29.09498  72.99934
> d1 <- diag(1/sqrt(diag(c)))
> p <- -d1 %*% c %*% d1; p   # partial correlation coefficients
           [,1]       [,2]       [,3]       [,4]       [,5]
[1,] -1.0000000  0.8360126  0.7581710  0.4242227  0.4338247
[2,]  0.8360126 -1.0000000 -0.9121198 -0.8219524 -0.7644270
[3,]  0.7581710 -0.9121198 -1.0000000 -0.8196893 -0.9100163
[4,]  0.4242227 -0.8219524 -0.8196893 -1.0000000 -0.8916415
[5,]  0.4338247 -0.7644270 -0.9100163 -0.8916415 -1.0000000
> -cov2cor(solve(cor(xy)))   # should equal p
            Y         X1         X2         X3         X4
Y  -1.0000000  0.8360126  0.7581710  0.4242227  0.4338247
X1  0.8360126 -1.0000000 -0.9121198 -0.8219524 -0.7644270
X2  0.7581710 -0.9121198 -1.0000000 -0.8196893 -0.9100163
X3  0.4242227 -0.8219524 -0.8196893 -1.0000000 -0.8916415
X4  0.4338247 -0.7644270 -0.9100163 -0.8916415 -1.0000000
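# Two optional cross-checks (sketches, not part of the assigned output).
#
# First, the backward elimination of Exercise 5 can be confirmed with
# drop1(), which reports the partial F-test for dropping each term.
# (step() automates such searches, but ranks models by AIC rather than
# p-values, so it need not reproduce the manual procedure.)
drop1(lm(Y~X1+X2+X3+X4, data=xy), test="F")   # X3 has the largest p-value

# Second, any entry of the partial correlation matrix in 6(c) can be
# verified from its definition as a correlation of residuals; e.g. the
# partial correlation of Y and X3 given X1, X2, X4:
cor(resid(lm(Y ~ X1+X2+X4, data=xy)),
    resid(lm(X3 ~ X1+X2+X4, data=xy)))        # should match p[1,4] = 0.4242227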
######################################## Exercise 7
#
# R commands
#
# Cut and paste the definition of `kendall.w()' into your R session
# from the file `kendall.w.R' on the class website.
#
# Alternatively, save "kendall.w.R" to a file and use the R command
# > source("kendall.w.R")
#
data <- scan()
5 4 3 1 2
4 5 3 2 1
5 4 1 2 3
5 3 2 4 1
4 5 2 3 1
5 4 1 3 2

table <- t(matrix(data, nrow=5))   # don't forget to transpose!
kendall.w(table)                   # ...after source()'ing this function

##########
# R Output:
#
Kendall's W for ordinal data

W = 0.7166667
p(table) < 0.01
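# Optional check (a sketch, not part of the assignment): kendall.w() is
# a class-provided function, but with no tied ranks W can be reproduced
# directly from its definition, W = 12*S / (m^2 * (n^3 - n)), where m
# raters rank n objects and S is the sum of squared deviations of the n
# column rank sums from their common mean m*(n+1)/2.
R <- t(matrix(data, nrow=5))           # 6 raters (rows) x 5 objects (columns)
m <- nrow(R); n <- ncol(R)
S <- sum((colSums(R) - m*(n+1)/2)^2)   # S = 258 for these rankings
12*S / (m^2 * (n^3 - n))               # 0.7166667, agreeing with kendall.w()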