Example R programs and commands 20. Chi-squared tests for independence in contingency tables # All lines preceded by the "#" character are my comments. # All other left-justified lines are my input. # All other indented lines are the R program output. ############################################################ # CHI-SQUARED test for independence # # Read the contingency table, r=2 rows (X values), c=3 columns (Y values): data<-scan() 163 135 71 86 77 40 tab <- t(matrix(data, nrow=3,ncol=2)) # transpose, as usual tab # view the table to make sure it is right [,1] [,2] [,3] [1,] 163 135 71 [2,] 86 77 40 chisq.test(tab) # performs a test of independence for matrix input. Pearson's Chi-squared test data: tab X-squared = 0.1769, df = 2, p-value = 0.9153 # ==> DO NOT REJECT the null hypothesis that X,Y are independent # NOTE: the transposed table gives the same result, as X and Y are # independent if and only if Y and X are independent. chisq.test(t(tab)) Pearson's Chi-squared test data: t(tab) X-squared = 0.1769, df = 2, p-value = 0.9153 ############################################################ # LOG-LIKELIHOOD computation # Extract the observed and expected values. toi<-chisq.test(tab) # store the whole output data structure summary(toi) Length Class Mode statistic 1 -none- numeric parameter 1 -none- numeric p.value 1 -none- numeric method 1 -none- character data.name 1 -none- character observed 6 -none- numeric expected 6 -none- numeric residuals 6 -none- numeric # Compute the log-likelihood statistic directly from these parts: g<-2*sum(toi$observed*log(toi$observed/toi$expected)) g # Compute the p-value of g assuming it has the Chi-squared density: nu<-prod(dim(tab)-1) # this is (nrow-1)*(ncol-1) == (r-1)(c-1) 1-pchisq(g,nu) ############################################################ # YATES correction for CHI-SQUARED test in 2x2 contingency tables data<-scan() 163 135 86 77 tab <- matrix(data, nrow=2,ncol=2) # transpose or not, gives same result tab # view the table chisq.test(tab, correct=TRUE) # with Yates correction (default) chisq.test(tab, correct=FALSE) # without Yates correction ############################################################ # YATES correction for LOG-LIKELIHOOD test in 2x2 contingency tables data<-scan() 163 135 86 77 tab <- matrix(data, nrow=2,ncol=2) # transpose or not, gives same result tab # view the table ycmat <- matrix(0.5*c(-1,1,1,-1),nrow=2,ncol=2) # correction matrix tabc <- tab+sign(det(tab))*ycmat # add or subtract the corrections as needed tabc # view the corrected table toic<-chisq.test(tabc, correct=FALSE) # Do not use the Chi-sq Yates correction # Compute the log-likelihood statistic directly from these parts: g<-2*sum(toic$observed*log(toic$observed/toic$expected)) g # Compute the p-value of g assuming it has the Chi-squared density: nu<-prod(dim(tab)-1) # this is (nrow-1)*(ncol-1) == (r-1)(c-1) 1-pchisq(g,nu) ############################################################ # HIGHER-DIMENSIONAL contingency tables; test of independence: # Read the count data for this problem count<-scan() 12 34 23 4 47 11 35 31 11 34 10 18 12 32 9 18 13 19 12 12 14 9 33 25 # Create factor tags:r=rows, c=columns, t=tiers r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1","r2", "r3", "r4"))); r c <- factor(gl(3, 1, 2*3*4, labels=c("c1","c2", "c3"))); c t <- factor(gl(2, 3, 2*3*4, labels=c("t1","t2"))); t # Cross-tabulation of counts: xtabs(count~r+c+t) # all three factors xtabs(count~r+c) # RC: sum over tiers xtabs(count~r+t) # RT: sum over columns xtabs(count~c+t) # CT: sum over rows xtabs(count~r) # R: sum over columns and tiers xtabs(count~c) # C: sum over rows and tiers xtabs(count~t) # T: sum over rows and columns # 3-way Chi squared test of independence summary(xtabs(count~r+c+t)) # NOTE: The number of degrees of freedom in this 3-D case is the sum # of the degrees of freedom for all 3-way and 2-way interactions: nr <- 4; nc<-3; nt<-2; # number of rows, columns, tiers, respectively dof <- (nr-1)*(nc-1)*(nt-1)+(nr-1)*(nc-1)+(nr-1)*(nt-1)+(nc-1)*(nt-1) # 2-way Chi squared test of partial independence: C versus T summary(xtabs(count~c+t)) # dof=(nc-1)*(nt-1) # 2-way Chi squared test of partial independence: R versus T summary(xtabs(count~r+t)) # dof=(nr-1)*(nt-1) # 2-way Chi squared test of partial independence: R versus C summary(xtabs(count~r+c)) # dof=(nr-1)*(nc-1) # 1-way Chi squared test of no dependence on R: chisq.test(xtabs(count~r)) # dof= nr-1 # 1-way Chi squared test of no dependence on C: chisq.test(xtabs(count~c)) # dof= nc-1 # 1-way Chi squared test of no dependence on T: chisq.test(xtabs(count~t)) # dof= nt-1 #### Output and interpretation (at significance level alpha=0.05): # 3-way Chi squared test of independence > summary(xtabs(count~r+c+t)) Call: xtabs(formula = count ~ r + c + t) Number of cases in table: 478 Number of factors: 3 Test for independence of all factors: Chisq = 102.17, df = 17, p-value = 3.514e-14 # # ==> reject H0 in favor of # HA: r,c,t are NOT mutually independent # > # 2-way Chi squared test of partial independence: C versus T > summary(xtabs(count~c+t)) Call: xtabs(formula = count ~ c + t) Number of cases in table: 478 Number of factors: 2 Test for independence of all factors: Chisq = 2.3704, df = 2, p-value = 0.3057 # # ==> do not reject H0: c,t are mutually independent # > > # 2-way Chi squared test of partial independence: R versus T > summary(xtabs(count~r+t)) Call: xtabs(formula = count ~ r + t) Number of cases in table: 478 Number of factors: 2 Test for independence of all factors: Chisq = 10.057, df = 3, p-value = 0.01809 # # ==> reject H0 in favor of # HA: r,t are NOT mutually independent # > # 2-way Chi squared test of partial independence: R versus C > summary(xtabs(count~r+c)) Call: xtabs(formula = count ~ r + c) Number of cases in table: 478 Number of factors: 2 Test for independence of all factors: Chisq = 58.67, df = 6, p-value = 8.363e-11 # # ==> reject H0 in favor of # HA: r,c are NOT mutually independent # > # 1-way Chi squared test of no dependence on R: > chisq.test(xtabs(count~r)) # dof= nr-1 Chi-squared test for given probabilities data: xtabs(count ~ r) X-squared = 8.3264, df = 3, p-value = 0.03973 # # ==> reject H0 in favor of # HA: count is NOT independent of R # > # 1-way Chi squared test of no dependence on C: > chisq.test(xtabs(count~c)) # dof= nc-1 Chi-squared test for given probabilities data: xtabs(count ~ c) X-squared = 26.226, df = 2, p-value = 2.019e-06 # # ==> reject H0 in favor of # HA: count is NOT independent of C # > # 1-way Chi squared test of no dependence on T: > chisq.test(xtabs(count~t)) # dof= nt-1 Chi-squared test for given probabilities data: xtabs(count ~ t) X-squared = 0.033473, df = 1, p-value = 0.8548 # # ==> DO NOT reject H0: count is independent of T #