/* Cochran test for twoway layouts with 0,1 (dichotomous) data Suppose that we are given data X_ij where each X_ij is 1 or 0 (`success' or `failure') for n blocks or subjects (1 le i le n) and k treatments (1 le j le k). Suppose that we want to test if there is a treatment effect. That is, if some treatment groups have more successes than others to a statistically significant extent. Let L_j = Sum(i=n) X_ij be the number of successes for the j-th treatment. Since Sum(j=1,k) (L_j-Lbar)^2 = Sum(j=1,k) L_j^2 - k Lbar^2 where Lbar = (1/k)Sum(j=1,k) L_j = (1/k)Sum(j=1,k) Sum(i=1,n) X_ij = M/k where M is the total number of successes, a natural score for different effects of treatments is T = Sum(j=1,k) L_j^2 A natural permutation structure is to take independent permutations of each block as in the Friedman test. Cochran showed that the statistic Q = k(k-1) Sum(j=1,k) (L_j-Lbar)^2 / (kM - Sum(i=1,n) B_i^2) has a chi-square distribution with k-1 degrees of freedom under Friedman-like permutations for large n, so that Q can be used for a large-sample approximation of the P-value. In the formula for Q, Q, B_i = Sum(j=1,k) X_ij is the number of successes in the i-th block, M = Sum (i=1,n) B_i = Sum (j=1,k) L_j is the total number of successes, and Lbar = Sum(j=1,k) L_j/k = M/k as above. Curiously, this test is exactly equivalent to Friedman's test except that T and Q are easier to compute for dichotomous data. The key is that the within-block midranks of the 0s and 1s in the i-th block are equal to R_ij = (1+Z_i)/2 (X_ij=0) = (1+Z_i+k)/2 (X_ij=1) where Z_i = k-B_i is the number of failures (or 0s) in the i-th block. This is because the 0 scores have tentative ranks 1 to Z_i while the 1 scores have tentative ranks Z_1+1 to k. Then the Friedman rank sums R_+j = Sum(i=1,n) R_ij = (Sum (i=1,n) (1+Z_i)/2) + (k/2) L_j = n(k+1)/2 - (k/2)(L_j - Lbar) since Lbar=M/k so that (R_+j - Rbar) = (k/2) (L_j - Lbar) Similarly, the usual large-sample approximation with tie correction for the Friedman statistic S' is exactly equal to Q. The denominator in Q results from the Friedman-test tie correction and a little bit of algebra. */ #include #include #include /* Prototypes for functions in statlib.c Compile by e.g. gcc -o ProgName Progname.c statlib.c with ProgName.c, statlib.c, and statlib.h in the default directory. */ #include "statlib.h" #define MAXK 10 /* Maximum possible number of treatments */ #define MAXN 50 /* Maximum possible number of subjects/blocks */ char *title_data = "Success for four methods of soothing newborn babies\n" "Source: Lehmann ``Nonparametrics: Statistical methods based on ranks''\n" " pages 267-270\n" "See Lehmann p283 for the full data set."; /* The number of treatment groups and the number of blocks (subjects) */ const int kk=4, nn=12; /* The default starting random-number seed */ unsigned long wseed=408408; /* Number of simulations to estimate P-values */ int nsims=100000; /* See OneWayLayout.c for comments about ARRAYS in C and how they are initialized. As in TwoWayFriedman.c, we store treatment groups as columns and blocks (subjects) as rows Note that we use an integer array here, not a double array */ /* Treatment group name */ const char *treatname[MAXK] = { "WarmWater", "Rocking", "Pacifier", "Sound" }; /* Shorter column heading names */ const char *treatname_short[MAXK] = { "WrmWa", "Rockg", "Pacif", "Sound" }; const int xx[MAXN][MAXK] = { { 0, 0, 0, 0 }, { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 0 }, { 1, 1, 1, 1 }, { 0, 1, 1, 1 }, { 1, 1, 0, 0 }, { 0, 1, 0, 1 }, { 0, 1, 1, 1 }, { 0, 0, 1, 0 }, { 1, 1, 0, 1 }, { 1, 1, 1, 1 } }; /* WARNING: Data here are in an integer array, NOT a double array */ /* as in other handouts */ void show_cochran_data(const int xx[MAXN][MAXK], int kk, int nn, const char *treatname[MAXK]); void do_cochran(const int xx[MAXN][MAXK], int kk, int nn); void show_friedman_data(const int xx[MAXN][MAXK], int kk, int nn, const char *treatname[MAXK]); void do_friedman(const int xx[MAXN][MAXK], int kk, int nn); int main(int argc, char **argv) { int i; printf ("\nIllustration of Cochran test for two-way layouts\n" " with 0,1 (dichotomous) data\n"); /* Helpful messages on segmentation faults and some other fatal errors */ /* This function is in statlib.c */ set_newsignals(); /* Text for this example */ printf ("\n%s\n", title_data); printf ("k=%d treatment groups and n=%d subjects:\n", kk,nn); /* Enter `ProgramName ' to set starting RN seed= */ if (argc>=2) wseed=atol(argv[1]); wseed=setrand(wseed); /* If wseed=0, replace by true starting seed */ printf ("\nStarting random-number seed: %lu\n",wseed); printf ("(Enter a number on the command line " "to specify a different seed.)\n"); printf ("\nTable of treatment group headings:\n"); for (i=0; i= l2sum) nbig++; } /* Display the simulated P-value with a 95% confidence interval */ pp=(double)nbig/nsims; pwid=1.960*sqrt(pp*(1-pp)/nsims); printf (" P = %d/%d = %6.4f 95%% CI (%5.3f, %5.3f, %5.3f)\n", nbig, nsims, pp, pp-pwid, pp, pp+pwid); } void show_friedman_data(const int xx[MAXN][MAXK], int kk, int nn, const char *treatname[MAXK]) { char nbuff[88]; int i,j; double midr[MAXN][MAXK]; /* Find Friedman (within block) ranks (so 1 <= R(i,j) <= k) */ printf ("\nData with Friedman (within-block) ranks:\n"); for (i=0; i0) sstc /= (1.0 - tiesum/((double)nn*(kk-1)*kk*(kk+1))); printf ("\nFriedman statistic S'=%7.5f P=%6.4f (%d df, %d tiegroup%s)\n", sstc, pvchisq(kk-1,sstc), kk-1, totntg, (totntg==1)?"":"s"); if (totntg>0) { printf ("WITH NO TIE CORRECTION:\n"); printf ("Friedman statistic S=%7.5f P=%6.4f (%d df)\n", ss0, pvchisq(kk-1,ss0), kk-1); } /* Do simulation for Friedman statistic */ printf ("\nSimulating the Friedman P-value using %d permutations:\n", nsims); printf (" Friedman score (for simulations): %g\n", obscore); /* Carry out the simulations using Friedman permutations */ nbig=0; for (ns=0; ns=obscore) nbig++; } /* Display the simulated P-value with a 95% confidence interval */ pp=(double)nbig/nsims; pwid=1.960*sqrt(pp*(1-pp)/nsims); printf (" P = %d/%d = %6.4f 95%% CI (%5.3f, %5.3f, %5.3f)\n", nbig, nsims, pp, pp-pwid, pp, pp+pwid); }