/* One-Way layouts: If treatment groups are significantly different by the Kruskal-Wallis test, which PAIRS of treatments are significantly different? If the first treatment group is a control, which INDIVIDUAL treatments are significantly better than the control? Recall that if you do e.g. 50 tests, then even if there are no differences, approximately 0.05*50 = 2 1/2 tests will show up as false positives by chance. This is called the MULTIPLE COMPARISON problem. The goal is to make inferences that are not artifacts of multiple comparisons. */ #include #include #include /* Prototypes for all functions in statlib.c */ #include "statlib.h" #define MAXK 10 /* Maximum possible number of treatments */ #define MAXN 30 /* Maximum number of observations per treatment */ #define MAXTOT (MAXK*MAXN) /* Description of data set and treatment groups: */ char *title="Numbers of glucocorticoid receptor sites per Leukocyte cell"; char *source="Hollander & Wolfe (Table 6.4, p201)"; /* Number of treatment groups and the numbers of observations */ /* in each treatment group */ const int kk=5; const int nn[MAXK] = { 14, 5, 6, 4, 8 }; /* A double array for k treatment groups: */ /* See OneWayLayout.c for a discussion of double arrays in C */ /* and how to initialize them. */ /* Here xx[i][j] means the j-th element of the */ /* i-th row or treatment group. */ /* Treatment-group names */ const char *treatnames[MAXK] = { "Normal", "HCell Anemia", "CLL", "CML", "Acute" }; /* Treatment groups are rows: */ const double xx[MAXK][MAXN] = { { 3500, 3500, 3500, 4000, 4000, 4000, 4300, 4500, 4500, 4900, 5200, 6000, 6750, 8000 }, { 5710, 6110, 8060, 8080, 11400 }, { 2930, 3330, 3580, 3880, 4280, 5120 }, { 6320, 6860, 11400, 14000 }, { 3230, 3880, 7640, 7890, 8280, 16200, 18250, 29900 } }; /* The starting random-numbr seed. */ unsigned long wseed=123456; void do_multcomp(const double xx[MAXK][MAXN], int kk, const int nn[MAXK], const char *treatnames[MAXK]); int main(int argc, char *argv[]) { printf ("Multiple Comparison procedures for One-Way layouts:\n"); /* Helpful messages on segmentation faults and some other fatal errors */ /* This function is in statlib.c */ set_newsignals(); printf ("Kruskal-Wallis and multiple comparison tests.\n"); /* If a number is entered on the command line, use it as the */ /* starting seed instead of wseed. If the starting seed is 0, */ /* initialize the seed from the system clock. */ /* `atol' stands for `ASCII (text) to long (integer)' */ if (argc>=2) wseed=atol(argv[1]); wseed=setrand(wseed); /* If wseed=0, replace by true starting seed */ printf ("\nStarting random-number seed: %lu\n",wseed); printf (" (Enter a number on the command line " "to specify a different seed.)\n"); /* See above for dataset title and source */ printf ("\nData: %s\n%s\n", title, source); do_multcomp(xx,kk,nn,treatnames); show_randnums_used(); return 0; } /* Return with no errors */ double get_hobs (double *mrank, int *nfirst, int *nmax, int kk); double get_wilcoxon(const double *wboth, int nn1, int nn2, double *wstatptr); void do_multcomp(const double xx[MAXK][MAXN], int kk, const int nn[MAXK], const char *treatnames[MAXK]) { int i,j,r, ns,nbig,nsims, ntot,ntg, ncon; int nfirst[MAXK], nmax[MAXK], tgsize[MAXTOT], nbigg[MAXK][MAXK]; double hsum, hh, hobs, tiesum,tiecorr, maxzstar, pp,pwid; double wwdat[MAXTOT], mrank[MAXTOT], rankav[MAXK]; double wmcbuff[2*MAXN], wdobs[MAXK][MAXK]; double zdobs[MAXK][MAXK], wzdobs[MAXK][MAXK]; /* Set `ntot' to be the total number of observations */ ntot=0; for (i=0; i0 && (j%10)==0) printf("\n"); printf (" %5g", xx[i][j]); } printf("\n"); } /* Find the Kruskal-Wallis midranks */ /* See OneWayLayout.c for comments about method */ for (r=i=0; i=hobs-1e-8) nbig++; } pp=(double)nbig/nsims; pwid = 1.960*sqrt(pp*(1-pp)/nsims); printf (" P = %d/%s = %7.5f 95%% CI: (%7.5f, %7.5f, %7.5f)\n", nbig,commnum(nsims), pp, pp-pwid, pp, pp+pwid); /* Two-way multiple comparisons: Given that the Kruskal-Wallis test is significant, which PAIRS OF TREATMENTS are significantly different, allowing for all possible multiple comparisons? */ printf ( "\nWhich of the %d (= k*(k-1)/2) pairwise comparisons between the\n" " %d (=k) treatment groups are significant, allowing for\n" " multiple comparisons?\n", kk*(kk-1)/2, kk); printf ( "Using the Steel-Dwass-Crichlow-Fligner multiple-comparison\n" " procedure described in Section 6.5 of Hollander & Wolfe.\n" "That is, let ObsZ(i,j) be the pairwise Wilcoxon rank-sum score\n" " between all treatment-group pairs, normalized to have mean zero\n" " and variance one. For a permutation of the data, let mstar =\n" " max_{a,b}Z(a,b) where Z(i,j) is the corresponding normalized\n" " Wilcoxon score between pairs of permuted data. Finally, let\n" " P(i,j) be the proportion of permutations for which\n" " mstar >= |ObsZ(i,j)|.\n" "Then P(i,j) are multiple-comparison-corrected P-values for all\n" " treatment pairs (i,j).\n"); /* First, find the observed pairwise Wilcoxon rank-sum statistics (Wij=wdobs[i][j]) and the corresponding large-sample tie-corrected Z-statistics (zdobs[i][j]) Loop over pairs of treatments: */ maxzstar=0.0; /* For display */ for (i=0; imaxzstar) maxzstar=zstar; } printf ("\nFor comparison, the following are " "multiple-comparison-UNCORRECTED P-values.\n"); printf ("Pairwise Wilcoxon W and tie-corrected Z values:\n"); for (i=0; imstar) mstar=fabs(zstar); } /* mstar>=zdobs[i][j] is a `success' for pair (i,j) */ for (i=0; i zdobs[i][j]-(1e-12)) nbigg[i][j]++; }} printf ("Pairwise MULTIPLE-COMPARISON-CORRECTED P-values:\n"); /* starp(p) returns (*) for P<0.05 and (**) for P<0.01 */ for (i=0; irmax) rmax=zz; } zrange=rmax-rmin; /* zrange>=wzdobs[i][j] is a `success' for pair (i,j) */ for (i=0; iwzdobs[i][j]-(1e-12)) nbigg[i][j]++; } for (i=0; imaxzstar) maxzstar=zstar; } printf ( "\nFor comparison, the following are " "multiple-comparison-UNCORRECTED P-values.\n" "Pairwise Wilcoxon W and Z values:\n"); for (i=1; imstar) mstar=zstar; } /* Add to counts of mstar >= Z(1,i) */ for (i=1; i zdobs[0][i]-(1e-12)) nbigg[0][i]++; }} printf ("Pairwise MULTIPLE-COMPARISON-CORRECTED P-values:\n"); for (i=1; infirst[i]) { double rsum=0.0; for (j=nfirst[i]; jMAXTOT) /* Then buffers will overflow */ { printf ("\nERROR IN SUBROUTINE MakeRanks!\n"); printf ("Array length: %d Buffer sizes: %d\n", ntot,MAXTOT); exit(1); } /* Copy wwdat[] to buffer wtemp[] so that it won't be corrupted */ /* Also, initialize mranptr[] array to point to values in mrank[] */ /* The next step will sort (wtemp[r],mranptr[r]) together. */ /* The mranptr[r] value means that this subroutine will always know */ /* where to put the midrank of wwdat[r] */ for (r=0; r 1 */ for (i=0; i1 && tgsize!=NULL) tgsize[ntg++]=ts; } /* Keep track of tie-group sizes */ /* If the user doesn't want the value ntg, don't insist. */ if (ntgptr!=NULL) *ntgptr=ntg; }