/* How to handle nonparametric One-Way layouts: */ /* Kruskal-Wallis and Jonckheere-Terpstra tests. */ #include #include #include /* Prototypes for all functions in statlib.c */ #include "statlib.h" /* Compile with e.g. */ /* gcc -Wall OneWayLayout.c o OneWayLayout statlib.c */ /* with both statlib.h and statlib.c in the same directory as OneWayLayout.c */ /* Upper bounds for MAXK: The number of treatment groups (k) MAXN: The number of observations in a treatment group (nn[i]) MAXTOT: The total number of observations: */ #define MAXK 5 #define MAXN 20 #define MAXTOT 100 /* The starting random-numbr seed. */ unsigned long wseed=408408; /* The basic data is kk : the number of treatment groups nn[i] : an integer array with the treatment-group sizes xx[i][j]: the data itself, arranged as kk rows (0 <= i < kk) where the i-th rows has nn[i] entries. Here xx[i][j] is the j-th entry in the i-th row. Here the rows are all of the same length, but this is not true in general. */ int kk=3; int nn[MAXK] = { 5, 5, 5 }; double xx[MAXK][MAXN] = { { 53, 31, 139, 5, 16 }, { 75, 64, 205, 97, 139 }, { 54, 139, 194, 1253, 275 } }; /* NOTE: double xx[K][N]; (for example) sets aside room for K rows, where each row has room for N doubles. Syntactically, xx[][] is initializing by setting double xx[5][20] = { Row1, Row2, Row3 }; with (for example) Row1 = { 53, 31, 139, 5, 16 } Note that (i) Within a row, all numbers except the last are followed by commas. The last number is NOT followed by a comma. (This is the same as initialized a one-dimensional array.) (ii) All rows except the last are followed by commas (OUTSIDE OF the braces that define the row). The last rom is NOT followed by a comma. (iii) The last row is followed by a closing brace and a semicolon. In global arrays (that is, defined outside of any function), uninitialed values are set to 0.0. The sets of braces around rows tell C to fill out the rows with zeroes. If you give values for all matrix entries, then you don't have to enclose the rows in braces. For example, double xx[3][3] = { 1, 2, 4, 2, 4, 8, 4, 8, 17 }; has the same effect as double xx[3][3] = { { 1, 2, 4 }, { 2, 4, 8 }, { 4, 8, 17 } }; */ /* Prototypes: */ void do_kruskal_wallis (double *mrank, int ntot, int *nfirst, int *nmax, int kk, int *tgsize, int ntg); double get_hobs (double *mrank, int *nfirst, int *nmax, int kk); void do_sim_kruskal_wallis (double hobs, double *mrank, int ntot, int *nfirst, int *nmax, int kk, int nsims); void do_jonckheere_terpstra (double *xdat, int ntot, int *nfirst, int *nmax, int kk, int *tgsize, int ntg); void do_sim_jonckheere_terpstra (double *xdat, int ntot, int *nfirst, int *nmax, int kk, int nsims); int main(int argc, char *argv[]) { int i,j,r, ntot, ntg; int tgsize[MAXTOT]; /* Non-singlet tie-group sizes */ double hobs; double xdat[MAXTOT]; /* The data in a linear order */ double mrank[MAXTOT]; /* KW midranks of xdat[][] in a linear order */ /* The range of the i-th treatment group in xdat[] and mrank[] */ int nfirst[MAXK], nmax[MAXK]; printf ("Nonparametric one-way layouts:\n"); printf ("Kruskal-Wallis, Jonckheere-Terpstra, and " "multiple comparison tests.\n"); /* Argc is the number of words on the command line. The arguments themselves are argv[0], argv[1], ..., argv[argc-1] By convention, argc >=1 and argv[0] is the program name, so that a command-line argument was entered only if argc<1 If a number is entered on the command line, use it as the starting seed. Recall that 0 means to initializes the seed from the system clock. If a number is not entered on the command line, use the previous value of wseed, which is as above. */ if (argc>=2) wseed=atol(argv[1]); wseed=setrand(wseed); /* If wseed=0, replace by true starting seed */ printf ("\nStarting random-number seed: %lu\n",wseed); printf (" (Enter a number on the command line " "to specify a different seed.)\n"); /* Display the data */ printf ("\nThe data:\n"); /* Count the total number of observations */ for (ntot=i=0; i0) { for (j=nfirst[i]; j0) { /* Find the average rank */ double rsum=0.0, rdif; for (j=nfirst[i]; jnfirst[i]) { double rsum=0.0; for (j=nfirst[i]; j=hobs-1e-8) nbig++; } pp=(double)nbig/nsims; pwid = 1.960*sqrt(pp*(1-pp)/nsims); printf (" P = %d/%s = %6.4f 95%% CI: (%6.4f, %6.4f)\n", nbig,commnum(nsims), pp, pp-pwid, pp+pwid); } /* Return J = value of Jonckheere-Terpstra statistic */ /* Note that J is essentially the same as the Kendall (correlation) statistic between data values and the treatment group number */ double get_jtstat(double *xdat, int *nfirst, int *nmax, int kk) { int i,j; double jj=0.0; /* Sum over pairs of treatment groups */ for (i=0; i0) { int g; double tiesum1, tiesum2, tiesum3, nsum, nsum2; tiesum1=tiesum2=tiesum3=0.0; for (g=0; g=jscore) nbig++; } pp=(double)nbig/nsims; pwid = 1.960*sqrt(pp*(1-pp)/nsims); printf (" P = %d/%s = %7.5f 95%% CI: (%7.5f, %7.5f) (2-sided)\n", nbig,commnum(nsims), pp, pp-pwid, pp+pwid); }