/* Extended treatment of Miller jackknife procedure in the text. Test whether the variances of two samples are significantly different, not assuming that the samples are normally distributed Uses a stratified jackknife (that is, within each sample) for the sample log-standard-deviation. Compare with a stratified bootstrap */ #include #include #include /* Prototypes for functions in statlib.c : Compile by e.g. gcc -o ProgName Progname.c statlib.c with ProgName.c, statlib.c, and statlib.h in the default directory. */ #include "statlib.h" /* Tritiated water diffusion data across human chorioamnion */ /* Table 4.1 in Hollander & Wolfe */ /* xx[] values are at term. yy[] values are at 12-26 weeks. */ const char *title_data= "Diffusion of tritiated water (that is, water containing H^3)\n" " across human chorioamnion as a test of placental permeability\n" "The first sample is at term (approximately 39 weeks)\n" " The second sample is at 12-26 weeks.\n" "Table 4.1 (p110) in Hollander & Wolfe, 2nd 3dn"; #define MAXN 30 /* Maximum possible sample size */ #define NBOOT 10000 /* Do NBOOT resamples in stratified bootstrap */ /* Sample at term (control sample): */ const char *xname="At term (approx 39 weeks)"; const int mm=10; const double xx[MAXN] = { 0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46 }; /* Sample at 12-26 weeks (test sample): */ const char *yname="At 12-26 weeks"; const int nn=5; const double yy[MAXN] = { 1.15, 0.88, 0.90, 0.74, 1.21 }; /* Number of bootstrap replications */ int nboot=NBOOT; unsigned long startseed=123456; /* Subroutines to keep us from repeating standard blocks of code. */ /* The keyword `const' means that this function promises not to change */ /* any of the values pointed to. (If it tries to, the compiler will */ /* crash.) This means that there is one less thing for the calling */ /* function to worry about. */ void do_miller_jackknife(const double *xx, int mm, const double *yy, int nn, double gamma_obs); void do_bootstrap(const double *xx, int mm, const double *yy, int nn, double gamma_obs); int main(int argc, char **argv) { int i,j; double xmean,xvar,xstdev, ymean,yvar,ystdev; double gamma_obs; unsigned long wseed; printf ( "Use the Miller method to test whether or not the VARIANCES of\n" " two samples are the same (see Section 5.2 in text).\n" "Miller's idea is to apply a `stratified' jackknife to the\n" " log sample variance.\n" "Compare Miller's P-value and 95%% confidence interval with a\n" " stratified bootstrap P-value and 95%% confidence interval.\n"); /* Title data */ printf ("\n%s\n", title_data); /* Note: If ``var'' is any variable, ``&var'' is a POINTER to that */ /* variable, or equivalently the ADDRESS of that variable. */ /* A function in C cannot change the value of a variable in the */ /* calling function unless it is given its address in memory. */ /* The function getmeanvar() fills xmean,xvar with the */ /* sample mean and sample variance of the array xx[]. */ /* getmeanvar() is in statlib.c */ getmeanvar(xx,mm, &xmean,&xvar); xstdev=sqrt(xvar); printf ("\n%s (Xs, m=%d)\n", xname,mm); printf ("(Xbar=%g, SampStdev=%.3f, Sampvar=%.3f)\n", xmean,xstdev,xvar); for (i=0; i=2) startseed=atol(argv[1]); wseed=setrand(startseed); printf ("\nStarting random-number seed: %lu\n",wseed); printf ("\nBOOTSTRAP PROCEDURE (n=%s bootstrap replications):\n", commnum(nboot)); do_bootstrap(xx,mm, yy,nn, gamma_obs); show_randnums_used(); /* Remind the user */ printf ("Starting random-number seed: %lu\n",wseed); return 0; } /* ``Getlogvar'' returns the log of the sample variance */ /* This is the estimator that is being jackknifed and bootstrapped: */ /* Again, `const' means that the subroutine promises not to change */ /* any of the values in the array. */ double getlogvar(const double *xx, int num) { int i; double dsum,dmean, dvar; for (dsum=0.0, i=0; i0 && (i%5)==0) printf("\n"); printf (" %.4f", ps[i]); } printf("\n"); } void do_miller_jackknife(const double *xx, int mm, const double *yy, int nn, double gamma_obs) { /* Two sets of pseudo-values */ double aa[MAXN], bb[MAXN]; double abar,bbar, savar,sbvar, qsvar,qsdev, qq,gamma; printf( "First, get the jackknife pseudovalues for the log sample variance\n" " for each sample:\n"); /* Collect the pseudovalues for xx[] into aa[] and yy[] into bb[], */ /* following the notation in the text */ do_jackknife("LogSampVar",aa,"Sample 1",xx,mm); do_jackknife("LogSampVar",bb,"Sample 2",yy,nn); /* Find the means and sample variances of the two sets of pseudovalues */ /* Remember that aa[],bb[] are pseudovalues of LOG sample variances, */ /* not of the sample variances themselves. */ getmeanvar(aa,mm, &abar,&savar); getmeanvar(bb,nn, &bbar,&sbvar); /* The strategy is to use the two sets of pseudo-values as */ /* two independent samples with the sample variance */ /* The unpooled variance estimator for Stdev(abar-bbar) is: */ qsvar=(savar/mm)+(sbvar/nn); qsdev=sqrt(qsvar); printf ( "\nJACKKNIFE TEST of H_0:gamma=1 for gamma=Var(X)/Var(Y):\n" "Sample means and sample variances of the two samples of\n" " pseudovalues of log sample variances:\n" " Abar=%.3f Bbar=%.3f S2Avar=%.3f S2Bvar=%.3f\n" " Unpooled variance of Abar-Bbar=%.3f Unpooled StDev=%.3f\n", abar,bbar, savar,sbvar, qsvar,qsdev); /* The two-sample Z statistic for pooled variances: */ /* Recall that aa[],bb[] are log variances, not the variances */ /* themselves, so that gamma=1 corresponds to qq=0. */ qq=(abar-bbar)/qsdev; printf ("V1=S2Avar/m (etc) and the Miller two-sample Z-statistic:\n"); printf (" V1=%.3f V2=%.3f Z=%.3f P=%.4f (2-sided)\n", savar/mm, sbvar/nn, qq, normpval(qq)); /* Convert `abar-bbar' to gamma=Var(X)/Var(Y) */ gamma=exp(abar-bbar); printf ( "\nFor gamma(X,Y)=s2(X)/s2(Y):\n" "Bias-corrected gamma estimator and symmetric 95%% CI for gamma:\n" " %.4f (%.4f, %.4f) Observed gamma=%.4f\n", gamma, exp(abar-bbar-1.960*qsdev), exp(abar-bbar+1.960*qsdev), gamma_obs); } void do_bootstrap(const double *xx, int mm, const double *yy, int nn, double gamma_obs) { int i,j,mb, ns, nbig, kk,klow,khigh; double gammboot[NBOOT], bootmed, bootpval, gamma0; printf ("Use a `stratified bootstrap' to generate an array\n"); printf (" of gamma(star) bootstrap resampled values for\n"); printf (" gamma(star) = s(X)^2(star)/s(Y)^2(star)\n"); printf("Filling the bootstrap ``gamma(star)'' array:\n"); for (ns=0; ns1e-8, which should be safe */ for (j=0; j0 && (i%5)==0) printf("\n"); printf (" %.3f",gammboot[i]); } printf("\n"); printf("The last 10 gamma(star) values are:\n"); for (i=0; i<10; i++) { if (i>0 && (i%5)==0) printf("\n"); printf (" %.3f",gammboot[nboot-10+i]); } printf("\n"); /* Calculate the bootstrap median bias */ for (mb=i=0; i0) klow=kk-1; /* k-th from the bottom */ khigh=nboot-1-klow; /* k-th from the top */ printf ( " %.4f (%.4f, %.4f) Observed gamma=%.4f\n", bootmed, gammboot[klow], gammboot[khigh], gamma_obs); printf ("\nBootstrap P-value for H_0:gamma=gamma0=1 " "(gamma(observed)=%g):\n",gamma_obs); gamma0=1; nbig=0; if (gamma0<=bootmed) /* gamma0=1 is on lower tail of distribution */ { for (i=0; i=gamma_obs) nbig++; } printf ("(Twice the proportion of resampled gammas %s %g.)\n", (gamma0<=bootmed) ? "<=" : ">=", gamma0); bootpval=(double)(2*nbig)/nboot; if (bootpval>1.0) bootpval=1.0; printf (" P = 2*%d/%s = %.4f (2-sided)\n", nbig,commnum(nboot), bootpval); }