/* How can you estimate a variance, and also get a confidence interval for the variance, if the observations are not normally distributed? */ #include #include #include /* Compile by e.g. gcc -o ProgName Progname.c statlib.c with ProgName.c, statlib.c, and statlib.h in the default directory. */ #include "statlib.h" /* The `jackknife' and the `bootstrap' are two general methods for obtaining a confidence interval for nearly any parameter based on nearly any estimator, with an attempt to obtain a bias-corrected estimate of the parameter as a bonus. This program illustrates both methods for finding a confidence interval for the standard deviation of a sample that happens to be from a normal distribution. In that case, the sample variance is an unbiased estimator of the true variance and there is a classical symmetric confidence interval for the standard deviation based on quantiles of the chi-square distribution. Hence we will have something to compare the `jackknife' and `bootstrap' estimators with. We apply the `jackknife' method to both the standard deviation itself and to the log standard deviation, not forgetting to exponentiate the jackknifed interval for the log standard deviation afterwards. The `log standard deviation' method is to provide some protection against outliers. The (percentile) bootstrap is `scale free' or `fully nonparametric' and gives the same results before and after any monotonic one-one transformation of the data. This will give us four confidence intervals to compare: The classical, two jackknifes, and the bootstrap. In this case, the four confidence intervals turn out to be similar, although the `log standard deviation' jackknife seems to do the best of the non-exact methods. However, the `jackknife' and `bootstrap' can be applied to nearly any functional, while even for the standard deviation the classical method is exact only for normally-distributed random variables. Enter the program name followed by `0' or an explicit seed for a different `original' sample to analyze. Note that while the sample standard deviation is highly variable, the four estimates and the four confidence intervals track one another very closely. */ /* Default values for the sample size, the mean and standard deviation */ /* of the sample, and the starting random-number seed: */ #define MAXN 100 /* Largest possible sample size value */ #define NBOOT 1000 /* Number of bootstrap replications */ int nn=30, nboot=NBOOT; /* nn must be MAXN or less */ unsigned long wseed=23456; /* Starting random-number seed */ double Mu=3.0; /* The mean and standard deviation */ double Sdev=5.0; /* of the samples */ /* If we wanted to put in additional work, we could use these values as defaults and allow the option of changing them on the command line, with a syntax perhaps something like JknifeBstrap -n50 -b10000 -mu14.7 -w44444 to set nn=50, nboot=10000, startseed=44444, etc. `mu' `Mu' and `MU' are different symbols to C, so that all can have different values in a program. Here MU is changed by the C preprocessor to the string `3.0', so that MU is not (and could not be) a variable at all. */ /* Prototypes of functions used in this program: */ /* Display an array in %7.3f format with 8 values per line: */ /* `const' means that the function promises not to change the values */ /* in the array */ void show_array(int nn, const double *array); /* Save the values for a particular confidence interval in a table to be displayed later */ void table_ci (const char *descript, double estvalue, double clow, double chigh); void table_ci_tstat (const char *descript, double estvalue, double clow, double chigh); /* Show the tabled CI values */ void show_summary(void); /* The main (starting) function itself: */ int main(int argc, char *argv[]) { /* The syntax `*xvar' below means that `xvar' is a POINTER to a double, */ /* and is not a double itself. We will later store the ADDRESS of an */ /* array that is large enough to store NN doubles in `xvar', so that */ /* xvar[0], xvar[1], ..., xvar[NN-1] will be valid doubles. */ /* If `xvar' is a pointer to a double value, then we can fetch that */ /* value by saying either *xvar or xvar[0] . */ /* The language C usually does not distinguish between the name of */ /* an array and a pointer to the first element in that array, */ /* but there are rare instances in which it does. */ int i,j; double xvar[MAXN]; /* Variables for calculating standard deviations and their CIs */ double s1,s2,ss, Obs_xbar,Obs_sdev,Obs_logsdev; double sdev,logsdev, tcrit, sbar,swid; double p1,pmed,p2, clow,cmed,chigh, expclow,expsbar,expchigh; /* Variables used in jacknife calculations: */ double jdel_one[MAXN], jdel_logone[MAXN]; double pseudoval[MAXN], pseudologval[MAXN]; /* Variables used in bootstrap calculations: */ int ns,m, kk,klow,khigh; double bootarray[NBOOT]; double bmed, blow, bhigh; printf ( "An example of using the `jackknife' and `bootstrap' estimation:\n" " Finding bias-corrected estimates and confidence intervals for\n" " the true standard deviation from a sample.\n"); /* Enter `ProgName NNNN' (for example, Progname 1234) to initialize the random-number seet at NNNN instead of the default value */ if (argc>=2) wseed=atol(argv[1]); wseed=setrand(wseed); /* Returns `wseed' if wseed!=0 */ printf ("The starting random-number seed is %lu\n", wseed); /* Given an example of `nn' random N(0,1)s (standard normals): */ /* normrand() is a function in statlib.c */ printf ("\nA sample of %d standard normal R.V.s is:\n",nn); for (i=0; i0 && (i%8)==0) printf("\n"); printf (" %7.4f", xx); } printf("\n"); /* Fill xvar[] with `nn' N(Mu,Sdev^2)s and then display them.*/ /* If X is N(0,1), recall that Y = mu+C*X is N(mu,C^2). That is, */ /* Y has mean E(Y)=mu and standard deviation C. */ printf ( "\nA sample of %d normal variables with mean=%g and st.dev.=%g is\n", nn,Mu,Sdev); /* Generate the random sample that will be analyzed */ for (i=0; i0) klow=kk-1; /* k-th from the bottom */ khigh=nboot-1-klow; /* k-th from the top */ blow = bootarray[klow]; bhigh = bootarray[khigh]; printf ("Bootstrap median: %.4f 95%% Percentile CI: (%7.4f, %7.4f)\n", bmed, blow,bhigh); /* Remember these values */ table_ci("Bootstrap", bmed, blow,bhigh); /* Display the information that we stored in the 4 SAMPESTS structures */ /* in a single table: */ printf ("\nSUMMARY (Estimator of sdev, 95%% symmetric CI for sdev):\n"); show_summary(); /* Repeat basic parameters for clarity */ printf ("\nSample size: %d Number of bootstrap replications: %s\n", nn, commnum(nboot)); printf ("Starting random-number seed: %lu\n", wseed); printf ("\nThe `Sdev Jackknife' CI seems to track the exact CI best,\n" " even for different starting seeds and different numbers of\n" " bootstrap replication.\n"); show_randnums_used(); return 0; } /* Return with no errors */ /* Display an array in %7.3f format with 8 values per line: */ void show_array(int nn, const double *array) { int i; for (i=0; i0 && (i%8)==0) printf("\n"); printf (" %7.3f", array[i]); } printf("\n"); } #define MAXT 20 int ntab; const char *table_descript[MAXT]; double table_estvalue[MAXT]; double table_ci_low[MAXT]; double table_ci_high[MAXT]; int table_tstat[MAXT]; void table_ci (const char *descript, double estvalue, double clow, double chigh) { if (ntab