/* Comparison of 3 different methods of linear regression: (1) Least-squares (2) ``Theil'' linear regression (3) Rank regression */ #define PROG_NAME "RANKREGRESS" #define YOUR_NAME "YOURNAME" #include #include #include /* Prototypes for functions in statlib.c : Compile by e.g. gcc -o ProgName Progname.c statlib.c with ProgName.c, statlib.c, and statlib.h in the default directory. */ #include "statlib.h" #define DTOL 1e-12 /* Minimum floating-point error */ #define MAXN 50 /* Allow for up to this many pairs */ /* The `const' says that the program should not change these values */ /* without a compiler crash or at least a very strong warning. */ /* Text for this example */ const char *title_data= "Prepacked Lubricants and Survival Times for Engines\n" "(Source of data unknown)"; const char *xname="Lubricants"; const char *yname="Survival Time"; const char *ynameshort="Survival"; /* Number of treatment groups and subjects */ const int nn=20; const double xx[MAXN] = { 29, 21, 30, 45, 48, 92, 74, 61, 99, 24, 84, 37, 94, 56, 67, 79, 70, 93, 76, 72 }; const double yy[MAXN] = { 23, 36, 67, 104, 147, 164, 247, 304, 355, 408, 429, 489, 504, 925, 980, 1254, 2961, 5351, 9915, 11301 }; /* Default starting random-number seed and */ /* number of replications in bootstrap procedures */ unsigned long wseed=123456; #define NBOOT 10000 /* This many bootstrap replications */ int nboot=NBOOT; /* Describe and display the data */ void display_data(const char *title_data, const double *xx, const double *yy, int nn, const char *xname, const char *yname); /* Display three regression lines */ void do_regressions(const char *Ytype, const double *xx, const double *yy, int nn, const char *xname, const char *yname); /* Ordinary (least-squares) regression and P-values */ /* and bootstrap output */ void ci_pvals_lsq_regr(const char *Ytype, const double *xx, const double *yy, int nn); void bootstrap_beta_lsq_regression (const char *Ytype, const double *xx, const double *yy, int nn); /* Theil regression and P-values */ /* and bootstrap output */ void ci_pvals_theilregr(const char *Ytype, const double *xx, const double *yy, int nn); void bootstrap_beta_theil_regression (const char *Ytype, const double *xx, const double *yy, int nn); /* Rank regression and P-values */ /* and bootstrap output */ void bootstrap_beta_rank_regression (const char *Ytype, const double *xx, const double *yy, int nn); /* Display the summary lines so far */ void Dump_Regression_Summary(void); int main(int argc, char **argv) { int i; double logyy[MAXN]; /* Helpful for segmentation faults and some other fatal errors */ /* This function is in statlib.c */ set_newsignals(); printf ( "Comparison of 3 different methods of linear regression:\n" " (1) Least-squares\n" " (2) ``Theil'' linear regression\n" " (3) Rank regression\n"); printf ("Output of program %s\n", PROG_NAME); printf ("Written or modified by %s\n", YOUR_NAME); /* Enter `(ProgramName) ' to set starting RN seed= */ if (argc>=2) wseed=atol(argv[1]); wseed=setrand(wseed); /* If wseed=0, wseed=value from system clock */ printf ("\nStarting random-number seed: %lu\n",wseed); printf ("(Enter a number on the command line " "to specify a different seed.)\n"); /* Describe the data */ display_data(title_data, xx,yy,nn, xname,ynameshort); /* Make sure that nn is big enough */ if (nn<4) { printf ("\nN=%d is too small!\n",nn); exit(1); } printf ( "\n(The last two columns below are the errors\n" " MADf = (1/n)Sum(i=1,n) |Y_i-beta*X_i-mu| and\n" " RMS = Root( (1/n)Sum(i=1,n)(Y_i-beta*X_i-mu)^2 )\n"); /* Display regression lines for all three regressions */ do_regressions("Y", xx,yy,nn, xname,yname); /* Try log-transformed Y? */ /* Put log(Y) values in an array */ for (i=0; iDTOL) /* Unless an X-X tie */ rbuff[nd++] = (yy[j] - yy[i])/(xx[j] - xx[i]); } if (nd<=0) /* All X-values are the same */ { printf ("Find Theil values: All X values are constant!\n"); exit(1); } /* Sort the difference ratios */ /* qsort() is a standard C function */ /* sortdub() is in statlib.c */ qsort(rbuff,nd, sizeof(double),sortdub); /* Return the median */ return getmed(rbuff,nd); } double get_MedianWalshSumsResids(const double *xx, const double *yy, int nn, double beta) { int i,j,r; double resid[MAXN], walsh[MAXN*MAXN]; /* Put residuals in resid[] */ for (i=0; iyratio - ((PWRATIO)q)->yratio)>DTOL) return 1; else if (xx<-DTOL) return -1; if ( (xx = ((PWRATIO)p)->xdiff - ((PWRATIO)q)->xdiff)>DTOL) return 1; else if (xx<-DTOL) return -1; return 0; } /* Return the rank regression estimate of beta */ /* This is the value of beta that minimizes */ /* D(beta) = Sum (R_i(beta)-(n+1)/2)*(Y_i-beta*X_i) */ /* where R_i(beta) are the ranks of the residuals */ /* Y_i^* = Y_i-beta*X_i */ double getrankbeta(const double *xx, const double *yy, int nn, double *muptr) { int i,j,nd; double qsum, beta, lslope, nbar; /* An array of WRATIO structures */ WRATIO ww[MAXN*MAXN]; double midr[MAXN]; /* Put midranks of X_i in midr[] */ /* makeranks() is in statlib.c */ makeranks(midr, xx,nn, NULL,NULL); /* Compute Q = Sum (Rank(X_i)-(n+1)/2) X_i */ qsum=0.0; nbar=(double)(nn+1)/2; for (i=0; iDTOL) { ww[nd].yratio = (yy[j]-yy[i])/(xx[j]-xx[i]); ww[nd].xdiff = fabs(xx[j]-xx[i]); nd++; } if (nd<=0) /* All X-values are the same */ { printf ("While finding Rank Regression slope: " "All X values are constant!\n"); exit(1); } /* Sort ww[] by increasing yratio, then by increasing xdiff */ qsort(ww,nd, sizeof(WRATIO),sortwratio); /* Initialize to find the low point */ /* lslope is iniitially the slope just before ww[0] */ lslope=-qsum; for (i=0; i1.0) pval=1.0; /* Lower and upper limits of 95% CI for pval */ pvlow=pval-pwid; if (pvlow<0.0) pvlow=0.0; pvhigh=pval+pwid; if (pvhigh>1.0) pvhigh=1.0; printf (" One-sided P: P=%d/%s=%7.5f\n", nlsz,commnum(nboot), pval_one); printf (" Two-sided P: P=%7.5f 95%% CI: (%8.6f, %8.6f)\n", pval, pvlow, pvhigh); /* Write a note to the summary list */ addrsumm(boot_type, zmed,zlow,zhigh, pval, -1); } /* Theil regression and P-values */ /* Return Kendall K statistic */ int getkend(const double *xx, const double *yy, int nn) { int i,j, kk; for (kk=i=0; i 0.0) kk++; else kk--; }}} return kk; } void ci_pvals_theilregr(const char *Ytype, const double *xx, const double *yy, int nn) { int mm, j1,j2, k1,k2; double vark, cc,dd, th1,thmed,th2, zstat,pval; double rdiff[MAXN*MAXN]; printf("\nDISTRIBUTION-FREE EXACT 95%% CI for Theil's estimator:\n"); printf (" Large-sample approximations, %s = beta*X + mu:\n", Ytype); /* Write a note to the summary list */ addrcomm("Theil Regression", Ytype); /* Construct sorted difference ratios */ /* Returns the median of the array of difference ratios */ thmed=set_theil_diffratios(rdiff, xx,yy,nn); /* The variance of the Kendall K statistic given H_0 */ /* If there are any Y-Y or X-X ties, this has a more complex form, */ /* and rdiff[] has to be defined differently. */ vark = (nn*(nn-1)*(2*nn+5))/18; /* Find the associated exact nonparametric CI */ mm=(nn*(nn-1))/2; /* The number of difference ratios */ cc=1.960*sqrt(vark); /* For -mm<=K<=+mm */ dd=(mm-cc)/2; /* Convert to 0<=K<=mm, lower 2.5% tail */ k1=(int)floor(dd); /* Round down to next integer */ k2=mm+1-k1; /* Upper limit, (k1,k2) for range (1,d) */ j1=k1-1; j2=k2-1; /* (j1,j2) for range (0,d-1) */ if (j1<0) j1=0; if (j2<0) j2=0; /* Upper and lower limits */ th1=rdiff[j1]; th2=rdiff[j2]; printf (" Beta=%6.4f 95%% CI (%6.4f, %6.4f)\n" " (attained at diff.ratio offsets %d,%d in (1,%d))\n", thmed, th1,th2, k1,k2,mm); /* Find Kendall P-value for H_0:beta=0 */ zstat = getkend(xx,yy,nn)/sqrt(vark); pval=normpval(zstat); printf (" Large-sample P-value for H_0:beta=0:\n"); printf (" Z=%g P=%6.4f (2-sided)\n", zstat,pval); /* Write a note to the summary list */ addrsumm("Approx", thmed, th1,th2, pval, -1); } void bootstrap_beta_theil_regression (const char *Ytype, const double *xx, const double *yy, int nn) { int i,ns; double beta, resid[MAXN], zboot[NBOOT+3]; printf ("\n(THEIL REGRESSION) BOOTSTRAP for beta in %s = beta*X + mu:\n", Ytype); printf (" Medians and 95%% confidence intervals, %s replications.\n", commnum(nboot)); /* NULL means that we aren't interested in mu */ beta=gettheilbeta(xx,yy,nn, NULL); printf ("BOOTSTRAP ON OBSERVATIONS (Theil, beta=%g):\n", beta); for (ns=0; ns=0) sprintf(dbuff, " (df=%d)", df); else dbuff[0]=0; /* Write data to a summary line and increase the count */ sprintf(summlines[nsumm++], "%-12s %9.5f (%9.5f, %9.5f) P=%7.5f%s", RegName, beta, cilow,cihigh, pval, dbuff); } /* Display the summary lines */ void Dump_Regression_Summary(void) { int i; for (i=0; i