Example R programs and commands Bootstrap resampling to estimate sampling error # All lines preceded by the "#" character are my comments. # All other left-justified lines are my input. # All other indented lines are the R program output. #### BOOTSTRAP # GENERATE DATA --- samples from a non-normal pdf: set.seed(12345); # ...to get reproducible results X1 <- rexp(90,rate=1/2); # sample one non-normal pdf X2 <- runif(110, min=8, max=9) # sample another non-normal pdf data<- c(X1, X2); # ... and combine to get 200 non-normal samples hist(data) # see how non-normal these samples are. # Compute two parameters for this pdf: mean(data); # average value: 5.63 sd(data); # standard deviation: 3.614 # Alternative parameters describing this pdf summary(data); # quartiles for this pdf Min. 1st Qu. Median Mean 3rd Qu. Max. 0.009379 1.290000 8.172000 5.631000 8.654000 12.800000 # Individual quartiles are array elements 1,2,...,6: summary(data)[1] # Min. 0.009379 summary(data)[2] # 1st Qu. 1.29 summary(data)[3] # Median 8.172 summary(data)[4] # Mean 5.631 summary(data)[5] # 3rd Qu. 8.654 summary(data)[6] # Max. 12.8 (summary(data)[5] - summary(data)[2])/2 # quartile sd estimate 3.682 # Compute the traditional sampling error estimate: sd(data)/sqrt(200); # 0.2555652 # Compute the quartile-based sampling error estimate: ((summary(data)[5] - summary(data)[2])/2 )/sqrt(200) # 0.2603567 # RESAMPLE THE DATA --- with replacement xstar<-matrix(0,nrow=100,ncol=200); # empty matrix to hold 100 resamplings # Put one 200-element resampling in each of 100 rows `i' of matrix `xstar': set.seed(12345); # ...to get reproducible results for(i in 1:100) xstar[i,]<-sample(data,200,replace=TRUE); # Compute the 100 medians and store them in vector `xmedians' xmedians<-vector(length=100); for(i in 1:100) xmedians[i]<-median(xstar[i,]); # Compute the 100 means and store them in vector `xmeans xmeans<-vector(length=100); for(i in 1:100) xmeans[i]<-mean(xstar[i,]); # Compute the sampling error directly from the sd of the 100 resampling means: sd(xmeans); # 0.245948 # Compute the sampling error directly from the resampling medians' quartiles: (summary(xmedians)[5]-summary(xmedians)[2])/2; # 0.05 Much less! # Evidently the median has a much lower sampling error than the mean. # Confirm this with histogram plots of the 100 means and medians of # the resampled data: par(mfrow=c(1,2)); hist(xmeans); hist(xmedians); # Compare the sampling error pdfs with the original pdf: par(mfrow=c(3,1)); hist(xmeans, breaks=0:14); hist(xmedians,breaks=0:14); hist(data,breaks=0:14)