################################################################################ #### Sampling Distribution for Means #### ################################################################################ #### By Jimin Ding, 02//16/2017 #### Ber(p) ## Generate 100 r.v. from Ber(p) p=0.5 n=100 xn=rbinom(n=n,size=1,p=p) (xbar=mean(xn)) ## the outside () can print the the xbar in the R console ## This number may not be exactly 0.5, but is very close to 0.5. ## How close to p ( =0.5)? ## Every random sample will have a different mean. ## Let's repeat the random experiments 10000 times, ## we then will simulate 10000 sample means. xbars=c() # initialize xbars to be an empty vector. for (i in 1:10000){ xn=rbinom(n=n,size=1,p=p) xbar=mean(xn) xbars=c(xbars,xbar) # save the sample mean in xbars hist(xbars) } head(xbars) ## This is equivalent to xbars=rbinom(n=10000,size=n,p=p)/n hist(xbars, main='Histogram of Sample Means of 100 Ber(0.5)') abline(v=p,col='red',lwd=2,lty=2) ## One may change "n=100" to other n values or other distributions than "Ber" ## in above code. ## Now we generate histograms of Sample Means from 10000 simulations of various special distributions. set.seed(900) pdf("SampleMeans.pdf") ## Ber(p) par(mfrow=c(2,2)) ## Ber(0.5) p=0.5 for (n in c(10,50,100,500)){ xbars=rbinom(n=10000,size=n,p=p)/n hist(xbars,xlim=c(0,1), main=paste('Ber(0.5) n=',n)) abline(v=p,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } ## Ber(0.1) p=0.1 for (n in c(10,50,100,500)){ xbars=rbinom(n=10000,size=n,p=p)/n hist(xbars,xlim=c(0,1), main=paste('Ber(0.1) n=',n)) abline(v=p,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } ## Standard Normal mu=0 sigma=1 tmp=proc.time() # record the current clock time for (n in c(10,50,100,500)){ xbars=c() for (i in 1:10000){ xbar=mean(rnorm(n=n,mean=mu,sd=sigma)) xbars=c(xbars,xbar) } hist(xbars,xlim=c(-1,1), main=paste('N(0,1) n=',n)) abline(v=mu,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } proc.time()-tmp # Above code is time consuming because of the loop # One may make use of vector and matrix to write more efficient code tmp=proc.time() for (n in c(10,50,100,500)){ xn=matrix(rnorm(n=n*10000,mean=mu,sd=sigma),ncol=10000) # xn is a matrix of nx10000 xbars=colMeans(xn) hist(xbars,xlim=c(-1,1), main=paste('N(0,1) n=',n)) abline(v=mu,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } proc.time()-tmp ## Save 50% time ## Normal(1,4) mu=1 sigma=2 for (n in c(10,50,100,500)){ xn=matrix(rnorm(n=n*10000,mean=mu,sd=sigma),ncol=10000) xbars=colMeans(xn) hist(xbars,xlim=c(-1,3), main=paste('N(1,4) n=',n)) abline(v=mu,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } ## Uniform[0,1] for (n in c(10,50,100,500)){ xn=matrix(runif(n=n*10000),ncol=10000) xbars=colMeans(xn) hist(xbars,xlim=c(0,1), main=paste('U[0,1] n=',n)) abline(v=0.5,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } ## Exp(1) for (n in c(10,50,100,500)){ xn=matrix(rexp(n=n*10000),ncol=10000) xbars=colMeans(xn) hist(xbars,xlim=c(0,3), main=paste('Exp(1) n=',n)) abline(v=1,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } ## Poisson(1) for (n in c(10,50,100,500)){ xn=matrix(rpois(n=n*10000,lambda=1),ncol=10000) xbars=colMeans(xn) hist(xbars,xlim=c(0,3), main=paste('Poisson(1) n=',n)) abline(v=1,col='red',lwd=2,lty=2) qqnorm(xbars) qqline(xbars,lty=2,col='red',lwd=2) } ## HyperGeometric(50,2,3) ?rhyper # Note the parameterization of "rhyper" is different from what we learned from the book # m=number of success in population, so 2 in this case # n=number of failure in population, so 48 in this case # k=number of sampling units, so 3 in this case # so the mean is 3*2/50 for (n in c(10,50,100,500)){ xn=matrix(rhyper(n*10000,m=2,n=48,k=3),ncol=10000) xbars=colMeans(xn) hist(xbars,xlim=c(0,0.5), main=paste('HyperGeometric(50,2,3) n=',n)) abline(v=3*2/50,col='red',lwd=2,lty=2) qqnorm(xbars,pch='.') qqline(xbars,lty=2,col='red',lwd=2) } dev.off()