# Confidence Interval # CIs for Population Mean when it is normal and variance is known mu = 10 # Population mean sigma = 5 # Population standard deviation alpha = 0.05 #Significance level n=20 # number of samples nExperiments = 100 # number of experiments (repeated sampling, basically) results.U = rep(0,nExperiments) results.L = rep(0,nExperiments) titleOfPlot = paste("CI for Popu Mean, Normal Case and Variance Known (Sample size Per Sampling =",n,")") plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage")) abline(h = mu,lwd=2) nCovered = 0 # Running a simulation for (i in 1:nExperiments) { sampledData = rnorm(n,mu,sigma) results.U[i] = mean(sampledData) + qnorm(1 - alpha/2)*sigma/sqrt(n) results.L[i] = mean(sampledData) - qnorm(1 - alpha/2)*sigma/sqrt(n) points(i,mean(sampledData),cex=0.5,pch=16) if (mu < results.U[i] & mu > results.L[i]) { lines(c(i,i),c(results.U[i],results.L[i]),col="blue") nCovered = nCovered + 1 } else { lines(c(i,i),c(results.U[i],results.L[i]),col="red") } } title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered/nExperiments*100),"% of times")) # T distribution df = c(5,30,100) color = c("red","blue","purple") x.seq = seq(-3,3,0.05) plot(c(min(x.seq),max(x.seq)),c(0,0.5),type="n",main="T Distribution",ylab="P(x)",xlab="x") for (i in 1:length(df)) { y = dt(x.seq,df[i]) lines(x.seq,y,col=color[i],lwd=2) } lines(x.seq,dnorm(x.seq),col="black",lwd=2,lty=2) legend("topright",legend=c(paste("Degrees of Freedom:",df),"Standard Normal Distribution"),col=c(color,"black"),lwd=2,lty=c(1,1,1,2)) # Length of CIs between Known and Unknown variance # CIs for Population Mean when it is normal and variance is known mu = 10 # Population mean sigma = 5 # Population variance (we'll treat one of the CI as if it doesn't know sigma alpha = 0.05 #Significance level n=20 # number of samples nExperiments = 100 # number of experiments (repeated sampling, basically) results.U.known = rep(0,nExperiments) results.L.known = rep(0,nExperiments) results.U.unknown = rep(0,nExperiments) results.L.unknown = rep(0,nExperiments) nCovered.known = 0 nCovered.unknown =0 # Running a simulation for (i in 1:nExperiments) { sampledData = rnorm(n,mu,sigma) results.U.known[i] = mean(sampledData) + qnorm(1 - alpha/2)*sigma/sqrt(n) results.L.known[i] = mean(sampledData) - qnorm(1 - alpha/2)*sigma/sqrt(n) results.U.unknown[i] = mean(sampledData) + qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n) results.L.unknown[i] = mean(sampledData) - qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n) if (mu < results.U.known[i] & mu > results.L.known[i]) { nCovered.known = nCovered.known + 1 } if (mu < results.U.unknown[i] & mu > results.L.unknown[i]) { nCovered.unknown = nCovered.unknown + 1 } } # For the known variance case par(mfrow=c(1,3)) titleOfPlot = paste("CI for Variance Known (Sample size Per Sampling =",n,")") plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage")) abline(h = mu,lwd=2) title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered.known/nExperiments*100),"% of times")) for (i in 1:nExperiments) { points(i,mean(c(results.U.known[i],results.U.known[i])),cex=0.5,pch=16) if (mu < results.U.known[i] & mu > results.L.known[i]) { lines(c(i,i),c(results.U.known[i],results.L.known[i]),col="blue") } else { lines(c(i,i),c(results.U.known[i],results.L.known[i]),col="red") } } # For the unknown variance case titleOfPlot = paste("CI with Variance unknown (Sample size Per Sampling =",n,")") plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage")) abline(h = mu,lwd=2) title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered.unknown/nExperiments*100),"% of times")) for (i in 1:nExperiments) { points(i,mean(c(results.U.unknown[i],results.U.unknown[i])),cex=0.5,pch=16) if (mu < results.U.unknown[i] & mu > results.L.unknown[i]) { lines(c(i,i),c(results.U.unknown[i],results.L.unknown[i]),col="blue") } else { lines(c(i,i),c(results.U.unknown[i],results.L.unknown[i]),col="red") } } # Length of sample hist(results.U.unknown - results.L.unknown,breaks=round(nExperiments/7),main="Length of the CI for unknown variance") abline(v = 2*qnorm(1 - alpha/2)*sigma/sqrt(n),lwd=2) legend("topright",legend="CI for known variance",lwd=2,lty=1) # (DEMO) par(mfrow=c(1,1)) data = c() # CIs for Population Mean when it is normal and variance is known mu = 0 # Population mean sigma = sd(data) # Population standard deviation alpha = 0.05 #Significance level n=length(data) # number of samples nExperiments = 100 # number of experiments (repeated sampling, basically) results.U = rep(0,nExperiments) results.L = rep(0,nExperiments) titleOfPlot = paste("CI for Popu Mean, Normal Case and Variance Known (Sample size Per Sampling =",n,")") plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage")) abline(h = mu,lwd=2) nCovered = 0 for (i in 1:nExperiments) { sampledData = sample(data,size=n,replace=TRUE) results.U[i] = mean(sampledData) + qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n) results.L[i] = mean(sampledData) - qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n) points(i,mean(sampledData),cex=0.5,pch=16) if (mu < results.U[i] & mu > results.L[i]) { lines(c(i,i),c(results.U[i],results.L[i]),col="blue") nCovered = nCovered + 1 } else { lines(c(i,i),c(results.U[i],results.L[i]),col="red") } } title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered/nExperiments*100),"% of times"))