# Confidence Interval

# CIs for Population Mean when it is normal and variance is known
mu = 10 # Population mean
sigma = 5 # Population standard deviation

alpha = 0.05 #Significance level

n=20 # number of samples
nExperiments = 100 # number of experiments (repeated sampling, basically)

results.U = rep(0,nExperiments)
results.L = rep(0,nExperiments)

titleOfPlot = paste("CI for Popu Mean, Normal Case and Variance Known (Sample size Per Sampling =",n,")")
plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage"))
abline(h = mu,lwd=2)

nCovered = 0

# Running a simulation
for (i in 1:nExperiments) {
  sampledData = rnorm(n,mu,sigma)
  results.U[i] = mean(sampledData) + qnorm(1 - alpha/2)*sigma/sqrt(n)
  results.L[i] = mean(sampledData) - qnorm(1 - alpha/2)*sigma/sqrt(n)
  points(i,mean(sampledData),cex=0.5,pch=16)
  if (mu < results.U[i] & mu > results.L[i]) {
    lines(c(i,i),c(results.U[i],results.L[i]),col="blue")
    nCovered = nCovered + 1
  } else {
    lines(c(i,i),c(results.U[i],results.L[i]),col="red")
  }
}
title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered/nExperiments*100),"% of times"))

# T distribution
df = c(5,30,100)
color = c("red","blue","purple")

x.seq = seq(-3,3,0.05)
plot(c(min(x.seq),max(x.seq)),c(0,0.5),type="n",main="T Distribution",ylab="P(x)",xlab="x")
for (i in 1:length(df)) {
  y = dt(x.seq,df[i])
  lines(x.seq,y,col=color[i],lwd=2)
}
lines(x.seq,dnorm(x.seq),col="black",lwd=2,lty=2) 
legend("topright",legend=c(paste("Degrees of Freedom:",df),"Standard Normal Distribution"),col=c(color,"black"),lwd=2,lty=c(1,1,1,2))

# Length of CIs between Known and Unknown variance
# CIs for Population Mean when it is normal and variance is known
mu = 10 # Population mean
sigma = 5 # Population variance (we'll treat one of the CI as if it doesn't know sigma

alpha = 0.05 #Significance level

n=20 # number of samples
nExperiments = 100 # number of experiments (repeated sampling, basically)

results.U.known = rep(0,nExperiments)
results.L.known = rep(0,nExperiments)
results.U.unknown = rep(0,nExperiments)
results.L.unknown = rep(0,nExperiments)

nCovered.known = 0
nCovered.unknown =0

# Running a simulation
for (i in 1:nExperiments) {
  sampledData = rnorm(n,mu,sigma)
  results.U.known[i] = mean(sampledData) + qnorm(1 - alpha/2)*sigma/sqrt(n)
  results.L.known[i] = mean(sampledData) - qnorm(1 - alpha/2)*sigma/sqrt(n)
  
  results.U.unknown[i] = mean(sampledData) + qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n)
  results.L.unknown[i] = mean(sampledData) - qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n)
  
  if (mu < results.U.known[i] & mu > results.L.known[i]) {
    nCovered.known = nCovered.known + 1
  } 
  
  if (mu < results.U.unknown[i] & mu > results.L.unknown[i]) {
    nCovered.unknown = nCovered.unknown + 1
  }
}
# For the known variance case
par(mfrow=c(1,3))
titleOfPlot = paste("CI for Variance Known (Sample size Per Sampling =",n,")")
plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage"))
abline(h = mu,lwd=2)
title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered.known/nExperiments*100),"% of times"))

for (i in 1:nExperiments) {
  points(i,mean(c(results.U.known[i],results.U.known[i])),cex=0.5,pch=16)
  if (mu < results.U.known[i] & mu > results.L.known[i]) {
    lines(c(i,i),c(results.U.known[i],results.L.known[i]),col="blue")
  } else {
    lines(c(i,i),c(results.U.known[i],results.L.known[i]),col="red")
  }
}

# For the unknown variance case
titleOfPlot = paste("CI with Variance unknown (Sample size Per Sampling =",n,")")
plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage"))
abline(h = mu,lwd=2)
title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered.unknown/nExperiments*100),"% of times"))

for (i in 1:nExperiments) {
  points(i,mean(c(results.U.unknown[i],results.U.unknown[i])),cex=0.5,pch=16)
  if (mu < results.U.unknown[i] & mu > results.L.unknown[i]) {
    lines(c(i,i),c(results.U.unknown[i],results.L.unknown[i]),col="blue")
  } else {
    lines(c(i,i),c(results.U.unknown[i],results.L.unknown[i]),col="red")
  }
}

# Length of sample
hist(results.U.unknown - results.L.unknown,breaks=round(nExperiments/7),main="Length of the CI for unknown variance")
abline(v = 2*qnorm(1 - alpha/2)*sigma/sqrt(n),lwd=2)

legend("topright",legend="CI for known variance",lwd=2,lty=1)




# (DEMO)
par(mfrow=c(1,1))
data = c()
# CIs for Population Mean when it is normal and variance is known
mu = 0 # Population mean
sigma = sd(data) # Population standard deviation

alpha = 0.05 #Significance level

n=length(data) # number of samples
nExperiments = 100 # number of experiments (repeated sampling, basically)

results.U = rep(0,nExperiments)
results.L = rep(0,nExperiments)

titleOfPlot = paste("CI for Popu Mean, Normal Case and Variance Known (Sample size Per Sampling =",n,")")
plot(c(0,nExperiments),c(mu - 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n),mu + 1.5*qnorm(1 -alpha/2) * sigma/sqrt(n)),main=titleOfPlot,xlab="",type="n",ylab=paste(round((1-alpha)*100),"Coverage"))
abline(h = mu,lwd=2)

nCovered = 0

for (i in 1:nExperiments) {
  sampledData = sample(data,size=n,replace=TRUE)
  results.U[i] = mean(sampledData) + qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n)
  results.L[i] = mean(sampledData) - qt(1 - alpha/2,length(sampledData)-1)*sd(sampledData)/sqrt(n)
  points(i,mean(sampledData),cex=0.5,pch=16)
  if (mu < results.U[i] & mu > results.L[i]) {
    lines(c(i,i),c(results.U[i],results.L[i]),col="blue")
    nCovered = nCovered + 1
  } else {
    lines(c(i,i),c(results.U[i],results.L[i]),col="red")
  }
}
title(xlab=paste("Repeated Sampling, Covered the Popu Mean",round(nCovered/nExperiments*100),"% of times"))