##Let's start
2 + 3
4 + 5 * 10
sqrt(25)
cos(pi)
log10(100)

##Help!!
help("mean")

##Variables and Vectors
x<- 5
x

x = 10
x

x + 2
x

y <- x * 3
y

x <- c(1, 2, 3, 4)
x + 2

x * 3

x <- 1:5
length(x)

x <- rnorm(3)
x

x <- rnorm(3, mean = 4, sd = 20)
x

mean(c(1, 4, 5, 2, 8, 2))

#####################################################################################################
##Sample Mean
set.seed(3521)
n1=500000
x.list=rexp(n=n1, rate=3)
##x.list
sample.mean=mean(x.list)
sample.mean
sample.variance=var(x.list)
sample.variance
Unbiased.variance=sum((x.list-sample.mean)^2)/(n1-1)
Unbiased.variance
####check if x.bar is from Normal distribution 

##True mean


x.bar.list=rep(0,5000)

for (i in 1:5000)
{
  x.sample=rexp(n=10, rate=3)
  x.bar.list[i]=mean(x.sample)
} 
hist(x.bar.list)
plot(density(x.bar.list), main="compare with the target normal distribution with sample distribution")
n2=50000
x.list=rnorm(n=n2, mean=mean(x.bar.list), sd=sd(x.bar.list))
lines(density(x.list),col="red")
legend("topright", legend=c("Normal", "Smaple"),
       col=c("red", "black"), lty=1:1, cex=0.8,
       title="Line types", text.font=4, bg='lightblue')

##QQ plot
probs=seq(from=0.05, to=0.95, by=0.05)
## Percentiles of the data distribution
dataperc=quantile(x.bar.list, probs)
## Percentiles of the fitted Normal distribution
fitperc=qnorm(probs, mean=mean(x.bar.list), sd=sd(x.bar.list))
cbind(dataperc, fitperc)
plot(fitperc, dataperc, xlab="Fitted percentile", ylab="Data percentile", main="QQ plot")
## add the identity line
abline(a=0, b=1)

################################################################################################
##Confidence interval

#ONE SIDED
set.seed(3701); mu=50; sigma=0.5; n=60;
y.list=rnorm(n=60,mean=mu,sd=sigma)
alpha=0.05
tperc = qt(alpha/2, n-1)
tperc
moe=tperc*sd(y.list)/sqrt(n)
Upper.bound=mean(y.list) - moe
Upper.bound
Lower.bound=mean(y.list) + moe
Lower.bound


#Two sided
set.seed(3701); mu=50; sigma=0.5; n=60;
y.list=rnorm(n=60,mean=mu,sd=sigma)
alpha=0.05
tperc = qt(alpha/2, n-1)
tperc
tperc = qt(1-alpha/2, n-1)
tperc
moe=tperc*sd(y.list)/sqrt(n)
left.pt=mean(y.list) - moe
right.pt=mean(y.list) + moe
CI=c(left.pt,right.pt)
CI


##Simulation-based estimation of the coverage probability of the random confidence 
##interval for the distribution mean with an iid sample from the Normal distribution

set.seed(3701); mu=68; sigma=3; n=5; reps=10000
alpha=0.05
tperc = qt(alpha/2, n-1)
tperc
tperc = qt(1-alpha/2, n-1)
tperc
## create the vector that will record
## whether or not the interval in the
## rth replication captured mu (1=yes, 0=no)
## r=1,...,reps
captured.list=numeric(reps)
for(r in 1:reps)
{  
  ## generate a realization of
  ## X_1,...X_n iid N(mu, sigma^2)
  x.list=rnorm(n, mu,sigma)
  ## compute realization of the (random) sample mean
  xbar=mean(x.list)
  ## compute realization of the (random) sample standard dev.
  s=sd(x.list)
  ## compute left and right endpoints of the
  ## confidence interval
  moe=tperc*s/sqrt(n)
  left.pt=xbar - moe
  right.pt=xbar + moe
  ## did the interval capture mu? (1=yes, 0=no)
  captured.list[r]=1*( left.pt <= mu ) * (mu <= right.pt)
}
  ## the simulated estimate of the coverage probability
  ## is the proportion of replications for which the interval
  ## captured mu:
mean(captured.list)