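Let $X_1, X_2, \ldots$ be a sequence of independent, identically distributed random variables, each with mean $\mu$ and variance $\sigma^2$. Then the distribution of $\dfrac{X_1 + X_2 + \cdots + X_n - n\mu}{\sigma\sqrt{n}}$ tends to the standard normal as $n \to \infty$. That is, $P\left(\dfrac{X_1 + X_2 + \cdots + X_n - n\mu}{\sigma\sqrt{n}} \le a\right) \to \dfrac{1}{\sqrt{2\pi}} \int_{-\infty}^{a} e^{-x^2/2}\,dx$ as $n \to \infty$.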
In other words, the central limit theorem tells us that if the population distribution has mean $\mu$ and standard deviation $\sigma$, then the sampling distribution of the mean also has mean $\mu$, and the standard error of the mean is $S = \sigma/\sqrt{n}$. We can see this experimentally: we will take some known distributions and run the experiment on each.
# Parameters of the normal population sampled below; mean.histplot() passes
# them to rnorm() and dnorm().
s <- 1  # standard deviation
m <- 0  # mean
# The following function plots a histogram of the sample mean.
mean.histplot <- function(n, N = 50000) {
  # Plot a density histogram of N simulated sample means, each the average of
  # n normal draws, and overlay the normal density with mean m and standard
  # deviation s / sqrt(n).
  #
  # n - sample size (number of draws averaged per sample)
  # N - number of simulated samples
  #
  # Reads m (mean) and s (standard deviation) from the enclosing environment.
  # Called for its plotting side effect; returns NULL invisibly.

  # Each column of the n x N matrix is one sample of size n.
  sample_means <- colMeans(matrix(rnorm(n * N, m, s), n, N))

  # The axis limits follow m and s instead of being fixed at (-3.5, 3.5), so
  # the plot stays centred on the data when the parameters are changed. They
  # do not depend on n, so plots for different sample sizes share one scale.
  hist(
    sample_means,
    breaks = "Sturges", border = "white", freq = FALSE,
    col = "blue",
    xlab = "Sample mean", ylab = "", xlim = m + c(-3.5, 3.5) * s,
    main = paste("Sample size =", n), axes = FALSE,
    font.main = 1
  )
  box()
  axis(1)

  # Overlay the N(m, s / sqrt(n)) density on m +/- 3 s (601 points, which is
  # a step of 0.01 when s = 1).
  grid <- seq(m - 3 * s, m + 3 * s, length.out = 601)
  lines(grid, dnorm(grid, m, s / sqrt(n)), lwd = 2, col = "black", type = "l")
  invisible(NULL)
}
# Draw the plot for sample sizes 1, 4 and 30, in that order.
invisible(lapply(c(1, 4, 30), mean.histplot))
# Poisson example: L is the rate passed to rpois(); the normal curve uses
# mean m = L and standard deviation s = sqrt(L).
L <- 2
s <- sqrt(L)
m <- L
# define function to draw a plot
mean.histplot <- function(n, N = 50000) {
  # Plot a density histogram of N simulated sample means, each the average of
  # n Poisson draws with rate L, and overlay the normal density with mean m
  # and standard deviation s / sqrt(n).
  #
  # n - sample size (number of draws averaged per sample)
  # N - number of simulated samples
  #
  # Reads L, m and s from the enclosing environment.
  # Called for its plotting side effect; returns NULL invisibly.

  # Each column of the n x N matrix is one sample of size n.
  sample_means <- colMeans(matrix(rpois(n * N, L), n, N))

  hist(
    sample_means,
    breaks = "Sturges", border = "white", freq = FALSE,
    col = "blue",
    xlab = "Sample mean", ylab = "",
    main = paste("Sample size =", n), axes = FALSE,
    font.main = 1
  )
  box()
  axis(1)

  # Evaluate the curve across the x-range of the plot that hist() just set
  # up, rather than on a fixed [-30, 30] grid: the curve then stays visible
  # for any L and its resolution matches the plotted width.
  x_range <- par("usr")[1:2]
  grid <- seq(x_range[1], x_range[2], length.out = 1001)
  lines(grid, dnorm(grid, m, s / sqrt(n)), lwd = 2, col = "black", type = "l")
  invisible(NULL)
}
# Draw the plot for sample sizes 1, 4 and 30, in that order.
invisible(lapply(c(1, 4, 30), mean.histplot))
# Exponential example: L is the rate passed to rexp(); the normal curve uses
# mean m = 1 / L and standard deviation s = 1 / L.
L <- 3.5
m <- 1 / L
s <- m
# define function to draw a plot
mean.histplot <- function(n, N = 50000) {
  # Plot a density histogram of N simulated sample means, each the average of
  # n exponential draws with rate L, and overlay the normal density with mean
  # m and standard deviation s / sqrt(n).
  #
  # n - sample size (number of draws averaged per sample)
  # N - number of simulated samples
  #
  # Reads L, m and s from the enclosing environment.
  # Called for its plotting side effect; returns NULL invisibly.

  # Each column of the n x N matrix is one sample of size n.
  sample_means <- colMeans(matrix(rexp(n * N, rate = L), n, N))

  hist(
    sample_means,
    breaks = "Sturges", border = "white", freq = FALSE,
    col = "blue",
    xlab = "Sample mean", ylab = "",
    main = paste("Sample size =", n), axes = FALSE,
    font.main = 1
  )
  box()
  axis(1)

  # Evaluate the curve across the x-range of the plot that hist() just set
  # up, rather than on a fixed [-30, 30] grid: the curve then stays visible
  # for any L and its resolution matches the plotted width.
  x_range <- par("usr")[1:2]
  grid <- seq(x_range[1], x_range[2], length.out = 1001)
  lines(grid, dnorm(grid, m, s / sqrt(n)), lwd = 2, col = "black", type = "l")
  invisible(NULL)
}
# Draw the plot for sample sizes 1, 4 and 30, in that order.
invisible(lapply(c(1, 4, 30), mean.histplot))
# Binomial example: n1 trials with success probability p. The normal curve
# uses mean m = n1 * p and standard deviation s = sqrt(n1 * p * q).
n1 <- 10
p <- 0.7
q <- 1 - p
m <- n1 * p
s <- sqrt(m * q)
# define function to draw a plot
mean.histplot <- function(n, N = 50000) {
  # Plot a density histogram of N simulated sample means, each the average of
  # n binomial draws (size n1, probability p), and overlay the normal density
  # with mean m and standard deviation s / sqrt(n).
  #
  # n - sample size (number of draws averaged per sample)
  # N - number of simulated samples
  #
  # Reads n1, p, m and s from the enclosing environment.
  # Called for its plotting side effect; returns NULL invisibly.

  # Each column of the n x N matrix is one sample of size n.
  sample_means <- colMeans(
    matrix(rbinom(n * N, size = n1, prob = p), n, N)
  )

  hist(
    sample_means,
    breaks = "Sturges", border = "white", freq = FALSE,
    col = "blue",
    xlab = "Sample mean", ylab = "",
    main = paste("Sample size =", n), axes = FALSE,
    font.main = 1
  )
  box()
  axis(1)

  # Evaluate the curve across the x-range of the plot that hist() just set
  # up, rather than on a fixed [-30, 30] grid: the curve then stays visible
  # for any n1 and p and its resolution matches the plotted width.
  x_range <- par("usr")[1:2]
  grid <- seq(x_range[1], x_range[2], length.out = 1001)
  lines(grid, dnorm(grid, m, s / sqrt(n)), lwd = 2, col = "black", type = "l")
  invisible(NULL)
}
# Draw the plot for sample sizes 1, 4 and 30, in that order.
invisible(lapply(c(1, 4, 30), mean.histplot))
# Binomial parameters for the plotly comparison: n1 trials with success
# probability p; m = n1 * p and s = sqrt(n1 * p * q).
n1 <- 10
p <- 0.7
q <- 1 - p
m <- n1 * p
s <- sqrt(m * q)
# define function to simulate sample means (returned to the caller, not plotted)
sample.mean <- function(n, N = 50000) {
  # Simulate N sample means, each the average of n binomial draws with size
  # n1 and success probability p (both read from the enclosing environment).
  #
  # n - sample size (number of draws averaged per sample)
  # N - number of simulated samples
  #
  # Returns a numeric vector of length N.
  draws <- rbinom(n * N, size = n1, prob = p)
  # Arrange the draws so that every column is one sample of size n.
  colMeans(matrix(draws, nrow = n, ncol = N))
}
library(plotly)

# Simulated sample means for sample sizes 1, 4 and 30.
trace0 <- sample.mean(1)
trace1 <- sample.mean(4)
trace2 <- sample.mean(30)

# Overlay the three histograms on one axis. The bars are drawn
# semi-transparent (alpha = 0.6): with barmode = "overlay", opaque bars drawn
# later would cover the traces underneath them.
fig <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = ~trace0, name = "Sample size = 1") %>%
  add_histogram(x = ~trace1, name = "Sample size = 4") %>%
  add_histogram(x = ~trace2, name = "Sample size = 30") %>%
  layout(barmode = "overlay", xaxis = list(title = "Sample mean"))
fig