# Variance of the trimmed mean (example of Hertzsprung)

alpha <- seq(0.001,0.499,0.001)
a <- qnorm(1-alpha)
sigma2 <- 2*(0.5+alpha*(a^2-1) - a*dnorm(a))/(1-2*alpha)^2
                                        #asymptotic variance
plot(alpha,sigma2,type="l")
# comparing with Hertzsprung's simulation results 
points((1:11)/24,c(1.013,1.037,1.069,1.095,1.139,1.184,1.232,1.283,
                   1.345,1.407,1.489),pch=2)
# New simulations
N <- 1000
x <- matrix(rnorm(24*N),nrow=N,ncol=24)
erg <- matrix(0,N,12)
for (k in (0:11)) erg[,k+1] <- apply(x,1,mean,trim=k/24)
var.trim <- 24*apply(erg^2,2,mean)
points((0:11)/24,var.trim,col=3)  # standard estimator
points((0:11)/24,var.trim/var.trim[1],col=4) # ratio estimator
hist(erg[,3])


# composition of Hertzsprung's urn
x <- seq(-4,4,by=0.01)
n <- length(x)-1
freq <- 12534*c(pnorm(x[1]+0.005),pnorm(x[2:n]+0.005)-pnorm(x[2:n]-0.005),
                1-pnorm(x[n]+0.005))
plot(x,round(freq),type="h")
sum(round(freq))
urn <- rep(x,freq)
# reproducing Hertzsprung's simulation
x <- matrix(sample(urn,size=24*N,replace=TRUE),nrow=N,ncol=24)
erg <- matrix(0,N,12)
for (k in (0:11)) erg[,k+1] <- apply(x,1,mean,trim=k/24)
var.trim <- 24*apply(erg^2,2,mean)
points((0:11)/24,var.trim,col=3)  # standard estimator
points((0:11)/24,var.trim/var.trim[1],col=4) # ratio estimator

#Bootstrap
# Data set used: Sublimation heat of platinum, from Rice, Chap. 10.4
plat <- c(136.3,136.6,135.8,135.4,134.7,135.0,134.1,143.3,147.8,148.8,134.8,
          135.2,134.9,146.5,141.2,135.4,134.8,135.8,135.0,133.7,134.4,134.9,
          134.8,134.5,134.3,135.2)
hist(plat)
n <- length(plat)

# direct implementation of the bootstrap
m <- floor((n-1)/2)
N <- 1000
x <- matrix(sample(plat,size=n*N,replace=TRUE),nrow=N,ncol=n)
erg <- matrix(0,N,m+1)
for (k in (0:m)) erg[,k+1] <- apply(x,1,mean,trim=k/n)
var.trim <- apply(erg,2,var)
plot((0:m)/n,var.trim/var.trim[1],ylab="variance ratio",
     xlab="trimming proportion")
hist(erg[,13])
round(sqrt(var.trim),3)


# Comparing the real data with bootstrap samples
#Using histograms
par(mfrow=c(2,2)) 
hist(plat,breaks <- seq(132,150,2)) 
plat.b <- plat[ceiling(n*runif(n))] # Bootstrap sample 
plat.b <- sample(plat,size=n,replace=T) # same thing
hist(plat.b,breaks <- seq(132,150,2))
plat.f <- factor(plat) # values which occur, sorted, without ties
table(plat.f) # frequencies in the original sample
table(factor(plat.b,levels=levels(plat.f)))
                                  # frequencies in the bootstrap sample
#Using the normal plot
qqnorm(plat) 
plat.b <- plat[ceiling(n*runif(n))]
qqnorm(plat.b) #Normalplot of a bootstrap sample 
#Using the distribution function
par(mfrow=c(1,1))
plot.stepfun(plat,verticals=T,do.points=F) # empirical distribution function
plat.b <- plat[ceiling(n*runif(n))]
plot.stepfun(plat.b,verticals=T,do.points=F,add=T,col.hor=3,col.vert=3)
# empirical distribution function of the bootstrap sample


# Using functions of Davison and Hinkley 
library("boot")
help(boot)
t.mean <- function(d,ind) mean(d[ind],0.2)
plat.boot <- boot(plat, t.mean, R=999, stype="i")
hist(plat.boot$t)
sqrt(var(plat.boot$t))
qqnorm(plat.boot$t)
boot.ci(plat.boot, conf=c(0.80,0.95),type=c("norm","basic","perc","bca"))
# confidence intervals using the bootstrap. All methods except norm
# correct for skewness of the trimmed mean.


data("bigcity") # population size of American cities 1920 and 1930 
plot(bigcity$u,bigcity$x)
ratio <- function(d, ind)
          sum(d$x[ind])/sum(d$u[ind]) # Estimate of increase
city.boot <- boot(bigcity, ratio, R=999, stype="i")
hist(city.boot$t)
sqrt(var(city.boot$t))
qqnorm(city.boot$t)
boot.ci(city.boot, conf=c(0.90,0.95),type=c("norm","basic","perc","bca"))