library(mlbench) ##load Bostong Housing data data(BostonHousing) plot(BostonHousing,pch=20,cex=0.5,col=rgb(0.2,0.2,0.2,0.5)) ## prepare Y and X matrix Y <- BostonHousing$medv X <- BostonHousing[,1:13] ## make 4th variable a numeric X[,4] <- as.numeric(X[,4]) n <- nrow(X) ## fit a linear model linmod <- lm(Y~ ., data=X) summary(linmod) Yhat <- fitted(linmod) ## add intercept term (first column) by hand for the following X <- cbind(rep(1,n),as.matrix(X)) ############################## compute simultaneous confidence bounds for conditional means ## Projection matrix P=Proj Proj <- X %*% solve( t(X) %*% X) %*% t(X) p <- ncol(X) sigmahatsq <- sum( residuals(linmod)^2)/ (n-p) bounds <- sqrt(diag(Proj) * sigmahatsq * p * df(0.95,p,n-p)) ####################### show confidence intervals for conditional means at each observation plot(Yhat, Y) for (i in 1:n){ lines( rep(Yhat[i],2), Yhat[i] + c(-1,1) * bounds[i],col=2) } ####################### compute bootstrap samples of predicted values nsim <- 100 Yhatboot <- matrix(nrow=n,ncol=nsim) for (sim in 1:nsim){ useSamples <- sample(1:n,n,replace=TRUE) Yboot <- Y[useSamples] Xboot <- X[useSamples,] linBoot <- lm( Yboot ~ .-1, data=as.data.frame(Xboot)) Yhatboot[,sim] <- predict(linBoot,as.data.frame(X)) } ## compare bootstrap mean with original point prediction plot( apply(Yhatboot,1,mean), Yhat) ## compare standard deviation across bootstrap samples with the theoretical bound plot( apply(Yhatboot,1,sd), bounds)