#Donnerstag, 20.12.2001 library(modreg) data(cars) attach(cars) plot(cars) fit.gcv <- smooth.spline(speed,dist) #smoothing spline mit GCV lines(fit.gcv,col=2) fit.df5 <- smooth.spline(speed,dist,df=5) #mit df lines(fit.df5,col=3) fit.sp0.5 <- smooth.spline(speed,dist,spar=.5) #mit spar lines(fit.sp0.5,col=4) #Verhalten von GCV(smoothing parameter) fit.gcv <- 1:200 fit.df <- 1:200 for(i in 1:200) { fit <- smooth.spline(speed,dist,spar=(i*0.01)) fit.gcv[i] <- fit$cv fit.df[i] <- fit$df } par(mfrow=c(1,2)) plot((1:200)/100,fit.gcv,xlab="spar",main="GCV vs. spar") plot(fit.df,fit.gcv,main="GCV vs. df") #Inferenz hatMat <- function(x, trace = FALSE, pred.sm = function(x,y,...)predict(smooth.spline(x,y, ...), x = x)$y, ...) { ## Purpose: Return Hat matrix of a smoother -- very general (but slow) ## ------------------------------------------------------------------------- ## Arguments: x: the design points ## trace: if TRUE, only the trace of the matrix is returned, i.e., ## sum(diag( )) ## pred.sm: a function(x,y,..), predict(smoother(x,y,...)) ## .... : smoothing parameters, for the pred.sm() function. ## ------------------------------------------------------------------------- ## Author: Martin Maechler, Date: 7 Mar 2001, 11:12 n <- length(x) y <- pred.sm(x, numeric(n), ...) if(!is.numeric(y) || length(y) !=n) stop("`pred.sm' does not return a numeric length n vector") H <- if(trace) 0 else matrix(as.numeric(NA), n,n) for (i in 1:n) { y <- numeric(n) ; y[i] <- 1 y <- pred.sm(x, y, ...) if(trace) H <- H + y[i] else H[,i] <- y } H } S.mat <- hatMat(speed,df=2.635278) #smoother matrix (GCV), aufgrund von x fit.gcv <- smooth.spline(speed,dist) sigma2 <- sum((dist - predict(fit.gcv,x=speed)$y)^2)/(50-fit.gcv$df) #geschaetzte Fehlervarianz var.gcv <- sigma2*diag(S.mat%*%S.mat) #Varianz von fitted values par(mfrow=c(1,1)) plot(cars) lines(fit.gcv,col=2) lines(speed,predict(fit.gcv,x=speed)$y+1.96*sqrt(var.gcv),col=3) lines(speed,predict(fit.gcv,x=speed)$y-1.96*sqrt(var.gcv),col=3) S.mat <- hatMat(speed,df=5) #smoother matrix fit.df5 <- smooth.spline(speed,dist,df=5) sigma2 <- sum((dist - predict(fit.df5,x=speed)$y)^2)/(50-5) var.df5 <- sigma2*diag(S.mat%*%S.mat) lines(fit.df5,col=4) lines(speed,predict(fit.df5,x=speed)$y+1.96*sqrt(var.df5),col=2) lines(speed,predict(fit.df5,x=speed)$y-1.96*sqrt(var.df5),col=2) S.mat <- hatMat(speed,df=10) #smoother matrix fit.df10 <- smooth.spline(speed,dist,df=10) sigma2 <- sum((dist - predict(fit.df10,x=speed)$y)^2)/(50-10) var.df10 <- sigma2*diag(S.mat%*%S.mat) lines(fit.df10,col=6) lines(speed,predict(fit.df10,x=speed)$y+1.96*sqrt(var.df10),col=9) lines(speed,predict(fit.df10,x=speed)$y-1.96*sqrt(var.df10),col=9) #Performance von GCV in Simulation #Modell #Y = 1 + 0.8*x + 2*sin(3*x) + N(0,0.25) res <- matrix(0,ncol=20,nrow=100) set.seed(22) for(i in 1:100) { print(i) x <- runif(100) fx <- 1 + 0.8*x + 2*sin(6*x) y <- fx + rnorm(100,0,0.5) fit <- smooth.spline(x,y) res[i,1] <- sum((predict(fit,x=x)$y - fx)^2) #quadratischer Fehler zur wahren Funktion fuer GCV for(k in 2:20) { fit <- smooth.spline(x,y,df=k) res[i,k] <- sum((predict(fit,x=x)$y - fx)^2) #quadratischer Fehler zur wahren Funktion fuer df=k } } mse <- apply(res,2,mean) #Approximation von E[(\hat{m}(X) - m(X))^2] plot(mse,xlab="df",ylim=c(1,13),main="MSE vs. df (and GCV)") text(1,mse[1]+0.5,"GCV") abline(min(mse),0,col=2)