[R] gam plots and seWithMean

Greg Dropkin gregd at gn.apc.org
Thu Oct 21 18:57:32 CEST 2010


hello

I'm learning mgcv and would like to obtain numerical output corresponding
to plot.gam.

I can do so when seWithMean=FALSE (the default)
but only approximately when seWithMean=TRUE.

Can anyone show how to obtain the exact values?

Alternatively, can you clarify the explanation in the manual
"Note that, if seWithMean=TRUE, the confidence bands include
the uncertainty about the overall mean. In other words although
each smooth is shown centred, the confidence bands are obtained
as if every other term in the model was constrained to have average 0
(average taken over the covariate values), except for the smooth concerned".


an example with a poisson gam
(re-run this a few times as the plots can vary significantly)

-------

library(mgcv)

#simulate some data
x1<-runif(500)
x2<-rnorm(500)
x3<-rpois(500,3)
d<-runif(500)
t<-runif(500,20,50)
linp<--6.5+x1+2*x2-x3+2*exp(-2*d)*sin(2*pi*d)
lam<-t*exp(linp)
y<-rpois(500,lam)
sum(y)
table(y)

#fit the data without d by glm and with d by gam
f1<-glm(y~offset(log(t))+x1+x2+x3,poisson)
f2<-gam(update.formula(as.formula(f1),~.+s(d)),poisson)
anova(f1,f2)
summary(f2)
plot(f2)

#the solid line s(d)
dat<-data.frame(t,d,x1,x2,x3)
datn<-transform(dat,d=0)
dif<-predict(f2)-predict(f2,datn)
cdif<-dif-mean(dif)
points(d,cdif,cex=0.5,col=rgb(0,1,0,0.2))

#another approach to the solid line s(d)
devAskNewPage(ask=T)
plot(f2)
premat2<-PredictMat(f2$smooth[[1]],data=dat)
dim(premat2)
pars<-f2$coef
pars2<-pars[5:13]
pars2<-as.matrix(pars2,9,1)
pars2
points(d,premat2%*%pars2,cex=0.5,col=rgb(0,0.6,0.3,0.2))
#premat2%*%pars2 = cdif

#confidence intervals when seWithMean = FALSE
devAskNewPage(ask=T)
plot(f2)
points(d,cdif,cex=0.5,col=rgb(0,1,0,0.2))
Vp2<-f2$Vp[5:13,5:13]
se2<-sqrt(diag(premat2%*%Vp2%*%t(premat2)))
points(d,cdif+qnorm(0.975)*se2,cex=0.5,col=rgb(1,0,0,0.2))
points(d,cdif-qnorm(0.975)*se2,cex=0.5,col=rgb(0,0,1,0.2))
#numerical output for the confidence bands is given by
#cdif+qnorm(0.975)*se2
#cdif-qnorm(0.975)*se2

#confidence intervals when seWithMean = TRUE
devAskNewPage(ask=T)
plot(f2,seWithMean=T)
points(d,cdif,cex=0.5,col=rgb(0,1,0,0.2))
premat<-cbind(rep(1,500),x1,x2,x3,premat2)
pars<-as.matrix(pars,13,1)
range(predict.gam(f2)-log(t)-as.numeric(premat%*%pars))
#so predict.gam(f2) = log(t) + as.numeric(premat%*%pars)
sew<-sqrt(diag(premat%*%f2$Vp%*%t(premat)))
range(sew-predict(f2,se.fit=T)$se.fit)
#sew = predict(f2,se.fit=T)$se.fit
points(d,cdif+qnorm(0.975)*sew,cex=0.5,col=rgb(1,0,0,0.2))
points(d,cdif-qnorm(0.975)*sew,cex=0.5,col=rgb(0,0,1,0.2))
#sort of

#smoothing the sew estimates
devAskNewPage(ask=T)
plot(f2,seWithMean=T)
points(d,cdif,cex=0.5,col=rgb(0,1,0,0.2))
lines(smooth.spline(cdif+qnorm(0.975)*sew~d),col="red")
lines(smooth.spline(cdif-qnorm(0.975)*sew~d),col="blue")
points(smooth.spline(cdif+qnorm(0.975)*sew~d)$x,smooth.spline(cdif+qnorm(0.975)*sew~d)$y,cex=0.8,col=rgb(1,0,0,0.1))
points(smooth.spline(cdif+qnorm(0.975)*sew~d)$x,smooth.spline(cdif-qnorm(0.975)*sew~d)$y,cex=0.8,col=rgb(0,0,1,0.1))
#close, but not exact
#numerical values corresponding to sort(d) are extracted as
#smooth.spline(cdif+qnorm(0.975)*sew~d)$y
#smooth.spline(cdif-qnorm(0.975)*sew~d)$y

#smoothing sew with 93% confidence levels
devAskNewPage(ask=T)
plot(f2,seWithMean=T)
points(d,cdif,cex=0.5,col=rgb(0,1,0,0.2))
lines(smooth.spline(cdif+qnorm(0.965)*sew~d),col="red")
lines(smooth.spline(cdif-qnorm(0.965)*sew~d),col="blue")
points(smooth.spline(cdif+qnorm(0.965)*sew~d)$x,smooth.spline(cdif+qnorm(0.965)*sew~d)$y,cex=0.8,col=rgb(1,0,0,0.1))
points(smooth.spline(cdif+qnorm(0.965)*sew~d)$x,smooth.spline(cdif-qnorm(0.965)*sew~d)$y,cex=0.8,col=rgb(0,0,1,0.1))
#possibly closer, but not exact
#numerical values corresponding to sort(d) are extracted as
#smooth.spline(cdif+qnorm(0.965)*sew~d)$y
#smooth.spline(cdif-qnorm(0.965)*sew~d)$y



More information about the R-help mailing list