[R] REML - quasipoisson

Tue Sep 25 14:50:56 CEST 2012

hi

I'm puzzled as to the relation between the REML score computed by gam and
the formula (4) on p.4 here:
http://opus.bath.ac.uk/22707/1/Wood_JRSSB_2011_73_1_3.pdf

I'm ok with this for poisson, or for quasipoisson when phi=1.

However, when phi differs from 1, I'm stuck.

#simulate some data
library(mgcv)
set.seed(1)
x1<-runif(500)
x2<-rnorm(500)
linp<--0.5+x1+exp(-x2^2/2)*sin(4*x2)
y<-rpois(500,exp(linp))

##poisson
#phi=1
m1<-gam(y~s(x1)+s(x2),family="poisson",method="REML")
phi<-m1$scale

#formula
#1st term
S1<-m1$smooth[[1]]$S[[1]]*m1$sp[1]
S2<-m1$smooth[[2]]$S[[1]]*m1$sp[2]
S<-matrix(0,19,19)
for (i in 2:10)
{
for (j in 2:10)
{
S[i,j]=S1[i-1,j-1]
S[i+9,j+9]=S2[i-1,j-1]
}
}
beta<-m1$coef
#penalised deviance
Dp<-m1$dev+t(beta)%*%S%*%beta
F1<-Dp/(2*phi)

#2nd term
F2<-sum(ifelse(y==0,0,y*log(y)-y-log(factorial(y))))

#3rd term
X<-predict(m1,type="lpmatrix")
W<-diag(fitted(m1))
H<-t(X)%*%W%*%X
ldhs<-determinant(H+S,log=TRUE)$modulus[1]
eigS<-eigen(S,only.values=TRUE)$val
lds<-sum(log(eigS[1:16]))
F3<-(ldhs-lds)/2

#4th term
Mp=3
F4<-Mp/2*log(2*pi*phi)

F1-F2+F3-F4
m1$gcv
#reml score = formula

##quasipoisson with scale = 1
#fitting is identical to the poisson case
#F1, F3, and F4 unchanged but F2 is now undefined

m2<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=1)
F1+F3-F4
m2$gcv
#reml score = formula with F2 omitted

##quasipoisson with unknown scale
m3<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=-1)
phiq<-m3$scale

#1st term
S1q<-m3$smooth[[1]]$S[[1]]*m3$sp[1]
S2q<-m3$smooth[[2]]$S[[1]]*m3$sp[2]
Sq<-matrix(0,19,19)
for (i in 2:10)
{
for (j in 2:10)
{
Sq[i,j]=S1q[i-1,j-1]
Sq[i+9,j+9]=S2q[i-1,j-1]
}
}
betaq<-m3$coef
#penalised deviance
Dpq<-m3$dev+t(betaq)%*%Sq%*%betaq
F1q<-Dpq/(2*phiq)

#2nd term undefined

#3rd term
Xq<-predict(m3,type="lpmatrix")
Wq<-diag(fitted(m3))
Hq<-t(Xq)%*%Wq%*%Xq
ldhsq<-determinant(Hq+Sq,log=TRUE)$modulus[1]
eigSq<-eigen(Sq,only.values=TRUE)$val
ldsq<-sum(log(eigSq[1:16]))
F3q<-(ldhsq-ldsq)/2

#4th term
Mp=3
F4q<-Mp/2*log(2*pi*phiq)

F1q+F3q-F4q
m3$gcv

#quite different

#but if phiq is replaced by the Pearson estimate of the scale
P<-sum((y-fitted(m3))^2/fitted(m3))
phip<-P/(500-sum(m3$edf))
F1p<-Dpq/(2*phip)
F3p<-F3q
#third term independent of scale
F4p<-Mp/2*log(2*pi*phip)
F1p+F3p-F4p
m3$gcv

#closer but still wrong

#is there a value of phi which makes this work?
f<-function(t) (Dpq/(2*t) + F3q + Mp/2*log(2*pi*t) - m3$gcv)^2
optimise(f,interval=c(0.5,1.5))$min->phix

Dpq/(2*phix) + F3q + Mp/2*log(2*pi*phix)
m3$gcv
#but what is phix - not the Pearson estimate or the scale returned by gam?

thanks

Greg Dropkin