[R] gamm design matrices

Thu Mar 6 11:43:56 CET 2014

Hello Jarrod and Simon,

i have a similiar issue. I want to use splines with the MCMCglmm-package. I
need the packages for various reasons. I try to incorporate the splines via
Jarrod´s function, which based on the mgvc-package.
I attached my data and codes. For the thin-plate-splines the fixed parts are
similiar between MCMCglmm and gamm4, but the random parts differ. It is
worth noting, that the predictions are the same.
When using P-Splines instead of thin-plate-splines both fixed parts and
random parts differ. I guess, i made a mistake somewhere.
I also add two verions where more than one covariate has spline-functions. 

data_test.csv <http://r.789695.n4.nabble.com/file/n4686322/data_test.csv>  
test_splines_MCMCglmm.R
<http://r.789695.n4.nabble.com/file/n4686322/test_splines_MCMCglmm.R>  

R-Code:

library(gamm4)
library(MCMCglmm)
library(lme4)
library(nlme)

### read data: y = respones variable; x_1...x_8 covariates (x_8=dummy);
country = groups
data <- read.csv2("....data_test.csv")	
attach(data)

### grand mean centering of covariates (x_8 not centered [dummy])
grand_mean <- function(gm){
gm - mean(gm)
}

data$x_1_c <- grand_mean(x_1)
data$x_2_c <- grand_mean(x_2)
data$x_3_c <- grand_mean(x_3)
data$x_4_c <- grand_mean(x_4)
data$x_5_c <- grand_mean(x_5)
data$x_6_c <- grand_mean(x_6)
data$x_7_c <- grand_mean(x_7)

### Model without Splines in MCMCglmm and gamm4 (Varying Intercept)

prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1),
nu = 0.002)))
mc <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country,data =data,family="gaussian" , verbose = F,prior =
prior_mc1,
 burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc)

# same model in gamm4

gamm <- gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm$mer)

### identical output!!! ###

### use standard splines in the mgvc-package(thin plate splines)
### use Jarrod´s function

library(mgcv)

spl2<-function(formula, data, p=TRUE, dataX=data){

  aug<-nrow(data)-nrow(dataX)

  if(aug!=0){
    if(aug<0){
      stop("sorry nrow(dataX) must be less than or equal to nrow(data)")
    }else{
      augX<-matrix(0, aug, ncol(dataX))
      colnames(augX)<-colnames(dataX)
      dataX<-rbind(dataX, augX)
    }
  }
  smooth.spec.object<-interpret.gam(formula)$smooth.spec[[1]]
  sm<-smoothCon(smooth.spec.object, data=data, knots=NULL,absorb.cons=TRUE,
dataX=dataX)[[1]]

  Sed<-eigen(sm$S[[1]])
  Su<-Sed$vectors
  Sd<-Sed$values
  nonzeros <- which(Sd > sqrt(.Machine$double.eps))

  if(p){
    Zn<-sm$X%*%Su[,nonzeros, drop=FALSE]%*%diag(1/sqrt(Sd[nonzeros]))
  }else{
    Zn<-sm$X[,-nonzeros, drop=FALSE]
  }
  return(Zn[1:(nrow(data)-aug),,drop=FALSE])
}

# A function for setting up fixed (p=FALSE) and random (p=TRUE) effect
design matrices
# for a reparmetrised smooth.
# data is used to set up the basis, but the design matrices are based on
dataX 

### set thin plate spline for ONE COVARIATE x_5_c ###

data$Xo<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn<-spl2(~s(x_5_c,k=10), data=data)

# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1),
nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn),data =data,family="gaussian" , verbose = F,prior =
prior_mc1,
 burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl)

# gamm4
gamm_spl <-
gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl$mer)

plot(predict(gamm_spl$mer), predict(mc_spl, marginal=NULL))

### results for fixed part a similiar, but not for random parts
### Prediction are the same
### ???

### use P-Splines for covariate x_5_c

data$Xo_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data, p=F)
data$Zn_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data)

# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1),
nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_p <-
MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo_p+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn_p),data =data,family="gaussian" , verbose = F,prior
= prior_mc1,
 burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_p)

# gamm4
gamm_spl_p <-
gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,bs="ps",k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_p$mer)

plot(predict(gamm_spl_p$mer), predict(mc_spl_p, marginal=NULL))

### results for fixed parts and random parts differ
### Prediction are nearly the same
### ???

#-----------------------------------------------------------------------

### set thin plate splines for covariates x_1_c ... x_7_c

data$Xo_1<-spl2(~s(x_1_c,k=10), data=data, p=F)
data$Zn_1<-spl2(~s(x_1_c,k=10), data=data)
data$Xo_2<-spl2(~s(x_2_c,k=10), data=data, p=F)
data$Zn_2<-spl2(~s(x_2_c,k=10), data=data)
data$Xo_3<-spl2(~s(x_3_c,k=10), data=data, p=F)
data$Zn_3<-spl2(~s(x_3_c,k=10), data=data)
data$Xo_4<-spl2(~s(x_4_c,k=10), data=data, p=F)
data$Zn_4<-spl2(~s(x_4_c,k=10), data=data)
data$Xo_5<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn_5<-spl2(~s(x_5_c,k=10), data=data)
data$Xo_6<-spl2(~s(x_6_c,k=10), data=data, p=F)
data$Zn_6<-spl2(~s(x_6_c,k=10), data=data)
data$Xo_7<-spl2(~s(x_7_c,k=10), data=data, p=F)
data$Zn_7<-spl2(~s(x_7_c,k=10), data=data)

# Version a (that will take some time)

prior_mc_spl_a <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002),
G3 = list(V = diag(1), nu = 0.002),G4 = list(V = diag(1), nu = 0.002),G5 =
list(V = diag(1), nu = 0.002),
G6 = list(V = diag(1), nu = 0.002),G7 = list(V = diag(1), nu = 0.002),G8 =
list(V = diag(1), nu = 0.002)))
mc_spl_a <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random =
~country+idv(Zn_1)+idv(Zn_2)+idv(Zn_3)+idv(Zn_4)+idv(Zn_5)+idv(Zn_6)+idv(Zn_7),
data =data,family="gaussian" , verbose = F,prior = prior_mc_spl_a,
 burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_a)

gamm_spl_a <-
gamm4(y~s(x_1_c,k=10)+s(x_2_c,k=10)+s(x_3_c,k=10)+s(x_4_c,k=10)+s(x_5_c,k=10)+s(x_6_c,k=10)+s(x_7_c,k=10)+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_a$mer)

plot(predict(gamm_spl_a$mer), predict(mc_spl_a, marginal=NULL))

### fixed parts are similiar, but random parts differ
### Predictions are nearly the same
### ???

# Version b: cbind Zns to one matrix

Zn_b <-
cbind(data$Zn_1,data$Zn_2,data$Zn_3,data$Zn_4,data$Zn_5,data$Zn_6,data$Zn_7)

prior_mc_spl_b <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V =
diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_b <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random = ~country+idv(Zn_b),data =data,family="gaussian" , verbose = F,prior
= prior_mc_spl_b,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_b)

plot(predict(mc_spl_b, marginal=NULL), predict(mc_spl_a, marginal=NULL))

### Prediction are the same between Version a and Version b
### So, are both methods ok???

--
View this message in context: http://r.789695.n4.nabble.com/gamm-design-matrices-tp4686107p4686322.html
Sent from the R help mailing list archive at Nabble.com.