[R-sig-ME] Splines in MCMCglmm
Linus Holtermann
holtermann at hwwi.org
Thu Mar 6 11:53:50 CET 2014
Hello,
For various reasons i need the MCMCglmm-package for my model specification, but it seems to be useful to incorporate splines in my model. I want to employ the same procedure as in the gamm4-package. I try to incorporate the splines via Jarrod´s function, which based on the mgvc-package. I attached my data and codes.
For the thin-plate-splines the fixed parts are similiar between MCMCglmm and gamm4, but the random parts differ. It is worth noting, that the predictions are the same.
When using P-Splines instead of thin-plate-splines both fixed parts and random parts differ. I guess, i made a mistake somewhere.
I also add two verions where more than one covariate has spline-functions.
Are my codes ok or is there a mistake? Why does the results for P-Spline differ by a substantial amount?
R-Code:
library(gamm4)
library(MCMCglmm)
library(lme4)
library(nlme)
### read data: y = respones variable; x_1...x_8 covariates (x_8=dummy); country = groups
data <- read.csv2("....data_test.csv")
attach(data)
### grand mean centering of covariates (x_8 not centered [dummy])
grand_mean <- function(gm){
gm - mean(gm)
}
data$x_1_c <- grand_mean(x_1)
data$x_2_c <- grand_mean(x_2)
data$x_3_c <- grand_mean(x_3)
data$x_4_c <- grand_mean(x_4)
data$x_5_c <- grand_mean(x_5)
data$x_6_c <- grand_mean(x_6)
data$x_7_c <- grand_mean(x_7)
### Model without Splines in MCMCglmm and gamm4 (Varying Intercept)
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1), nu = 0.002)))
mc <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country,data =data,family="gaussian" , verbose = F,prior = prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc)
# same model in gamm4
gamm <- gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+x_5_c+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm$mer)
### identical output!!! ###
### use standard splines in the mgvc-package(thin plate splines)
### use Jarrod´s function
library(mgcv)
spl2<-function(formula, data, p=TRUE, dataX=data){
aug<-nrow(data)-nrow(dataX)
if(aug!=0){
if(aug<0){
stop("sorry nrow(dataX) must be less than or equal to nrow(data)")
}else{
augX<-matrix(0, aug, ncol(dataX))
colnames(augX)<-colnames(dataX)
dataX<-rbind(dataX, augX)
}
}
smooth.spec.object<-interpret.gam(formula)$smooth.spec[[1]]
sm<-smoothCon(smooth.spec.object, data=data, knots=NULL,absorb.cons=TRUE, dataX=dataX)[[1]]
Sed<-eigen(sm$S[[1]])
Su<-Sed$vectors
Sd<-Sed$values
nonzeros <- which(Sd > sqrt(.Machine$double.eps))
if(p){
Zn<-sm$X%*%Su[,nonzeros, drop=FALSE]%*%diag(1/sqrt(Sd[nonzeros]))
}else{
Zn<-sm$X[,-nonzeros, drop=FALSE]
}
return(Zn[1:(nrow(data)-aug),,drop=FALSE])
}
# A function for setting up fixed (p=FALSE) and random (p=TRUE) effect design matrices
# for a reparmetrised smooth.
# data is used to set up the basis, but the design matrices are based on dataX
### set thin plate spline for ONE COVARIATE x_5_c ###
data$Xo<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn<-spl2(~s(x_5_c,k=10), data=data)
# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn),data =data,family="gaussian" , verbose = F,prior = prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl)
# gamm4
gamm_spl <- gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl$mer)
plot(predict(gamm_spl$mer), predict(mc_spl, marginal=NULL))
### results for fixed part a similiar, but not for random parts
### Prediction are the same
### ???
### use P-Splines for covariate x_5_c
data$Xo_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data, p=F)
data$Zn_p<-spl2(~s(x_5_c,bs="ps",k=10), data=data)
# MCMCglmm
prior_mc1 <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_p <- MCMCglmm(y~x_1_c+x_2_c+x_3_c+x_4_c+Xo_p+x_6_c+x_7_c+x_8+x_8:x_7_c,
random = ~country+idv(Zn_p),data =data,family="gaussian" , verbose = F,prior = prior_mc1,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_p)
# gamm4
gamm_spl_p <- gamm4(y~x_1_c+x_2_c+x_3_c+x_4_c+s(x_5_c,bs="ps",k=10)+x_6_c+x_7_c+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_p$mer)
plot(predict(gamm_spl_p$mer), predict(mc_spl_p, marginal=NULL))
### results for fixed parts and random parts differ
### Prediction are nearly the same
### ???
#-----------------------------------------------------------------------
### set thin plate splines for covariates x_1_c ... x_7_c
data$Xo_1<-spl2(~s(x_1_c,k=10), data=data, p=F)
data$Zn_1<-spl2(~s(x_1_c,k=10), data=data)
data$Xo_2<-spl2(~s(x_2_c,k=10), data=data, p=F)
data$Zn_2<-spl2(~s(x_2_c,k=10), data=data)
data$Xo_3<-spl2(~s(x_3_c,k=10), data=data, p=F)
data$Zn_3<-spl2(~s(x_3_c,k=10), data=data)
data$Xo_4<-spl2(~s(x_4_c,k=10), data=data, p=F)
data$Zn_4<-spl2(~s(x_4_c,k=10), data=data)
data$Xo_5<-spl2(~s(x_5_c,k=10), data=data, p=F)
data$Zn_5<-spl2(~s(x_5_c,k=10), data=data)
data$Xo_6<-spl2(~s(x_6_c,k=10), data=data, p=F)
data$Zn_6<-spl2(~s(x_6_c,k=10), data=data)
data$Xo_7<-spl2(~s(x_7_c,k=10), data=data, p=F)
data$Zn_7<-spl2(~s(x_7_c,k=10), data=data)
# Version a (that will take some time)
prior_mc_spl_a <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002),
G3 = list(V = diag(1), nu = 0.002),G4 = list(V = diag(1), nu = 0.002),G5 = list(V = diag(1), nu = 0.002),
G6 = list(V = diag(1), nu = 0.002),G7 = list(V = diag(1), nu = 0.002),G8 = list(V = diag(1), nu = 0.002)))
mc_spl_a <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random = ~country+idv(Zn_1)+idv(Zn_2)+idv(Zn_3)+idv(Zn_4)+idv(Zn_5)+idv(Zn_6)+idv(Zn_7),
data =data,family="gaussian" , verbose = F,prior = prior_mc_spl_a,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_a)
gamm_spl_a <- gamm4(y~s(x_1_c,k=10)+s(x_2_c,k=10)+s(x_3_c,k=10)+s(x_4_c,k=10)+s(x_5_c,k=10)+s(x_6_c,k=10)+s(x_7_c,k=10)+x_8+x_8:x_7_c,
data=data,random=~(1|country))
summary(gamm_spl_a$mer)
plot(predict(gamm_spl_a$mer), predict(mc_spl_a, marginal=NULL))
### fixed parts are similiar, but random parts differ
### Predictions are nearly the same
### ???
# Version b: cbind Zns to one matrix
Zn_b <- cbind(data$Zn_1,data$Zn_2,data$Zn_3,data$Zn_4,data$Zn_5,data$Zn_6,data$Zn_7)
prior_mc_spl_b <- list(R = list(V = 1, nu=0.002), G = list(G1 = list(V = diag(1), nu = 0.002),G2 = list(V = diag(1), nu = 0.002)))
mc_spl_b <- MCMCglmm(y~Xo_1+Xo_2+Xo_3+Xo_4+Xo_5+Xo_6+Xo_7+x_8+x_8:x_7_c,
random = ~country+idv(Zn_b),data =data,family="gaussian" , verbose = F,prior = prior_mc_spl_b,
burnin=10000,nitt=110000,thin=10,saveX=T,saveZ=T,saveXL=T,pr=T,pl=T)
summary(mc_spl_b)
plot(predict(mc_spl_b, marginal=NULL), predict(mc_spl_a, marginal=NULL))
### Prediction are the same between Version a and Version b
### So, are both methods ok???
best regards,
Linus Holtermann
Hamburgisches WeltWirtschaftsInstitut gemeinnützige GmbH (HWWI)
Heimhuder Straße 71
20148 Hamburg
Tel +49-(0)40-340576-336
Fax+49-(0)40-340576-776
Internet: www.hwwi.org
Email: holtermann at hwwi.org
AmtsgerichtHamburg HRB 94303
Geschäftsführer: Prof. Dr. Thomas Straubhaar, Gunnar Geyer
Umsatzsteuer-ID: DE 241849425
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: test_splines_MCMCglmm.R
URL: <https://stat.ethz.ch/pipermail/r-sig-mixed-models/attachments/20140306/8f97e5b3/attachment.pl>
More information about the R-sig-mixed-models
mailing list