## Example from Yates 1937: Yield (Y) of ## three varieties of oat (V) ## four levels of nitrogen a (N) ## planted in six blocks (B) ## each block was split into three whole plots (B:V) ## each whole plot was split into four sub-plots (B:V:N) ## get dataset require(MASS) data(oats) # load dataset oats str(oats) head(oats, 18) require(lme4a) #### ## first model: default split plot r.m1 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats) r.m1 # Model Matrix X head(with(env(r.m1), X),n=9) image(with(env(r.m1), X)) # Model Matrix Z t(env(r.m1)$Zt) image(t(env(r.m1)$Zt)) ## profile plots pr1 <- profile(r.m1) xyplot(pr1,aspect=1.3) xyplot(pr1,aspect=1.3,absVal=TRUE) splom(pr1) ## confidence intervals confint(pr1,level=0.95) ## anova anova(r.m1) # does not work at the moment r.m1r <- update(r.m1, Y ~ . - V:N) anova(r.m1, r.m1r) # => drop V:N #### ## second model: variety as random r.m2 <- lmer(Y ~ N + (1|V) + (1|V:N) + (1|B) + (1|B:V), oats) r.m2 # => drop V:N random effect r.m2r <- update(r.m2, Y ~ . - (1|V:N)) ## profile plots pr2 <- profile(r.m2r) # does not work (neither for r.m2 (zero sigma_{V:N}?)) ## anova anova(r.m2, r.m2r) # of course => drop V:N # Model Matrix X head(with(env(r.m2), X),n=9) image(with(env(r.m2), X)) # Model Matrix Z t(env(r.m2)$Zt) image(t(env(r.m2)$Zt)) #### ## third model: drop first observation r.m3 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats, subset = -1) r.m3 #### ## fourth model: drop 0.0cwt in first block r.m4 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats, subset = -c(1,5,9)) r.m4 #### ## third model: drop 0.0cwt and Golden.Rain combinations ## therefore: default split plot but no interaction V:N r.m5 <- lmer(Y ~ V + N + (1|B) + (1|B:V), oats, subset = !(oats$N == '0.0cwt' & oats$V == 'Golden.rain')) r.m5 #### ## fourth model: drop 0.0cwt altogether r.m6 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats, subset = !(oats$N == '0.0cwt')) r.m6 #### ## prediction ## choose model: r.m <- r.m1 # complete data r.m <- r.m2 # Variety as random effect r.m <- r.m3 # without first obs r.m <- r.m4 # without 0.0cwt in first block r.m <- r.m5 # without 0.0cwt, Golden Rain combination r.m <- r.m6 # without 0.0cwt altogether ## predict for original data.frame mdata <- with(env(r.m), frame) mX <- as(with(env(r.m), X), 'matrix') mZ <- as(t(env(r.m)$Zt), 'matrix') ## get parameters r.m.fe <- fixef(r.m) ## and estimates of random effects r.m.re <- ranef(r.m) # this form is not very useful r.m.re <- unlist(sapply(r.m.re, t)) # transform into vector names(r.m.re) <- dimnames(mZ)[[2]] # add names #### ## predictions using aggregate: ## marginal prediction: use only fixed effects r.m.pm <- as.vector(mX %*% r.m.fe) # == X %*% beta aggregate(r.m.pm, mdata['N'], mean) # For nitrogen, averaged over variety aggregate(r.m.pm, mdata['V'], mean) # For variety, averaged over nitrogen aggregate(r.m.pm, mdata[c('V', 'N')], mean) # For variety x nitrogen ## conditional prediction add also random effects r.m.pc <- r.m.pm + as.vector(mZ %*% r.m.re) # == X %*% beta + Z %*% b aggregate(r.m.pc, mdata['N'], mean) # For nitrogen, averaged over variety aggregate(r.m.pc, mdata['V'], mean) # For variety, averaged over nitrogen aggregate(r.m.pc, mdata[c('V', 'N')], mean) # For variety x nitrogen #### ## prediction using matrix D ( + std errs for confidence intervals) ## For nitrogen, averaged over variety ## marginal prediction ## create matrix D_tau Dt.N <- as(aggregate(mX, mdata['N'], mean)[,-1], 'matrix') # automated ## create matrix D_u Du.N <- as(aggregate(mZ, mdata['N'], mean)[,-1], 'matrix') # automated ## calculate marginal predictions and se for confidence intervals ## vcov(r.m) returns matrix C^-1 = cov(beta) Etheta <- sigma(r.m)^2 * with(env(r.m), tcrossprod(Lambda)) # == cov(b) mp <- data.frame(mpred=Dt.N %*% r.m.fe, se.CI=sqrt(diag(Dt.N %*% tcrossprod(vcov(r.m), Dt.N))), se.PI=sqrt(diag(Dt.N %*% tcrossprod(vcov(r.m), Dt.N) + Du.N %*% tcrossprod(Etheta, Du.N) + sigma(r.m)^2)), row.names = levels(mdata$N)) cat('\nMarginal prediction for Nitrogen\n'); print(mp, digits = 4) ## conditional prediction ( cov(b) == 0) ## covariance matrix of beta without random effects ## in general: only exclude random effects that we're conditioning on ## don't calculate (X' %*% X)^{-1} directly, use qr decomposition mX.Rinv <- solve(qr.R(qr(mX))) # so we only have to invert R (upper-tri.) covX <- sigma(r.m)^2 * tcrossprod(mX.Rinv) # == sigma^2 * (X' %*% X)^{-1} ## calculate conditional predictions and se for confidence intervals cp <- data.frame(cpred=Dt.N %*% r.m.fe + Du.N %*% r.m.re, se.CI=sqrt(diag(Dt.N %*% tcrossprod(covX, Dt.N))), se.PI=sqrt(diag(Dt.N %*% tcrossprod(covX, Dt.N) + sigma(r.m)^2)), row.names = levels(mdata$N)) cat('\nConditional prediction for Nitrogen\n'); print(cp, digits = 4) #### ## For variety, averaged over nitrogen ## marginal prediction ## create matrix D_tau Dt.V <- as(aggregate(mX, mdata['V'], mean)[,-1], 'matrix') ## create matrix D_u Du.V <- as(aggregate(mZ, mdata['V'], mean)[,-1], 'matrix') ## calculate marginal predictions and se for confidence intervals ## vcov(r.m) returns matrix C^-1 = cov(beta) ## Etheta from above mp <- data.frame(mpred=Dt.V %*% r.m.fe, se.CI=sqrt(diag(Dt.V %*% tcrossprod(vcov(r.m), Dt.V))), se.PI=sqrt(diag(Dt.V %*% tcrossprod(vcov(r.m), Dt.V) + Du.V %*% tcrossprod(Etheta, Du.V) + sigma(r.m)^2)), row.names = levels(mdata$V)) cat('\nMarginal prediction for Varieties\n'); print(mp, digits = 4) ## conditional prediction ## calculate conditional predictions and se for confidence intervals ## use covX from above cp <- data.frame(cpred=Dt.V %*% r.m.fe + Du.V %*% r.m.re, se.CI=sqrt(diag(Dt.V %*% tcrossprod(covX, Dt.V))), se.PI=sqrt(diag(Dt.V %*% tcrossprod(covX, Dt.V) + sigma(r.m)^2)), row.names = levels(mdata$V)) cat('\nConditional prediction for Varieties\n'); print(cp, digits = 4)