## Example from Yates 1937: Yield (Y) of 
## three varieties of oat (V)
## four levels of nitrogen a (N)

## planted in six blocks (B)
## each block was split into three whole plots (B:V)
## each whole plot was split into four sub-plots (B:V:N)

## get dataset
require(MASS)
data(oats) # load dataset oats
str(oats)

head(oats, 18)

require(lme4a)

####
## first model: default split plot
r.m1 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats)
r.m1

# Model Matrix X
head(with(env(r.m1), X),n=9)
image(with(env(r.m1), X))

# Model Matrix Z
t(env(r.m1)$Zt)
image(t(env(r.m1)$Zt))


## profile plots
pr1 <- profile(r.m1)

xyplot(pr1,aspect=1.3)
xyplot(pr1,aspect=1.3,absVal=TRUE)
splom(pr1)

## confidence intervals
confint(pr1,level=0.95)

## anova
anova(r.m1) # does not work at the moment
r.m1r <- update(r.m1, Y ~ . - V:N)
anova(r.m1, r.m1r) # => drop V:N

####
## second model: variety as random
r.m2 <- lmer(Y ~ N + (1|V) + (1|V:N) + (1|B) + (1|B:V), oats)
r.m2 # => drop V:N random effect
r.m2r <- update(r.m2, Y ~ . - (1|V:N))

## profile plots
pr2 <- profile(r.m2r) # does not work (neither for r.m2 (zero sigma_{V:N}?))

## anova
anova(r.m2, r.m2r) # of course => drop V:N

# Model Matrix X
head(with(env(r.m2), X),n=9)
image(with(env(r.m2), X))

# Model Matrix Z
t(env(r.m2)$Zt)
image(t(env(r.m2)$Zt))

####
## third model: drop first observation
r.m3 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats, subset = -1)
r.m3

####
## fourth model: drop 0.0cwt in first block
r.m4 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats, subset = -c(1,5,9))
r.m4

####
## third model: drop 0.0cwt and Golden.Rain combinations
## therefore: default split plot but no interaction V:N
r.m5 <- lmer(Y ~ V + N + (1|B) + (1|B:V), oats,
             subset = !(oats$N == '0.0cwt' & oats$V == 'Golden.rain'))
r.m5

####
## fourth model: drop 0.0cwt altogether
r.m6 <- lmer(Y ~ V*N + (1|B) + (1|B:V), oats, subset = !(oats$N == '0.0cwt'))
r.m6


####
## prediction

## choose model:
r.m <- r.m1 # complete data 
r.m <- r.m2 # Variety as random effect
r.m <- r.m3 # without first obs
r.m <- r.m4 # without 0.0cwt in first block
r.m <- r.m5 # without 0.0cwt, Golden Rain combination
r.m <- r.m6 # without 0.0cwt altogether

## predict for original data.frame
mdata <- with(env(r.m), frame)
mX <- as(with(env(r.m), X), 'matrix')
mZ <- as(t(env(r.m)$Zt), 'matrix')

## get parameters
r.m.fe <- fixef(r.m)
## and estimates of random effects
r.m.re <- ranef(r.m) # this form is not very useful
r.m.re <- unlist(sapply(r.m.re, t)) # transform into vector
names(r.m.re) <- dimnames(mZ)[[2]] # add names

####
## predictions using aggregate:
## marginal prediction: use only fixed effects
r.m.pm <- as.vector(mX %*% r.m.fe) # == X %*% beta
aggregate(r.m.pm, mdata['N'], mean) # For nitrogen, averaged over variety
aggregate(r.m.pm, mdata['V'], mean) # For variety, averaged over nitrogen
aggregate(r.m.pm, mdata[c('V', 'N')], mean) # For variety x nitrogen

## conditional prediction add also random effects
r.m.pc <- r.m.pm + as.vector(mZ %*% r.m.re) # == X %*% beta + Z %*% b
aggregate(r.m.pc, mdata['N'], mean) # For nitrogen, averaged over variety
aggregate(r.m.pc, mdata['V'], mean) # For variety, averaged over nitrogen
aggregate(r.m.pc, mdata[c('V', 'N')], mean) # For variety x nitrogen

####
## prediction using matrix D ( + std errs for confidence intervals)
## For nitrogen, averaged over variety

## marginal prediction
## create matrix D_tau
Dt.N <- as(aggregate(mX, mdata['N'], mean)[,-1], 'matrix') # automated
## create matrix D_u
Du.N <- as(aggregate(mZ, mdata['N'], mean)[,-1], 'matrix') # automated

## calculate marginal predictions and se for confidence intervals
## vcov(r.m) returns matrix C^-1 = cov(beta)
Etheta <- sigma(r.m)^2 * with(env(r.m), tcrossprod(Lambda)) # == cov(b)

mp <- data.frame(mpred=Dt.N %*% r.m.fe,
                 se.CI=sqrt(diag(Dt.N %*% tcrossprod(vcov(r.m), Dt.N))),
                 se.PI=sqrt(diag(Dt.N %*% tcrossprod(vcov(r.m), Dt.N) +
                   Du.N %*% tcrossprod(Etheta, Du.N) + sigma(r.m)^2)),
                 row.names = levels(mdata$N))
cat('\nMarginal prediction for Nitrogen\n'); print(mp, digits = 4)

## conditional prediction ( cov(b) == 0)
## covariance matrix of beta without random effects
## in general: only exclude random effects that we're conditioning on
## don't calculate (X' %*% X)^{-1} directly, use qr decomposition
mX.Rinv <- solve(qr.R(qr(mX)))  # so we only have to invert R (upper-tri.)
covX <- sigma(r.m)^2 * tcrossprod(mX.Rinv) # == sigma^2 * (X' %*% X)^{-1}

## calculate conditional predictions and se for confidence intervals
cp <- data.frame(cpred=Dt.N %*% r.m.fe + Du.N %*% r.m.re,
                 se.CI=sqrt(diag(Dt.N %*% tcrossprod(covX, Dt.N))),
                 se.PI=sqrt(diag(Dt.N %*% tcrossprod(covX, Dt.N) +
                   sigma(r.m)^2)),
                 row.names = levels(mdata$N))
cat('\nConditional prediction for Nitrogen\n'); print(cp, digits = 4)

####
## For variety, averaged over nitrogen 

## marginal prediction
## create matrix D_tau 
Dt.V <- as(aggregate(mX, mdata['V'], mean)[,-1], 'matrix')
## create matrix D_u
Du.V <- as(aggregate(mZ, mdata['V'], mean)[,-1], 'matrix')
## calculate marginal predictions and se for confidence intervals
## vcov(r.m) returns matrix C^-1 = cov(beta)
## Etheta from above 
mp <- data.frame(mpred=Dt.V %*% r.m.fe,
                 se.CI=sqrt(diag(Dt.V %*% tcrossprod(vcov(r.m), Dt.V))),
                 se.PI=sqrt(diag(Dt.V %*% tcrossprod(vcov(r.m), Dt.V) +
                   Du.V %*% tcrossprod(Etheta, Du.V) + sigma(r.m)^2)),
                 row.names = levels(mdata$V))
cat('\nMarginal prediction for Varieties\n'); print(mp, digits = 4)

## conditional prediction

## calculate conditional predictions and se for confidence intervals
## use covX from above
cp <- data.frame(cpred=Dt.V %*% r.m.fe + Du.V %*% r.m.re,
                 se.CI=sqrt(diag(Dt.V %*% tcrossprod(covX, Dt.V))),
                 se.PI=sqrt(diag(Dt.V %*% tcrossprod(covX, Dt.V) +
                   sigma(r.m)^2)),
                 row.names = levels(mdata$V))
cat('\nConditional prediction for Varieties\n'); print(cp, digits = 4)