## Series 7 (Applied Multivariate Statistics)
#####################################################################

# Exercise 1
############

#a) Load the package nlme. Read the help file on the data set
#   ?Rail and look at the data.

library(nlme)
?Rail

#b) Compare the travel times by making a boxplot for each track.

boxplot(travel ~ Rail, data = Rail)

#c) Fit a random intercept model in order to explain the
#   variation coming from the different tracks
#   and from the measurement inaccuracy.

fmRail.lme <- lme(travel ~ 1, data = Rail, random = ~ 1|Rail)
summary(fmRail.lme)


#f) Compute the confidence intervals.

intervals(fmRail.lme)

#g) What random effects were fitted for the individual tracks?

re <- random.effects(fmRail.lme)
re

#h) Check the model assumptions:

#Look at the standarized residuals.

plot(fmRail.lme)
qqnorm(fmRail.lme)

#Look at the random effects.

qqnorm(re[,1])


# Exercise 2
############


# a) Load the dataset OrthoFem.csv and have a look at it. The data is taken from the data set Orthodont in the package nlme. Have a look at the help file of the data (?Orthodont). In our data, we only kept the female patients and centered the age by substracting 11 years.

dat <- read.csv(file = "http://stat.ethz.ch/Teaching/Datasets/WBL/OrthoFem.csv", header = TRUE, row.names = 1)

# b) Make a xyplot (in package lattice) of distance vs. age for each subject.

library(lattice)
xyplot(distance ~ age | Subject, data = dat)

# c)  Fit a standard linear model explaining distance by age.

fm.lm <- lm(distance ~ age, data = dat)
summary(fm.lm)

# d) Fit a random intercept model.

fm.ri <- lme(distance ~ age, data = dat, random = ~1|Subject)
summary(fm.ri)


# e) Is the random intercept model a significant improvement over the standard linear model?

anova(fm.ri, fm.lm)# Yes, it is a significant improvement, the p-value is smaller than 0.0001.


# f) Can the fit be further improved significantly by considering a random intercept & random slope model?

fm.rs <- lme(distance ~ age, data = dat, random = ~age|Subject)
summary(fm.rs)# No it cannot be improved significantly; the p-value is 0.15.

# g) Compare the confidence intervals of standard lm (=incorrect model) and the random slope model (=improved model). Are there relevant differences?

intervals(fm.rs)

# The confidence interval for the fixed effects became in this example substantially smaller (this is not always the case; they also could be similar or get wider, depending on the setting). Thus, neglecting the repeated measures structure might have a relevant (negative) impact on the data analysis. 


# h) Check the random slope model by looking at the residual plots.

# Look at the standarized residuals.

re <- ranef(fm.rs)
plot(fm.rs, which = 1)
qqnorm(fm.rs)

# Look at the randomm effects.

par(mfrow = c(1,2))  
qqnorm(re[,1])
qqnorm(re[,2])