## Series 7 (Applied Multivariate Statistics) ##################################################################### # Exercise 1 ############ #a) Load the package nlme. Read the help file on the data set # ?Rail and look at the data. library(nlme) ?Rail #b) Compare the travel times by making a boxplot for each track. boxplot(travel ~ Rail, data = Rail) #c) Fit a random intercept model in order to explain the # variation coming from the different tracks # and from the measurement inaccuracy. fmRail.lme <- lme(travel ~ 1, data = Rail, random = ~ 1|Rail) summary(fmRail.lme) #f) Compute the confidence intervals. intervals(fmRail.lme) #g) What random effects were fitted for the individual tracks? re <- random.effects(fmRail.lme) re #h) Check the model assumptions: #Look at the standarized residuals. plot(fmRail.lme) qqnorm(fmRail.lme) #Look at the random effects. qqnorm(re[,1]) # Exercise 2 ############ # a) Load the dataset OrthoFem.csv and have a look at it. The data is taken from the data set Orthodont in the package nlme. Have a look at the help file of the data (?Orthodont). In our data, we only kept the female patients and centered the age by substracting 11 years. dat <- read.csv(file = "http://stat.ethz.ch/Teaching/Datasets/WBL/OrthoFem.csv", header = TRUE, row.names = 1) # b) Make a xyplot (in package lattice) of distance vs. age for each subject. library(lattice) xyplot(distance ~ age | Subject, data = dat) # c) Fit a standard linear model explaining distance by age. fm.lm <- lm(distance ~ age, data = dat) summary(fm.lm) # d) Fit a random intercept model. fm.ri <- lme(distance ~ age, data = dat, random = ~1|Subject) summary(fm.ri) # e) Is the random intercept model a significant improvement over the standard linear model? anova(fm.ri, fm.lm)# Yes, it is a significant improvement, the p-value is smaller than 0.0001. # f) Can the fit be further improved significantly by considering a random intercept & random slope model? fm.rs <- lme(distance ~ age, data = dat, random = ~age|Subject) summary(fm.rs)# No it cannot be improved significantly; the p-value is 0.15. # g) Compare the confidence intervals of standard lm (=incorrect model) and the random slope model (=improved model). Are there relevant differences? intervals(fm.rs) # The confidence interval for the fixed effects became in this example substantially smaller (this is not always the case; they also could be similar or get wider, depending on the setting). Thus, neglecting the repeated measures structure might have a relevant (negative) impact on the data analysis. # h) Check the random slope model by looking at the residual plots. # Look at the standarized residuals. re <- ranef(fm.rs) plot(fm.rs, which = 1) qqnorm(fm.rs) # Look at the randomm effects. par(mfrow = c(1,2)) qqnorm(re[,1]) qqnorm(re[,2])