# Rscript for multivariate tests for means # Download zip-file "chap7tibetskull.dat" # from http://biostatistics.iop.kcl.ac.uk/publications/everitt/ # Unzip the file into your working directory source("chap7tibetskull.dat") # the data are now available as dataframe 'Tibet' ########################## # Info about these data: # ########################## # The data were collected in southeastern and eastern Tibet # Skulls 1-17 were found in one place (Type 1), # and skulls 18-32 were found in another place (Type 2). # On each skull, we have 5 measurements, all in millimeter: # Length: greatest length of skull # Breadth: greatest horizontal breadth of skull # Height: height of skull # Fheight: upper face height # Fbreadth: face breadth, between outermost points of cheek bones ############################ # Take a look at the data: # ############################ # scatterplot matrix: pairs(Tibet) # boxplot: boxplot(Tibet) # boxplot in which we can compare the two types: boxplot(Tibet[Type==1,c(1:5)], boxwex=0.25, at=1:5-0.2, col="yellow", main="Tibetan skull measurements",ylim=c(60,205)) boxplot(Tibet[Type==2,c(1:5)], boxwex=0.25, at=1:5+0.2, col="red", add = TRUE) legend(4.5, 200, c("Type 1", "Type 2"), fill = c("yellow", "red")) ############################ # One-sample test for mean # ############################ # H0: mu=mu.0 # Ha: mu is not equal to mu.0 # (this is a bit artificial in this case, but we do it anyway to # illustrate the procedure) # set hypothesized value for the mean: mu.0 <- c(180,140,135,74,135) data <- Tibet[,c(1:5)] n <- nrow(data) p <- ncol(data) # compute sample mean vector: sample.mean <- apply(data,2,mean) # compute sample covariance matrix: # note: R automatically computes the unbiased version, # with n-1 in the denominator S.u <- var(data) # compute inverse of sample covariance matrix: S.u.inverse <- solve(S.u) # compute T-squared statistic: T2 <- n * t(sample.mean-mu.0) %*% S.u.inverse %*% (sample.mean-mu.0) # Under the null hypothesis, this statistic has a T2(p,n-1) distribution. # This distribution is not readily available in R, so we convert to the # F-distribution. # compute F-statistic: F <- (n-p)/((n-1)*p) * T2 F # under the null hypothesis, this statistic has a F(p,n-p) distribution # compute p-value p.value <- pf(F, p, n-p, lower=FALSE) p.value # conclusion: we do *not* reject the null hypothesis ############################ # Two-sample test for mean # ############################ # H0: mu.1 = mu.2 # Ha: mu.1 not equal to mu.2 # (under the assumption that Sigma.1 = Sigma.2) attach(Tibet) Tibet1 <- Tibet[Type==1,c(1:5)] Tibet2 <- Tibet[Type==2,c(1:5)] n1 <- nrow(Tibet1) n2 <- nrow(Tibet2) n <- n1+n2 p <- ncol(Tibet1) # compute sample mean vectors: sample.mean1 <- apply(Tibet1, 2, mean) sample.mean2 <- apply(Tibet2, 2, mean) # compute pooled estimate for the covariance matrix: S.u <- ((n1-1)*var(Tibet1) + (n2-1)*var(Tibet2))/(n-2) S.u.inverse <- solve(S.u) # compute sample version of Mahalonobis distance (squared): D2 <- t(sample.mean1-sample.mean2) %*% S.u.inverse %*% (sample.mean1-sample.mean2) # compute T-squared statistic: T2 <- n1*n2/n * D2 # Under the null hypothesis, this statistic has a T2(p,n-2) distribution # This distribution is not readily available in R, so we convert to the # F-distribution. # compute F-statistic: Fstat <- (n-1-p)/((n-2)*p) * T2 Fstat # under the null hypothesis, this statistic has a F(p,n-1-p) distribution # Compute p-value: p.value <- pf(Fstat, p, n-1-p, lower=FALSE) p.value # Conclusion: we reject the null hypothesis that the means # in both groups are equal # Note: the code given in Everitt seems to be incorrect... ##################################################################### # More general approach: manova (Multivariate ANalysis Of VAriance) # # Can also be used when there are more than two groups # ##################################################################### Y <- cbind(Length, Breadth, Height, Fheight, Fbreadth) res <- manova(Y ~ as.factor(Type)) summary(res)