[R] Type-I v/s Type-III Sum-Of-Squares in ANOVA

Mon Apr 19 19:17:05 CEST 2010

* On Mo, 1. Mar 2010, Ista Zahn wrote:

> I've posted a short explanation about this at
> http://yourpsyche.org/miscellaneous that you might find helpful. I'm a

As someone who's also struggled with the "type X sum of squares" topic, I
like the idea to completely walk through a numerical example and see what
happens. I'd like to extend this a bit, and cover the following aspects:

- how are the model comparisons underlying the SS types calculated?
- do the compared models obey the marginality principle?
- what are the orthogonal projections defining the model comparisons?
- are the projections invariant to the type of contrast codes?
- are the hypotheses formulated using empirical cell sizes?
  (are the effect estimates using weighted or unweighted marginal means)?
- how can (some of) the SS be calculated without matrix math?

Below you'll find the code for SS type III using the 2x2 example from
Maxwell and Delaney. For SS type I, II, and III using the 3x3 example in
M&D, please see <http://www.uni-kiel.de/psychologie/dwoll/r/doc/ssTypes.r>

The model projections for SS type III corresponding to models that violate
the marginality principle are not invariant to the coding scheme. If,
e.g., Pai is the projection for the model including main effect A and
interaction A:B, Pai will be different for non sum-to-zero and sum-to-zero
codes. This seems to mean that SS type III for main effects compare
different models when using different contrasts codes. Which leads to the
question what hypotheses these models actually imply. I'd be grateful if
someone could provide any pointers on where to read up on that.

I hope this post is not too long! Best, Daniel

####-----------------------------------------------------------------
# 2x2 unbalanced design: data from Maxwell & Delaney 2004 p322
P   <- 2  # two groups in factor A (female / male)
Q   <- 2  # two groups in factor B (college degree / no degree)
g11 <- c(24, 26, 25, 24, 27, 24, 27, 23)
g12 <- c(15, 17, 20, 16)
g21 <- c(25, 29, 27)
g22 <- c(19, 18, 21, 20, 21, 22, 19)
Y   <- c(g11, g12, g21, g22)  # salary in 100$
A   <- factor(rep(1:P, c(8+4, 3+7)),         labels=c("f", "m"))
B   <- factor(rep(rep(1:Q, P), c(8,4, 3,7)), labels=c("deg", "noDeg"))

####-----------------------------------------------------------------
# utility function getInf2x2 (run with different contrasts settings)
# fit all relevant regression models for 2x2 between-subjects design
# output: * residual sum of squares for each model and their df
#         * orthogonal projection on subspace as defined
#           by the design matrix of each model
getInf2x2 <- function() {
  X <- model.matrix(lm(Y ~ A + B + A:B))  # ANOVA design matrix

  # indicator variables for factors from design matrix
  idA <- X[ , 2]   # factor A
  idB <- X[ , 3]   # factor B
  idI <- X[ , 4]   # interaction A:B

  # fit each relevant regression model
  mod1   <- lm(Y ~ 1)                 # no effect
  modA   <- lm(Y ~ idA)               # factor A
  modB   <- lm(Y ~       idB)         # factor B
  modAB  <- lm(Y ~ idA + idB)         # factors A, B
  modAI  <- lm(Y ~ idA       + idI)   # factor A, interaction A:B
  modBI  <- lm(Y ~       idB + idI)   # factor B, interaction A:B
  modABI <- lm(Y ~ idA + idB + idI)   # full model A, B, A:B

  # RSS for each regression model from lm()
  rss1   <- sum(residuals(mod1)^2)    # no effect, i.e., total SS
  rssA   <- sum(residuals(modA)^2)    # factor A
  rssB   <- sum(residuals(modB)^2)    # factor B
  rssAB  <- sum(residuals(modAB)^2)   # factors A, B
  rssAI  <- sum(residuals(modAI)^2)   # factor A, A:B
  rssBI  <- sum(residuals(modBI)^2)   # factor B, A:B
  rssABI <- sum(residuals(modABI)^2)  # full model A, B, A:B

  # degrees of freedom for RSS for each model
  N     <- length(Y)  # total N
  df1   <- N - (0+1)  # no effect:            0 predictors + mean
  dfA   <- N - (1+1)  # factor A:             1 predictor  + mean
  dfB   <- N - (1+1)  # factor B:             1 predictor  + mean
  dfAB  <- N - (2+1)  # factors A, B:         2 predictors + mean
  dfAI  <- N - (2+1)  # factor A, A:B:        2 predictors + mean
  dfBI  <- N - (2+1)  # factor B, A:B:        2 predictors + mean
  dfABI <- N - (3+1)  # full model A, B, A:B: 3 predictors + mean

  ####---------------------------------------------------------------
  # alternatively: get RSS for each model and their df manually
  # based on geometric interpretation
  # design matrix for each model
  one  <- rep(1, nrow(X))            # column of 1s
  X1   <- cbind(one)                 # no effect
  Xa   <- cbind(one, idA)            # factor A
  Xb   <- cbind(one,      idB)       # factor B
  Xab  <- cbind(one, idA, idB)       # factors A, B
  Xai  <- cbind(one, idA,      idI)  # factor A, interaction A:B
  Xbi  <- cbind(one,      idB, idI)  # factor B, interaction A:B
  Xabi <- cbind(one, idA, idB, idI)  # full model A, B, A:B

  # orthogonal projections P on subspace given by the design matrix
  # of each model: P*y = y^hat are the model predictions
  P1   <- X1   %*% solve(t(X1)   %*% X1)   %*% t(X1)    # no effect
  Pa   <- Xa   %*% solve(t(Xa)   %*% Xa)   %*% t(Xa)    # A
  Pb   <- Xb   %*% solve(t(Xb)   %*% Xb)   %*% t(Xb)    # B
  Pab  <- Xab  %*% solve(t(Xab)  %*% Xab)  %*% t(Xab)   # A, B
  Pai  <- Xai  %*% solve(t(Xai)  %*% Xai)  %*% t(Xai)   # A, A:B
  Pbi  <- Xbi  %*% solve(t(Xbi)  %*% Xbi)  %*% t(Xbi)   # B, A:B
  Pabi <- Xabi %*% solve(t(Xabi) %*% Xabi) %*% t(Xabi)  # A, B, A:B

  # RSS = squared length of projections onto the orthogonal
  # complement of each model's null hypothesis subspace in NxN space
  # (I-P)*y = I*y - P*y = y - P*y = y - y^hat
  # differences to model prediction, i.e., residuals
  ID     <- diag(N)  # NxN identity matrix: outermost space
  rss1   <- c(crossprod((ID - P1)   %*% Y))  # no effect
  rssA   <- c(crossprod((ID - Pa)   %*% Y))  # A
  rssB   <- c(crossprod((ID - Pb)   %*% Y))  # B
  rssAB  <- c(crossprod((ID - Pab)  %*% Y))  # A, B
  rssAI  <- c(crossprod((ID - Pai)  %*% Y))  # A, A:B
  rssBI  <- c(crossprod((ID - Pbi)  %*% Y))  # B, A:B
  rssABI <- c(crossprod((ID - Pabi) %*% Y))  # A, B, A:B

  # get df of each RSS from dimension of subspace complementary
  # to the null hypothesis model subspace (within NxN space)
  df1   <- qr(ID - P1)$rank                  # no effect
  dfA   <- qr(ID - Pa)$rank                  # A
  dfB   <- qr(ID - Pb)$rank                  # B
  dfAB  <- qr(ID - Pab)$rank                 # AB
  dfAI  <- qr(ID - Pai)$rank                 # A, A:B
  dfBI  <- qr(ID - Pbi)$rank                 # B, A:B
  dfABI <- qr(ID - Pabi)$rank                # A, B, A:B

  # return information about RSS from each model, their df,
  # and the corresponding orthogonal projections
    return(list( rss1=rss1,   rssA=rssA,    rssB=rssB, rssAB=rssAB,
                rssAI=rssAI, rssBI=rssBI, rssABI=rssABI,
                  df1=df1,     dfA=dfA,      dfB=dfB,   dfAB=dfAB,
                 dfAI=dfAI,   dfBI=dfBI,   dfABI=dfABI,
                   P1=P1,       Pa=Pa,        Pb=Pb,     Pab=Pab,
                  Pai=Pai,     Pbi=Pbi,     Pabi=Pabi))
}

####-----------------------------------------------------------------
# SS type III
# * test model comparisons that violate marginality principle
# * do not depend on order of terms in model
# * individual effect SS do not sum to total effect SS
# * test equality of unweighted marginal expected values
# * correct results only with sum-to-zero contrasts
#   due to violation of marginality principle

# coding scheme for categorical variables matters
# dummy coding -> wrong results
options(contrasts=c(unordered="contr.treatment", ordered="contr.poly"))

# effect coding (or other sum to zero codes) -> correct results
options(contrasts=c(unordered="contr.sum", ordered="contr.poly"))

library(car)  # result from Anova() from package car
Anova(lm(Y ~ A + B + A:B), type="III")

# order of model terms doesn't matter
Anova(lm(Y ~ B + A + A:B), type="III")

####-----------------------------------------------------------------
# model comparisons for SS type III violate principle of marginality:
# restricted model includes interaction without one main effect
# main effect factor A
#     B + A:B vs. A + B + A:B
# main effect factor B
# A     + A:B vs. A + B + A:B
# interaction: same comparison for SS type I, II and III
# A + B       vs. A + B + A:B

# comparisons cannot be done using anova() due to violation of
# marginality principle - drop1() drops each term in turn anyway
(dropRes <- drop1(lm(Y ~ A + B + A:B), ~ ., test="F"))

####-----------------------------------------------------------------
# manual model comparisons leading to SS type III
resIII <- getInf2x2()

# effect SS: RSS-difference between model with all
# effects and that model except the target effect
(ssIIIa <- resIII$rssBI - resIII$rssABI)             # factor A
(ssIIIb <- resIII$rssAI - resIII$rssABI)             # factor B
(ssIIIi <- resIII$rssAB - resIII$rssABI)             # interaction

# effect MS: RSS-difference divided by df-difference
(msIIIa   <- ssIIIa / (resIII$dfBI - resIII$dfABI))  # factor A
(msIIIb   <- ssIIIb / (resIII$dfAI - resIII$dfABI))  # factor B
(msIIIabi <- ssIIIi / (resIII$dfAB - resIII$dfABI))  # interaction
(msE      <- resIII$rssABI / resIII$dfABI)           # error MS full mod

# F-values for each effect: effect MS / error MS full model
msIIIa   / msE  # factor A
msIIIb   / msE  # factor B
msIIIabi / msE  # interaction

# indiviudal effect SS do not sum to total effect SS:
# sum of indiviudal effect SS
ssIIIa + ssIIIb + ssIIIi

# total effect SS from model comparison:
# RSS model with 0 predictors - RSS full model
resIII$rss1 - resIII$rssABI

####-----------------------------------------------------------------
# SS for each effect from geometric interpretation:
# squared length of projection onto the orthogonal complement
# of null hypothesis model subspace within full model subspace
crossprod((resIII$Pbi - resIII$Pabi) %*% Y)  # factor A
crossprod((resIII$Pai - resIII$Pabi) %*% Y)  # factor B
crossprod((resIII$Pab - resIII$Pabi) %*% Y)  # interaction

####-----------------------------------------------------------------
# coding scheme for categorical variables matters for models
# that violate marginality principle

# orthogonal projections onto subspace given by model's
# design matrix when using dummy coding:
options(contrasts=c(unordered="contr.treatment", ordered="contr.poly"))
resD  <- getInf2x2()     # projections from utility function
PaD   <- resD$Pa         # factor A
PbD   <- resD$Pb         # factor B
PabD  <- resD$Pab        # factors A, B
PaiD  <- resD$Pai        # factor A, A:B -> violates marginality
PbiD  <- resD$Pbi        # factor B, A:B -> violates marginality
PabiD <- resD$Pabi       # full model A, B, A:B

# orthogonal projections onto subspace given by model's
# design matrix when using effect coding coding (sum to zero):
options(contrasts=c(unordered="contr.sum", ordered="contr.poly"))
resE  <- getInf2x2()     # projections from utility function
PaE   <- resE$Pa         # factor A
PbE   <- resE$Pb         # factor B
PabE  <- resE$Pab        # factors A, B
PaiE  <- resE$Pai        # factor A, A:B -> violates marginality
PbiE  <- resE$Pbi        # factor B, A:B -> violates marginality
PabiE <- resE$Pabi       # full model A, B, A:B

# equality of projections from different contrast coding schemes
all.equal(PaD,   PaE)    # -> equal
all.equal(PbD,   PbE)    # -> equal
all.equal(PaiD,  PaiE)   # -> not equal
all.equal(PbiD,  PbiE)   # -> not equal
all.equal(PabD,  PabE)   # -> equal
all.equal(PabiD, PabiE)  # -> equal

####-----------------------------------------------------------------
# manual calculation of SS type III for main effects based
# on unweighted cell means = results from sum-to-zero contrasts
cellNs <- tapply(Y, list(A, B), length)  # cell sizes
cellMs <- tapply(Y, list(A, B), mean)    # cell means
rowMs  <- rowMeans(cellMs)               # unweighted row means
colMs  <- colMeans(cellMs)               # unweighted column means

# effective marginal N: harmonic mean off cell sizes
effNj <- P / apply(1/cellNs, 1, sum)     # harmonic row means
effNk <- Q / apply(1/cellNs, 2, sum)     # harmonic column means

# grand mean based on corresponding marginal means
# weighted with effective marginal N
gM1 <- sum(effNj*rowMs) / sum(effNj)     # based on row means
gM2 <- sum(effNk*colMs) / sum(effNk)     # based on column means

(ssIIIa <- P * sum(effNj * (rowMs-gM1)^2))  # SS type III for A
(ssIIIb <- Q * sum(effNk * (colMs-gM2)^2))  # SS type III for B
# interaction SS cannot be easily calculated without matrix math
# error SS: sum of squared Y - corresponding cell mean
(ssE    <- sum((Y - ave(Y, A, B, FUN=mean))^2))

dfA     <- P-1            # df SS factor A
dfB     <- Q-1            # df SS factor A
dfE     <- N - (P*Q)      # df error SS
(msIIIa <- ssIIIa / dfA)  # MS factor A
(msIIIb <- ssIIIb / dfB)  # MS factor B
(msE    <- ssE    / dfE)  # MS error
msIIIa / msE              # F-value factor A
msIIIb / msE              # F-value factor B