[Rd] Two apparent bugs in aov(y~ *** -1 + Error(***)), with suggested (PR#6510)

b-h at mevik.net b-h at mevik.net
Fri Jan 30 20:25:04 MET 2004


I think there are two bugs in aov() that shows up when the right hand
side of `formula' contains both `-1' and an Error() term, e.g.,
aov(y ~ a + b - 1 + Error(c), ...).  Without `-1' or `Error()' there
is no problem.  I've included and example, and the source of aov()
with suggested fixes below.

The first bug (labeled BUG 1 below) creates an extra, empty stratum
inside `Within' in the result list (se example below).

The second bug (BUG 2) assigns the fit of each stratum at one index
too high in the result list; i.e., the fit of stratum 1 is assigned to
`Stratum 2' etc.  `Stratum 1' is NULL.  (Also the extra stratum added
by BUG 1, is NULL.)

***
*** VERSION INFORMATION
***

> version
         _                
platform i386-pc-linux-gnu
arch     i386             
os       linux-gnu        
system   i386, linux-gnu  
status                    
major    1                
minor    8.1              
year     2003             
month    11               
day      21               
language R                


***
*** EXAMPLE
***

> test.df <- data.frame (y=rnorm(8), a=gl(2,1,8), b=gl(2,3,8), c=gl(2,4,8))
> ## With intercept; no problem:
> aov(y~a+b+Error(c), data=test.df)

Call:
aov(formula = y ~ a + b + Error(c), data = test.df)

Grand Mean: -0.4105379 

Stratum 1: c

Terms:
                        b
Sum of Squares  0.3602348
Deg. of Freedom         1

Estimated effects are balanced

Stratum 2: Within

Terms:
                        a         b Residuals
Sum of Squares  0.0142722 2.7349479 1.7883294
Deg. of Freedom         1         1         4

Residual standard error: 0.6686422 
Estimated effects may be unbalanced
> ## Without intercept; incorrect strata assignments:
> aov(y~a+b-1+Error(c), data=test.df)

Call:
aov(formula = y ~ a + b - 1 + Error(c), data = test.df)

Stratum 1: c
NULL

Stratum 2: Within

Terms:
                        a         b
Sum of Squares  1.3483306 0.3602348
Deg. of Freedom         1         1

1 out of 3 effects not estimable
Estimated effects may be unbalanced

Stratum 3: NA
NULL
> ## Fixed version:
> fixaov(y~a+b-1+Error(c), data=test.df)

Call:
fixaov(formula = y ~ a + b - 1 + Error(c), data = test.df)

Stratum 1: c

Terms:
                        a         b
Sum of Squares  1.3483306 0.3602348
Deg. of Freedom         1         1

1 out of 3 effects not estimable
Estimated effects may be unbalanced

Stratum 2: Within

Terms:
                        a         b Residuals
Sum of Squares  0.0142722 2.7349479 1.7883294
Deg. of Freedom         1         1         4

Residual standard error: 0.6686422 
1 out of 3 effects not estimable
Estimated effects may be unbalanced
> 


***
*** aov() code with suggested fixes:
***

fixaov <- function(formula, data = NULL, projections = FALSE, qr = TRUE,
                contrasts = NULL, ...)
{
    Terms <- if(missing(data)) terms(formula, "Error")
    else terms(formula, "Error", data = data)
    indError <- attr(Terms, "specials")$Error
    if(length(indError) > 1)
        stop(paste("There are", length(indError),
                   "Error terms: only 1 is allowed"))
    lmcall <- Call <- match.call()
    lmcall[[1]] <- as.name("lm")
    lmcall$singular.ok <- TRUE          # not currently used in R
    if(projections) qr <- lmcall$qr <- TRUE
    lmcall$projections <- NULL
    if(is.null(indError)) {
        ## no Error term
        fit <- eval(lmcall, parent.frame())
        if(projections) fit$projections <- proj(fit)
        class(fit) <- if(inherits(fit, "mlm"))
            c("maov", "aov", oldClass(fit)) else c("aov", oldClass(fit))
        fit$call <- Call
        return(fit)
    } else {
        ##  helmert contrasts can be helpful: do we want to force them?
        ##  this version does for the Error model.
        opcons <- options("contrasts")
        options(contrasts=c("contr.helmert", "contr.poly"))
        on.exit(options(opcons))
        allTerms <- Terms
        errorterm <-  attr(Terms, "variables")[[1 + indError]]
        eTerm <- deparse(errorterm[[2]], width = 500, backtick = TRUE)
        intercept <- attr(Terms, "intercept")
        ecall <- lmcall
        ecall$formula <-
            as.formula(paste(deparse(formula[[2]], width = 500,
                                     backtick = TRUE), "~", eTerm,
                             if(!intercept) "- 1"),
                       env=environment(formula))

        ecall$method <- "qr"
        ecall$qr <- TRUE
        ecall$contrasts <- NULL
        er.fit <- eval(ecall, parent.frame())
        options(opcons)
        nmstrata <- attr(terms(er.fit), "term.labels")
        ## remove backticks from simple labels for strata (only)
        nmstrata <- sub("^`(.*)`$", "\\1", nmstrata)
        if(intercept) nmstrata <- c("(Intercept)", nmstrata)
        qr.e <- er.fit$qr
        rank.e <- er.fit$rank
        qty <- er.fit$resid
        maov <- is.matrix(qty)
        asgn.e <- er.fit$assign[qr.e$piv[1:rank.e]]
        ## we want this to label the rows of qtx, not cols of x.
        nobs <- NROW(qty)
        if(nobs > rank.e) {
## APPARENT BUG 1:
          # result <- vector("list", max(asgn.e) + 2)
          # asgn.e[(rank.e+1):nobs] <- max(asgn.e) + 1
## SUGGESTED FIX:
            asgn.e[(rank.e+1):nobs] <- max(asgn.e) + 1
            result <- vector("list", length (unique(asgn.e)))
## END FIX
            nmstrata <- c(nmstrata, "Within")
        } else result <- vector("list", max(asgn.e) + 1)
        names(result) <- nmstrata
        lmcall$formula <- form <-
            update(formula, paste(". ~ .-", deparse(errorterm, width = 500,
                                                    backtick = TRUE)))
        Terms <- terms(form)
        lmcall$method <- "model.frame"
        mf <- eval(lmcall, parent.frame())
        xvars <- as.character(attr(Terms, "variables"))[-1]
        if ((yvar <- attr(Terms, "response")) > 0)
            xvars <- xvars[-yvar]
        if (length(xvars) > 0) {
            xlev <- lapply(mf[xvars], levels)
            xlev <- xlev[!sapply(xlev, is.null)]
        } else xlev <- NULL
        resp <- model.response(mf)
        qtx <- model.matrix(Terms, mf, contrasts)
        cons <- attr(qtx, "contrasts")
        dnx <- colnames(qtx)
        asgn.t <- attr(qtx, "assign")
        if(length(wts <- model.weights(mf))) {
            wts <- sqrt(wts)
            resp <- resp * wts
            qtx <- qtx * wts
        }
        qty <- as.matrix(qr.qty(qr.e, resp))
        if((nc <- ncol(qty)) > 1) {
            dny <- colnames(resp)
            if(is.null(dny)) dny <- paste("Y", 1:nc, sep="")
            dimnames(qty) <- list(seq(nrow(qty)), dny)
        } else dimnames(qty) <- list(seq(nrow(qty)), NULL)
        qtx <- qr.qty(qr.e, qtx)
        dimnames(qtx) <- list(seq(nrow(qtx)) , dnx)
        for(i in seq(along=nmstrata)) {
## APPARENT BUG 2:
          # select <- asgn.e==(i-1)
## SUGGESTED FIX:
            select <- asgn.e==unique(asgn.e)[i]
## END FIX
            ni <- sum(select)
            if(!ni) next
            ## helpful to drop constant columns.
            xi <- qtx[select, , drop = FALSE]
            cols <- colSums(xi^2) > 1e-5
            if(any(cols)) {
                xi <- xi[, cols, drop = FALSE]
                attr(xi, "assign") <- asgn.t[cols]
                fiti <- lm.fit(xi, qty[select,,drop=FALSE])
                fiti$terms <- Terms
            } else {
                y <- qty[select,,drop=FALSE]
                fiti <- list(coefficients = numeric(0), residuals = y,
                             fitted.values = 0 * y, weights = wts, rank = 0,
                             df.residual = NROW(y))
            }
            if(projections) fiti$projections <- proj(fiti)
            class(fiti) <- c(if(maov) "maov", "aov", oldClass(er.fit))
            result[[i]] <- fiti
        }
        class(result) <- c("aovlist", "listof")
        if(qr) attr(result, "error.qr") <- qr.e
        attr(result, "call") <- Call
        if(length(wts)) attr(result, "weights") <- wts
        attr(result, "terms") <- allTerms
        attr(result, "contrasts") <- cons
        attr(result, "xlevels") <- xlev
        result
    }
}

-- 
Sincerely,
Bjørn-Helge Mevik, dr.scient.



More information about the R-devel mailing list