[Rd] Two apparent bugs in aov(y~ *** -1 + Error(***)),
with suggested (PR#6510)
b-h at mevik.net
b-h at mevik.net
Fri Jan 30 20:25:04 MET 2004
I think there are two bugs in aov() that shows up when the right hand
side of `formula' contains both `-1' and an Error() term, e.g.,
aov(y ~ a + b - 1 + Error(c), ...). Without `-1' or `Error()' there
is no problem. I've included and example, and the source of aov()
with suggested fixes below.
The first bug (labeled BUG 1 below) creates an extra, empty stratum
inside `Within' in the result list (se example below).
The second bug (BUG 2) assigns the fit of each stratum at one index
too high in the result list; i.e., the fit of stratum 1 is assigned to
`Stratum 2' etc. `Stratum 1' is NULL. (Also the extra stratum added
by BUG 1, is NULL.)
***
*** VERSION INFORMATION
***
> version
_
platform i386-pc-linux-gnu
arch i386
os linux-gnu
system i386, linux-gnu
status
major 1
minor 8.1
year 2003
month 11
day 21
language R
***
*** EXAMPLE
***
> test.df <- data.frame (y=rnorm(8), a=gl(2,1,8), b=gl(2,3,8), c=gl(2,4,8))
> ## With intercept; no problem:
> aov(y~a+b+Error(c), data=test.df)
Call:
aov(formula = y ~ a + b + Error(c), data = test.df)
Grand Mean: -0.4105379
Stratum 1: c
Terms:
b
Sum of Squares 0.3602348
Deg. of Freedom 1
Estimated effects are balanced
Stratum 2: Within
Terms:
a b Residuals
Sum of Squares 0.0142722 2.7349479 1.7883294
Deg. of Freedom 1 1 4
Residual standard error: 0.6686422
Estimated effects may be unbalanced
> ## Without intercept; incorrect strata assignments:
> aov(y~a+b-1+Error(c), data=test.df)
Call:
aov(formula = y ~ a + b - 1 + Error(c), data = test.df)
Stratum 1: c
NULL
Stratum 2: Within
Terms:
a b
Sum of Squares 1.3483306 0.3602348
Deg. of Freedom 1 1
1 out of 3 effects not estimable
Estimated effects may be unbalanced
Stratum 3: NA
NULL
> ## Fixed version:
> fixaov(y~a+b-1+Error(c), data=test.df)
Call:
fixaov(formula = y ~ a + b - 1 + Error(c), data = test.df)
Stratum 1: c
Terms:
a b
Sum of Squares 1.3483306 0.3602348
Deg. of Freedom 1 1
1 out of 3 effects not estimable
Estimated effects may be unbalanced
Stratum 2: Within
Terms:
a b Residuals
Sum of Squares 0.0142722 2.7349479 1.7883294
Deg. of Freedom 1 1 4
Residual standard error: 0.6686422
1 out of 3 effects not estimable
Estimated effects may be unbalanced
>
***
*** aov() code with suggested fixes:
***
fixaov <- function(formula, data = NULL, projections = FALSE, qr = TRUE,
contrasts = NULL, ...)
{
Terms <- if(missing(data)) terms(formula, "Error")
else terms(formula, "Error", data = data)
indError <- attr(Terms, "specials")$Error
if(length(indError) > 1)
stop(paste("There are", length(indError),
"Error terms: only 1 is allowed"))
lmcall <- Call <- match.call()
lmcall[[1]] <- as.name("lm")
lmcall$singular.ok <- TRUE # not currently used in R
if(projections) qr <- lmcall$qr <- TRUE
lmcall$projections <- NULL
if(is.null(indError)) {
## no Error term
fit <- eval(lmcall, parent.frame())
if(projections) fit$projections <- proj(fit)
class(fit) <- if(inherits(fit, "mlm"))
c("maov", "aov", oldClass(fit)) else c("aov", oldClass(fit))
fit$call <- Call
return(fit)
} else {
## helmert contrasts can be helpful: do we want to force them?
## this version does for the Error model.
opcons <- options("contrasts")
options(contrasts=c("contr.helmert", "contr.poly"))
on.exit(options(opcons))
allTerms <- Terms
errorterm <- attr(Terms, "variables")[[1 + indError]]
eTerm <- deparse(errorterm[[2]], width = 500, backtick = TRUE)
intercept <- attr(Terms, "intercept")
ecall <- lmcall
ecall$formula <-
as.formula(paste(deparse(formula[[2]], width = 500,
backtick = TRUE), "~", eTerm,
if(!intercept) "- 1"),
env=environment(formula))
ecall$method <- "qr"
ecall$qr <- TRUE
ecall$contrasts <- NULL
er.fit <- eval(ecall, parent.frame())
options(opcons)
nmstrata <- attr(terms(er.fit), "term.labels")
## remove backticks from simple labels for strata (only)
nmstrata <- sub("^`(.*)`$", "\\1", nmstrata)
if(intercept) nmstrata <- c("(Intercept)", nmstrata)
qr.e <- er.fit$qr
rank.e <- er.fit$rank
qty <- er.fit$resid
maov <- is.matrix(qty)
asgn.e <- er.fit$assign[qr.e$piv[1:rank.e]]
## we want this to label the rows of qtx, not cols of x.
nobs <- NROW(qty)
if(nobs > rank.e) {
## APPARENT BUG 1:
# result <- vector("list", max(asgn.e) + 2)
# asgn.e[(rank.e+1):nobs] <- max(asgn.e) + 1
## SUGGESTED FIX:
asgn.e[(rank.e+1):nobs] <- max(asgn.e) + 1
result <- vector("list", length (unique(asgn.e)))
## END FIX
nmstrata <- c(nmstrata, "Within")
} else result <- vector("list", max(asgn.e) + 1)
names(result) <- nmstrata
lmcall$formula <- form <-
update(formula, paste(". ~ .-", deparse(errorterm, width = 500,
backtick = TRUE)))
Terms <- terms(form)
lmcall$method <- "model.frame"
mf <- eval(lmcall, parent.frame())
xvars <- as.character(attr(Terms, "variables"))[-1]
if ((yvar <- attr(Terms, "response")) > 0)
xvars <- xvars[-yvar]
if (length(xvars) > 0) {
xlev <- lapply(mf[xvars], levels)
xlev <- xlev[!sapply(xlev, is.null)]
} else xlev <- NULL
resp <- model.response(mf)
qtx <- model.matrix(Terms, mf, contrasts)
cons <- attr(qtx, "contrasts")
dnx <- colnames(qtx)
asgn.t <- attr(qtx, "assign")
if(length(wts <- model.weights(mf))) {
wts <- sqrt(wts)
resp <- resp * wts
qtx <- qtx * wts
}
qty <- as.matrix(qr.qty(qr.e, resp))
if((nc <- ncol(qty)) > 1) {
dny <- colnames(resp)
if(is.null(dny)) dny <- paste("Y", 1:nc, sep="")
dimnames(qty) <- list(seq(nrow(qty)), dny)
} else dimnames(qty) <- list(seq(nrow(qty)), NULL)
qtx <- qr.qty(qr.e, qtx)
dimnames(qtx) <- list(seq(nrow(qtx)) , dnx)
for(i in seq(along=nmstrata)) {
## APPARENT BUG 2:
# select <- asgn.e==(i-1)
## SUGGESTED FIX:
select <- asgn.e==unique(asgn.e)[i]
## END FIX
ni <- sum(select)
if(!ni) next
## helpful to drop constant columns.
xi <- qtx[select, , drop = FALSE]
cols <- colSums(xi^2) > 1e-5
if(any(cols)) {
xi <- xi[, cols, drop = FALSE]
attr(xi, "assign") <- asgn.t[cols]
fiti <- lm.fit(xi, qty[select,,drop=FALSE])
fiti$terms <- Terms
} else {
y <- qty[select,,drop=FALSE]
fiti <- list(coefficients = numeric(0), residuals = y,
fitted.values = 0 * y, weights = wts, rank = 0,
df.residual = NROW(y))
}
if(projections) fiti$projections <- proj(fiti)
class(fiti) <- c(if(maov) "maov", "aov", oldClass(er.fit))
result[[i]] <- fiti
}
class(result) <- c("aovlist", "listof")
if(qr) attr(result, "error.qr") <- qr.e
attr(result, "call") <- Call
if(length(wts)) attr(result, "weights") <- wts
attr(result, "terms") <- allTerms
attr(result, "contrasts") <- cons
attr(result, "xlevels") <- xlev
result
}
}
--
Sincerely,
Bjørn-Helge Mevik, dr.scient.
More information about the R-devel
mailing list