[R] mu^2(1-mu)^2 variance function for GLM

Thu Jun 16 10:27:53 CEST 2005

Dear list,

I'm trying to mimic the analysis of Wedderburn (1974) as cited by
McCullagh and Nelder (1989) on p.328-332. This is the leaf-blotch on 
barley example, and the data is available in the `faraway' package.

Wedderburn suggested using the variance function mu^2(1-mu)^2. This
variance function isn't readily available in R's `quasi' family object, 
but it seems to me that the following definition could be used:

}, "mu^2(1-mu)^2" = {
     variance <- function(mu) mu^2 * (1 - mu)^2
     validmu <- function(mu) all(mu > 0) && all(mu < 1)
     dev.resids <- function(y, mu, wt) 2 * wt * ((2 * y - 1) *
         (log(ifelse(y == 0, 1, y/mu)) - log(ifelse(y == 1, 1,
         (1 - y)/(1 - mu)))) - 2 + y/mu + (1 - y)/(1 - mu))

I've modified the `quasi' function accordingly (into `quasi2' given 
below) and my results are very much in line with the ones cited by 
McCullagh and Nelder on p.330-331:

> data(leafblotch, package = "faraway")
> summary(fit <- glm(blotch ~ site + variety,
+         family = quasi2(link = "logit", variance = "mu^2(1-mu)^2"),
+         data = leafblotch))

Call:
glm(formula = blotch ~ site + variety, family = quasi2(link = "logit",
     variance = "mu^2(1-mu)^2"), data = leafblotch)

Deviance Residuals:
      Min        1Q    Median        3Q       Max
-3.23175  -0.65385  -0.09426   0.46946   1.97152

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) -7.92253    0.44463 -17.818  < 2e-16 ***
site2        1.38308    0.44463   3.111  0.00268 **
site3        3.86013    0.44463   8.682 8.18e-13 ***
site4        3.55697    0.44463   8.000 1.53e-11 ***
site5        4.10841    0.44463   9.240 7.48e-14 ***
site6        4.30541    0.44463   9.683 1.13e-14 ***
site7        4.91811    0.44463  11.061  < 2e-16 ***
site8        5.69492    0.44463  12.808  < 2e-16 ***
site9        7.06762    0.44463  15.896  < 2e-16 ***
variety2    -0.46728    0.46868  -0.997  0.32210
variety3     0.07877    0.46868   0.168  0.86699
variety4     0.95418    0.46868   2.036  0.04544 *
variety5     1.35276    0.46868   2.886  0.00514 **
variety6     1.32859    0.46868   2.835  0.00595 **
variety7     2.34066    0.46868   4.994 3.99e-06 ***
variety8     3.26268    0.46868   6.961 1.30e-09 ***
variety9     3.13556    0.46868   6.690 4.10e-09 ***
variety10    3.88736    0.46868   8.294 4.33e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasi family taken to be 0.9884755)

     Null deviance: 339.488  on 89  degrees of freedom
Residual deviance:  71.961  on 72  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 18

Also, the plot of the Pearson residuals against the linear predictor

> plot(residuals(fit, type = "pearson") ~ fit$linear.predictors)
> abline(h = 0, lty = 2)

results in a plot that, to my eyes at least, is very close to Fig. 9.2
on p. 332.

However, I can't seem to find any other published examples using this
variance function. I'd really like to verify that my code above is
working before applying it to real data sets. Can anybody help?

Thanks,
Henric
- - - - -
quasi2 <- function (link = "identity", variance = "constant")
{
     linktemp <- substitute(link)
     if (is.expression(linktemp) || is.call(linktemp))
         linktemp <- link
     else if (!is.character(linktemp))
         linktemp <- deparse(linktemp)
     if (is.character(linktemp))
         stats <- make.link(linktemp)
     else stats <- linktemp
     variancetemp <- substitute(variance)
     if (!is.character(variancetemp)) {
         variancetemp <- deparse(variancetemp)
         if (linktemp == "variance")
             variancetemp <- eval(variance)
     }
     switch(variancetemp, constant = {
         variance <- function(mu) rep.int(1, length(mu))
         dev.resids <- function(y, mu, wt) wt * ((y - mu)^2)
         validmu <- function(mu) TRUE
     }, "mu(1-mu)" = {
         variance <- function(mu) mu * (1 - mu)
         validmu <- function(mu) all(mu > 0) && all(mu < 1)
         dev.resids <- function(y, mu, wt) 2 * wt * (y * log(ifelse(y ==
             0, 1, y/mu)) + (1 - y) * log(ifelse(y == 1, 1, (1 -
             y)/(1 - mu))))
     }, "mu^2(1-mu)^2" = {
         variance <- function(mu) mu^2 * (1 - mu)^2
         validmu <- function(mu) all(mu > 0) && all(mu < 1)
         dev.resids <- function(y, mu, wt) 2 * wt * ((2 * y - 1) *
             (log(ifelse(y == 0, 1, y/mu)) - log(ifelse(y == 1, 1,
             (1 - y)/(1 - mu)))) - 2 + y/mu + (1 - y)/(1 - mu))
     }, mu = {
         variance <- function(mu) mu
         validmu <- function(mu) all(mu > 0)
         dev.resids <- function(y, mu, wt) 2 * wt * (y * log(ifelse(y ==
             0, 1, y/mu)) - (y - mu))
     }, "mu^2" = {
         variance <- function(mu) mu^2
         validmu <- function(mu) all(mu > 0)
         dev.resids <- function(y, mu, wt) pmax(-2 * wt * (log(ifelse(y ==
             0, 1, y)/mu) - (y - mu)/mu), 0)
     }, "mu^3" = {
         variance <- function(mu) mu^3
         validmu <- function(mu) all(mu > 0)
         dev.resids <- function(y, mu, wt) wt * ((y - mu)^2)/(y *
             mu^2)
     }, stop(gettextf("'variance' \"%s\" is invalid: possible values are 
\"mu(1-mu)\", \"mu^2(1-mu)^2\", \"mu\", \"mu^2\", \"mu^3\" and 
\"constant\"",
         variancetemp), domain = NA))
     initialize <- expression({
         n <- rep.int(1, nobs)
         mustart <- y + 0.1 * (y == 0)
     })
     aic <- function(y, n, mu, wt, dev) NA
     structure(list(family = "quasi", link = linktemp, linkfun = 
stats$linkfun,
         linkinv = stats$linkinv, variance = variance, dev.resids = 
dev.resids,
         aic = aic, mu.eta = stats$mu.eta, initialize = initialize,
         validmu = validmu, valideta = stats$valideta, varfun = 
variancetemp),
         class = "family")
}