[R] question about how summary.lm works

Mon Jan 12 18:26:06 CET 2004

Hi,

While exploring how summary.lm generated its output I came across a section 
that left me puzzled.

at around line 57
     R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
     se <- sqrt(diag(R) * resvar)

I'm hoping somebody could explain the logic of these to steps or 
alternatively point me in the direction of a text that will explain these 
steps.

In particular I'm puzzled what is the relationship between QR factorization 
and the cholesky factorization such that one can give a (sort of ) R matrix 
as a parameter of chol2inv(). I say sort of R matrix as the matrix 
generated by Qr$qr[p1, p1, drop = FALSE] has a lower triangle with non zero 
entries although the upper triangle corresponds to the values in the R matrix.

Thank you

Dominic

summary.lm
function (object, correlation = FALSE, symbolic.cor = FALSE,
     ...)
{
     z <- object
     p <- z$rank
     if (p == 0) {
         r <- z$residuals
         n <- length(r)
         w <- z$weights
         if (is.null(w)) {
             rss <- sum(r^2)
         }
         else {
             rss <- sum(w * r^2)
             r <- sqrt(w) * r
         }
         resvar <- rss/(n - p)
         ans <- z[c("call", "terms")]
         class(ans) <- "summary.lm"
         ans$aliased <- is.na(coef(object))
         ans$residuals <- r
         ans$df <- c(0, n, length(ans$aliased))
         ans$coefficients <- matrix(NA, 0, 4)
         dimnames(ans$coefficients) <- list(NULL, c("Estimate",
             "Std. Error", "t value", "Pr(>|t|)"))
         ans$sigma <- sqrt(resvar)
         ans$r.squared <- ans$adj.r.squared <- 0
         return(ans)
     }
     Qr <- object$qr
     if (is.null(z$terms) || is.null(Qr))
         stop("invalid 'lm' object:  no terms nor qr component")
     n <- NROW(Qr$qr)
     rdf <- n - p
     if (rdf != z$df.residual)
         warning("inconsistent residual degrees of freedom. -- please report!")
     p1 <- 1:p
     r <- z$residuals
     f <- z$fitted
     w <- z$weights
     if (is.null(w)) {
         mss <- if (attr(z$terms, "intercept"))
             sum((f - mean(f))^2)
         else sum(f^2)
         rss <- sum(r^2)
     }
     else {
         mss <- if (attr(z$terms, "intercept")) {
             m <- sum(w * f/sum(w))
             sum(w * (f - m)^2)
         }
         else sum(w * f^2)
         rss <- sum(w * r^2)
         r <- sqrt(w) * r
     }
     resvar <- rss/rdf
     R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
     se <- sqrt(diag(R) * resvar)
     est <- z$coefficients[Qr$pivot[p1]]
     tval <- est/se
     ans <- z[c("call", "terms")]
     ans$residuals <- r
     ans$coefficients <- cbind(est, se, tval, 2 * pt(abs(tval),
         rdf, lower.tail = FALSE))
     dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]],
         c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
     ans$aliased <- is.na(coef(object))
     ans$sigma <- sqrt(resvar)
     ans$df <- c(p, rdf, NCOL(Qr$qr))
     if (p != attr(z$terms, "intercept")) {
         df.int <- if (attr(z$terms, "intercept"))
             1
         else 0
         ans$r.squared <- mss/(mss + rss)
         ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n -
             df.int)/rdf)
         ans$fstatistic <- c(value = (mss/(p - df.int))/resvar,
             numdf = p - df.int, dendf = rdf)
     }
     else ans$r.squared <- ans$adj.r.squared <- 0
     ans$cov.unscaled <- R
     dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,
         1)]
     if (correlation) {
         ans$correlation <- (R * resvar)/outer(se, se)
         dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
         ans$symbolic.cor <- symbolic.cor
     }
     class(ans) <- "summary.lm"
     ans
}
<environment: namespace:base>