[R] obtaining p-values for lm.ridge() coefficients (package 'MASS')

Wed Aug 24 13:58:40 CEST 2011

The attachment seems to have been dropped, so I'm pasting the code
below. Sorry for that
Liviu

On Wed, Aug 24, 2011 at 1:44 PM, Liviu Andronic <landronimirc at gmail.com> wrote:
>> Second, penalty in ols() is not the same as the ridge constant in lm.ridge,
>> but rather a penalty on the log likelihood.  The documentation
>> for ols refers to ?lrm for the description.
>>
> I see. At the same time ?rms::ols contains: "The penalized maximum
> likelihood estimate (penalized least squares or ridge estimate) of
> beta is (X'X + P)^{-1} X'Y.", while ?rms::lrm defines P as "penalty
> \times diag(pf) \times penalty.matrix \times diag(pf), where pf is the
> vector of square roots of penalty factors computed from penalty". This
> is suspiciously similar to the definition of the "classical" Ridge
> Regression beta estimates (X'X + kI)^{-1} X'Y, where k is the lambda
> constant and I the identity matrix.
>
> Hence I was wondering if rms::ols could be forced to use 'P = kI', and
> thus compute the "classical" Ridge estimates? I tried to hack rms::ols
> to ensure that it passed 'kI' to lm.pfit(..., penalty.matrix = ...),
> but I get strange results so I must be missing something. (See
> attached.)
>

ols.ridge <-
    function (formula, data, weights, subset, na.action = na.delete,
        method = "qr", model = FALSE, x = FALSE, y = FALSE, se.fit = FALSE,
        linear.predictors = TRUE, penalty = 0, penalty.matrix, tol = 1e-07,
        sigma = NULL, var.penalty = c("simple", "sandwich"), ridge=TRUE, ...)
{
    call <- match.call()
    var.penalty <- match.arg(var.penalty)
    m <- match.call(expand = FALSE)
    mc <- match(c("formula", "data", "subset", "weights", "na.action"),
        names(m), 0)
    m <- m[c(1, mc)]
    m$na.action <- na.action
    m$drop.unused.levels <- TRUE
    m[[1]] <- as.name("model.frame")
    if(ridge) lambda <- penalty
    if (length(attr(terms(formula), "term.labels"))) {
        dul <- .Options$drop.unused.levels
        if (!length(dul) || dul) {
            on.exit(options(drop.unused.levels = dul))
            options(drop.unused.levels = FALSE)
        }
        X <- Design(eval.parent(m))
        options(drop.unused.levels = dul)
        atrx <- attributes(X)
        atr <- atrx$Design
        nact <- atrx$na.action
        Terms <- atrx$terms
        assig <- DesignAssign(atr, 1, Terms)
        penpres <- FALSE
        if (!missing(penalty) && any(unlist(penalty) != 0))
            penpres <- TRUE
        if (!missing(penalty.matrix) && any(penalty.matrix !=
            0))
            penpres <- TRUE
        if (penpres && missing(var.penalty))
            warning("default for var.penalty has changed to \"simple\"")
        if (method == "model.frame")
            return(X)
        scale <- as.character(formula[2])
        attr(Terms, "formula") <- formula
        weights <- model.extract(X, "weights")
        if (length(weights) && penpres)
            stop("may not specify penalty with weights")
        Y <- model.extract(X, "response")
        n <- length(Y)
        if (model)
            m <- X
        X <- model.matrix(Terms, X)
        if (length(atr$colnames))
            dimnames(X)[[2]] <- c("Intercept", atr$colnames)
        else dimnames(X)[[2]] <- c("Intercept", dimnames(X)[[2]][-1])
        if (method == "model.matrix")
            return(X)
    }
    else {
        if (length(weights))
            stop("weights not implemented when no covariables are present")
        assig <- NULL
        yy <- attr(terms(formula), "variables")[1]
        Y <- eval(yy, sys.parent(2))
        nmiss <- sum(is.na(Y))
        if (nmiss == 0)
            nmiss <- NULL
        else names(nmiss) <- as.character(yy)
        Y <- Y[!is.na(Y)]
        yest <- mean(Y)
        coef <- yest
        n <- length(Y)
        if (!length(sigma))
            sigma <- sqrt(sum((Y - yest)^2)/(n - 1))
        cov <- matrix(sigma * sigma/n, nrow = 1, ncol = 1, dimnames =
list("Intercept",
            "Intercept"))
        fit <- list(coefficients = coef, var = cov, non.slopes = 1,
            fail = FALSE, residuals = Y - yest, df.residual = n -
                1, intercept = TRUE)
        if (linear.predictors) {
            fit$linear.predictors <- rep(yest, n)
            names(fit$linear.predictors) <- names(Y)
        }
        if (model)
            fit$model <- m
        if (x)
            fit$x <- matrix(1, ncol = 1, nrow = n, dimnames = list(NULL,
                "Intercept"))
        if (y)
            fit$y <- Y
        fit$fitFunction <- c("ols", "lm")
        oldClass(fit) <- c("ols", "rms", "lm")
        return(fit)
    }
    if (!penpres) {
        fit <- if (length(weights))
            lm.wfit(X, Y, weights, method = method, ...)
        else lm.fit(X, Y, method = method, ...)
        cov.unscaled <- chol2inv(fit$qr$qr)
        r <- fit$residuals
        yhat <- Y - r
        if (length(weights)) {
            sse <- sum(weights * r^2)
            m <- sum(weights * yhat/sum(weights))
            ssr <- sum(weights * (yhat - m)^2)
            r2 <- ssr/(ssr + sse)
            if (!length(sigma))
                sigma <- sqrt(sse/fit$df.residual)
        }
        else {
            sse <- sum(fit$residuals^2)
            if (!length(sigma))
                sigma <- sqrt(sse/fit$df.residual)
            r2 <- 1 - sse/sum((Y - mean(Y))^2)
        }
        fit$var <- sigma * sigma * cov.unscaled
        cnam <- dimnames(X)[[2]]
        dimnames(fit$var) <- list(cnam, cnam)
        fit$stats <- c(n = n, `Model L.R.` = -n * logb(1 - r2),
            d.f. = length(fit$coef) - 1, R2 = r2, g = GiniMd(yhat),
            Sigma = sigma)
    }
    else {
        p <- length(atr$colnames)
        if (missing(penalty.matrix))
            penalty.matrix <- Penalty.matrix(atr, X)
        if (nrow(penalty.matrix) != p || ncol(penalty.matrix) !=
            p)
            stop("penalty matrix does not have", p, "rows and columns")
        psetup <- Penalty.setup(atr, penalty)
        penalty <- psetup$penalty
        multiplier <- psetup$multiplier
        if (length(multiplier) == 1)
            penalty.matrix <- multiplier * penalty.matrix
        else {
            a <- diag(sqrt(multiplier))
            penalty.matrix <- a %*% penalty.matrix %*% a
        }
        if(ridge) {
            stopifnot(length(lambda)==1)
            penalty.matrix <- lambda * diag(p)
            if (nrow(penalty.matrix) != p || ncol(penalty.matrix) !=
                p)
                stop("(ridge) penalty matrix does not have", p, "rows
and columns")
        }
        fit <- lm.pfit(X, Y, penalty.matrix = penalty.matrix,
            tol = tol, var.penalty = var.penalty)
        if(!ridge) fit$penalty <- penalty else fit$penalty <-lambda
    }
    if (model)
        fit$model <- m
    if (linear.predictors) {
        fit$linear.predictors <- Y - fit$residuals
        names(fit$linear.predictors) <- names(Y)
    }
    if (x)
        fit$x <- X
    if (y)
        fit$y <- Y
    if (se.fit) {
        se <- drop((((X %*% fit$var) * X) %*% rep(1, ncol(X)))^0.5)
        names(se) <- names(Y)
        fit$se.fit <- se
    }
    fit <- c(fit, list(call = call, terms = Terms, Design = atr,
        non.slopes = 1, na.action = nact, scale.pred = scale,
        fail = FALSE, fitFunction = c("ols", "lm")))
    fit$assign <- assig
    oldClass(fit) <- c("ols", "rms", "lm")
    fit
}