[R] obtaining p-values for lm.ridge() coefficients (package 'MASS')
Liviu Andronic
landronimirc at gmail.com
Wed Aug 24 13:58:40 CEST 2011
The attachment seems to have been dropped, so I'm pasting the code
below. Sorry for that
Liviu
On Wed, Aug 24, 2011 at 1:44 PM, Liviu Andronic <landronimirc at gmail.com> wrote:
>> Second, penalty in ols() is not the same as the ridge constant in lm.ridge,
>> but rather a penalty on the log likelihood. The documentation
>> for ols refers to ?lrm for the description.
>>
> I see. At the same time ?rms::ols contains: "The penalized maximum
> likelihood estimate (penalized least squares or ridge estimate) of
> beta is (X'X + P)^{-1} X'Y.", while ?rms::lrm defines P as "penalty
> \times diag(pf) \times penalty.matrix \times diag(pf), where pf is the
> vector of square roots of penalty factors computed from penalty". This
> is suspiciously similar to the definition of the "classical" Ridge
> Regression beta estimates (X'X + kI)^{-1} X'Y, where k is the lambda
> constant and I the identity matrix.
>
> Hence I was wondering if rms::ols could be forced to use 'P = kI', and
> thus compute the "classical" Ridge estimates? I tried to hack rms::ols
> to ensure that it passed 'kI' to lm.pfit(..., penalty.matrix = ...),
> but I get strange results so I must be missing something. (See
> attached.)
>
ols.ridge <-
function (formula, data, weights, subset, na.action = na.delete,
method = "qr", model = FALSE, x = FALSE, y = FALSE, se.fit = FALSE,
linear.predictors = TRUE, penalty = 0, penalty.matrix, tol = 1e-07,
sigma = NULL, var.penalty = c("simple", "sandwich"), ridge=TRUE, ...)
{
call <- match.call()
var.penalty <- match.arg(var.penalty)
m <- match.call(expand = FALSE)
mc <- match(c("formula", "data", "subset", "weights", "na.action"),
names(m), 0)
m <- m[c(1, mc)]
m$na.action <- na.action
m$drop.unused.levels <- TRUE
m[[1]] <- as.name("model.frame")
if(ridge) lambda <- penalty
if (length(attr(terms(formula), "term.labels"))) {
dul <- .Options$drop.unused.levels
if (!length(dul) || dul) {
on.exit(options(drop.unused.levels = dul))
options(drop.unused.levels = FALSE)
}
X <- Design(eval.parent(m))
options(drop.unused.levels = dul)
atrx <- attributes(X)
atr <- atrx$Design
nact <- atrx$na.action
Terms <- atrx$terms
assig <- DesignAssign(atr, 1, Terms)
penpres <- FALSE
if (!missing(penalty) && any(unlist(penalty) != 0))
penpres <- TRUE
if (!missing(penalty.matrix) && any(penalty.matrix !=
0))
penpres <- TRUE
if (penpres && missing(var.penalty))
warning("default for var.penalty has changed to \"simple\"")
if (method == "model.frame")
return(X)
scale <- as.character(formula[2])
attr(Terms, "formula") <- formula
weights <- model.extract(X, "weights")
if (length(weights) && penpres)
stop("may not specify penalty with weights")
Y <- model.extract(X, "response")
n <- length(Y)
if (model)
m <- X
X <- model.matrix(Terms, X)
if (length(atr$colnames))
dimnames(X)[[2]] <- c("Intercept", atr$colnames)
else dimnames(X)[[2]] <- c("Intercept", dimnames(X)[[2]][-1])
if (method == "model.matrix")
return(X)
}
else {
if (length(weights))
stop("weights not implemented when no covariables are present")
assig <- NULL
yy <- attr(terms(formula), "variables")[1]
Y <- eval(yy, sys.parent(2))
nmiss <- sum(is.na(Y))
if (nmiss == 0)
nmiss <- NULL
else names(nmiss) <- as.character(yy)
Y <- Y[!is.na(Y)]
yest <- mean(Y)
coef <- yest
n <- length(Y)
if (!length(sigma))
sigma <- sqrt(sum((Y - yest)^2)/(n - 1))
cov <- matrix(sigma * sigma/n, nrow = 1, ncol = 1, dimnames =
list("Intercept",
"Intercept"))
fit <- list(coefficients = coef, var = cov, non.slopes = 1,
fail = FALSE, residuals = Y - yest, df.residual = n -
1, intercept = TRUE)
if (linear.predictors) {
fit$linear.predictors <- rep(yest, n)
names(fit$linear.predictors) <- names(Y)
}
if (model)
fit$model <- m
if (x)
fit$x <- matrix(1, ncol = 1, nrow = n, dimnames = list(NULL,
"Intercept"))
if (y)
fit$y <- Y
fit$fitFunction <- c("ols", "lm")
oldClass(fit) <- c("ols", "rms", "lm")
return(fit)
}
if (!penpres) {
fit <- if (length(weights))
lm.wfit(X, Y, weights, method = method, ...)
else lm.fit(X, Y, method = method, ...)
cov.unscaled <- chol2inv(fit$qr$qr)
r <- fit$residuals
yhat <- Y - r
if (length(weights)) {
sse <- sum(weights * r^2)
m <- sum(weights * yhat/sum(weights))
ssr <- sum(weights * (yhat - m)^2)
r2 <- ssr/(ssr + sse)
if (!length(sigma))
sigma <- sqrt(sse/fit$df.residual)
}
else {
sse <- sum(fit$residuals^2)
if (!length(sigma))
sigma <- sqrt(sse/fit$df.residual)
r2 <- 1 - sse/sum((Y - mean(Y))^2)
}
fit$var <- sigma * sigma * cov.unscaled
cnam <- dimnames(X)[[2]]
dimnames(fit$var) <- list(cnam, cnam)
fit$stats <- c(n = n, `Model L.R.` = -n * logb(1 - r2),
d.f. = length(fit$coef) - 1, R2 = r2, g = GiniMd(yhat),
Sigma = sigma)
}
else {
p <- length(atr$colnames)
if (missing(penalty.matrix))
penalty.matrix <- Penalty.matrix(atr, X)
if (nrow(penalty.matrix) != p || ncol(penalty.matrix) !=
p)
stop("penalty matrix does not have", p, "rows and columns")
psetup <- Penalty.setup(atr, penalty)
penalty <- psetup$penalty
multiplier <- psetup$multiplier
if (length(multiplier) == 1)
penalty.matrix <- multiplier * penalty.matrix
else {
a <- diag(sqrt(multiplier))
penalty.matrix <- a %*% penalty.matrix %*% a
}
if(ridge) {
stopifnot(length(lambda)==1)
penalty.matrix <- lambda * diag(p)
if (nrow(penalty.matrix) != p || ncol(penalty.matrix) !=
p)
stop("(ridge) penalty matrix does not have", p, "rows
and columns")
}
fit <- lm.pfit(X, Y, penalty.matrix = penalty.matrix,
tol = tol, var.penalty = var.penalty)
if(!ridge) fit$penalty <- penalty else fit$penalty <-lambda
}
if (model)
fit$model <- m
if (linear.predictors) {
fit$linear.predictors <- Y - fit$residuals
names(fit$linear.predictors) <- names(Y)
}
if (x)
fit$x <- X
if (y)
fit$y <- Y
if (se.fit) {
se <- drop((((X %*% fit$var) * X) %*% rep(1, ncol(X)))^0.5)
names(se) <- names(Y)
fit$se.fit <- se
}
fit <- c(fit, list(call = call, terms = Terms, Design = atr,
non.slopes = 1, na.action = nact, scale.pred = scale,
fail = FALSE, fitFunction = c("ols", "lm")))
fit$assign <- assig
oldClass(fit) <- c("ols", "rms", "lm")
fit
}
More information about the R-help
mailing list