#### Martin Maechler's slight improvements over MASS' excellent lm.ridge()
####  (based on VR 7.2-16's file dated 2003-10-25
####   /usr/local/app/R/R_local/src/VR/MASS/R/lm.ridge.R
##
## file MASS/lm.ridge.q
## copyright (C) 1994-9 W. N. Venables and B. D. Ripley
##
lm.ridge <-
    function(formula, data, subset, na.action, lambda = 0, stats = FALSE,
             model = FALSE, x = FALSE, y = FALSE, contrasts = NULL, ...)
{
    m <- match.call(expand = FALSE)
    m$model <- m$x <- m$y <- m$contrasts <- m$... <- m$lambda <- m$stats <- NULL
    m[[1]] <- as.name("model.frame")
    m <- eval.parent(m)
    Terms <- attr(m, "terms")
    Y <- model.response(m)
    X <- model.matrix(Terms, m, contrasts)
    n <- nrow(X); p <- ncol(X)
    offset <- model.offset(m)
    if(!is.null(offset)) Y <- Y - offset
    if(Inter <- attr(Terms, "intercept"))
    {
        ## center the X[,] and Y variables
        Xm <- colMeans(X[, -Inter])
        Ym <- mean(Y)
        p <- p - 1
        X <- X[, -Inter] - rep(Xm, rep(n, p))
        Y <- Y - Ym
    } else Ym <- Xm <- NA
    ## and scale the X[]'s
    Xscale <- drop(rep(1/n, n) %*% X^2)^0.5
    X <- X/rep(Xscale, rep(n, p))

    ## Compute the SVD of the sphered X, i.e. its principal components:
    Xs <- svd(X)         # X = U D V
    rhs <- t(Xs$u) %*% Y #  U'Y
    d <- Xs$d
    lscoef <-  Xs$v %*% (rhs/d)  # \hat\beta_LS
    lsfit <- X %*% lscoef        # \hat Y_LS
    resid <- Y - lsfit
    s2 <- sum(resid^2)/(n - p - Inter)
    k <- length(lambda)
    ## and now solve for all k lambda's simultaneously
    ## inverse of diagonal matrix (D^2 + lambda*I)^{-1} = 1 / div {vector}:
    div <- d^2 + rep(lambda, rep(p,k))
    ## degrees of freedom = trace(Hat M.) = sum_j (d_j^2 / (d_j^2 + lambda)):
    df <- colSums(matrix(d^2/div, p))
    a <- drop(d*rhs)/div
    dim(a) <- c(p, k)
    coef <- Xs$v %*% a
    dimnames(coef) <- list(names(Xscale), format(lambda))
    if(stats) {
        HKB <- (p-2)*s2/sum(lscoef^2)
        LW <- (p-2)*s2*n/sum(lsfit^2)
        GCV <- colSums((Y - X %*% coef)^2)/(n - df)^2
    } else HKB <- LW <- GCV <- NULL
    res <- list(coef = drop(coef), scales = Xscale,
                Inter = Inter, lambda = lambda, ym = Ym, xm = Xm,
                df = df, GCV = GCV, kHKB = HKB, kLW = LW)
    class(res) <- "ridgelm"
    res
}

print.ridgelm <- function(x, ...)
{
    scaledcoef <- t(as.matrix(x$coef / x$scales))
    if(x$Inter) {
        inter <- x$ym - scaledcoef %*% x$xm
        scaledcoef <- cbind(Intercept = inter, scaledcoef)
    }
    print(drop(scaledcoef), ...)
}

select <- function(obj) UseMethod("select")

select.ridgelm <- function(obj)
{
    cat("modified HKB estimator is", format(obj$kHKB), "\n")
    cat("modified L-W estimator is", format(obj$kLW), "\n")
    GCV <- obj$GCV
    if(length(GCV) > 0) {
        k <- seq(along = GCV)[GCV == min(GCV)]
        cat("smallest value of GCV  at",
            format(obj$lambda[k]), "\n")
    }
}

plot.ridgelm <- function(x, ...)
    matplot(x$lambda, t(x$coef), type = "l")
## a bit nicer and more flexible:
plot.ridgelm <-
    function(x, type = "l", xlim = range(lambda), xlab = expression(lambda),
             ylab = expression(beta[j]), main = NULL,
             do.stats = !is.null(x$GCV), col.stats = "light blue", ...)
{
    ## Purpose: better than simple one in "MASS" package
    ## ----------------------------------------------------------------------
    ## Arguments:
    ## ----------------------------------------------------------------------
    ## Author: Martin Maechler, Date: 16 Jun 2005, 11:45
    op <- par(mar = par("mar") + c(0,0,0,3))
    on.exit(par(op))
    lambda <- x$lambda
    coef <- x$coef
    matplot(lambda, t(coef), type = type, xlim = xlim,
            xlab = xlab, ylab = ylab, main = main, ...)
    abline(h=0, col = "gray")
    if(do.stats) {
        l.est <- c(lGCV = lambda[which.min(x$GCV)], x$kHKB, x$kLW)
        abline(v = l.est, lty = 3, col = col.stats)
        axis(3, at=l.est, labels = c("GCV", "kHKB", "kLW"),
             col= col.stats, mgp = c(3, 0.5,0))
    }
    i.last <- max(which(lambda <= xlim[2]))
    axis(4, at = coef[, i.last], labels = rownames(coef), las = 2)
    invisible()
}