[BioC] Increasing rlm iterations for convergence in limma's
normalizeRobustSpline
Paul Boutros
Paul.Boutros at utoronto.ca
Wed Aug 11 06:33:54 CEST 2004
Hi again,
I've come across some cDNA array data that doesn't converge when I use
limma's robust-spline normalization. I get this from:
> library(limma);
> setwd("c:/paul/dev/rat-mouse/limma/");
> targets <- readTargets();
> design <- cbind(c(1,1,1,1,-1,-1,-1,-1), c(0,0,0,0,1,1,1,1));
> colnames(design) <- c("Species", "Dye");
> read.series(targets$FileName,suffix=NULL,skip=29,sep="\t");
[1] "8 slides read"
> layout <- list(ngrid.r=12, ngrid.c=4, nspot.r=25, nspot.c=26);
> RG <- kooperberg(names=targets$FileName,layout=layout);
> RG$genes <- readGAL();
> MA <- normalizeWithinArrays(RG, layout, method="robustspline");
Warning message:
rlm failed to converge in 20 steps in: rlm.default(x, y, weights = w, method
= method, maxit = maxit)
To get around this I modified normalizeRobustSpline and
normalizeWithinArrays to accept a maxit parameter to pass to rlm (the code
is below). This allows:
> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=20);
Warning message:
rlm failed to converge in 20 steps in: rlm.default(x, y, weights = w, method
= method, maxit = maxit)
> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=21);
Warning message:
rlm failed to converge in 21 steps in: rlm.default(x, y, weights = w, method
= method, maxit = maxit)
> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=22);
> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=23);
Which indicated that 22 iterations were necessary for convergence.
To test that this didn't break anything else I also:
> MA.old <- normalizeWithinArrays(RG, layout, method="printtiploess");
> # changed functions here: removed for email-display #
> MA.new <- normalizeWithinArrays(RG, layout, method="printtiploess",
maxit=25);
> write.table(MA.old$M, 'old.txt');
> write.table(MA.new$M, 'new.txt');
And then did a file-diff on the files:
C:\Program Files\R\rw1091>comp
Name of first file to compare: old.txt
Name of second file to compare: new.txt
Option:
Comparing old.txt and new.txt...
Files compare OK
The code is below -- hopefully it's useful to other people as well.
Now, my question is: should the fact that my data took more than 20
iterations to converge be sounding alarm-bells in my head?
Paul
### BEGIN CODE ###
normalizeWithinArrays <- function (object, layout = object$printer, method =
"printtiploess",
weights = object$weights, span = 0.3, iterations = 4, controlspots =
NULL,
df = 5, robust = "M", maxit=20)
{
if (!is(object, "MAList"))
object <- MA.RG(object)
choices <- c("none", "median", "loess", "printtiploess",
"composite", "robustspline")
method <- match.arg(method, choices)
if (method == "none")
return(object)
narrays <- ncol(object$M)
if (method == "median") {
for (j in 1:narrays) object$M[, j] <- object$M[, j] -
median(object$M[, j], na.rm = TRUE)
return(object)
}
switch(method, loess = {
for (j in 1:narrays) {
y <- object$M[, j]
x <- object$A[, j]
w <- weights[, j]
object$M[, j] <- loessFit(y, x, w, span = span, iterations =
iterations)$residuals
}
}, printtiploess = {
if (is.null(layout))
stop("Layout argument not specified")
ngr <- layout$ngrid.r
ngc <- layout$ngrid.c
nspots <- layout$nspot.r * layout$nspot.c
for (j in 1:narrays) {
spots <- 1:nspots
for (gridr in 1:ngr) for (gridc in 1:ngc) {
y <- object$M[spots, j]
x <- object$A[spots, j]
w <- weights[spots, j]
object$M[spots, j] <- loessFit(y, x, w, span = span,
iterations = iterations)$residuals
spots <- spots + nspots
}
}
}, composite = {
if (is.null(layout))
stop("Layout argument not specified")
if (is.null(controlspots))
stop("controlspots argument not specified")
ntips <- layout$ngrid.r * layout$ngrid.c
nspots <- layout$nspot.r * layout$nspot.c
for (j in 1:narrays) {
y <- object$M[, j]
x <- object$A[, j]
w <- weights[, j]
f <- is.finite(y) & is.finite(x) & is.finite(w)
y[!f] <- NA
fit <- loess(y ~ x, weights = w, span = span, subset =
controlspots,
na.action = na.exclude, degree = 0, surface = "direct",
family = "symmetric", trace.hat = "approximate",
iterations = iterations)
alpha <- global <- y
global[f] <- predict(fit, newdata = x[f])
alpha[f] <- (rank(x[f]) - 1)/sum(f)
spots <- 1:nspots
for (tip in 1:ntips) {
y <- object$M[spots, j]
x <- object$A[spots, j]
w <- weights[spots, j]
local <- loessFit(y, x, w, span = span, iterations =
iterations)$fitted
object$M[spots, j] <- object$M[spots, j] - alpha[spots] *
global[spots] - (1 - alpha[spots]) * local
spots <- spots + nspots
}
}
}, robustspline = {
if (is.null(layout))
stop("Layout argument not specified")
for (j in 1:narrays) object$M[, j] <-
normalizeRobustSpline(object$M[,
j], object$A[, j], layout, df = df, method = robust,
maxit=maxit)
})
object
}
normalizeRobustSpline <- function (M, A, layout, df = 5, method = "M",
maxit=20)
{
require(MASS)
require(splines)
ngrids <- layout$ngrid.r * layout$ngrid.c
nspots <- layout$nspot.r * layout$nspot.c
O <- is.finite(M) & is.finite(A)
X <- matrix(NA, ngrids * nspots, df)
X[O, ] <- ns(A[O], df = df, intercept = TRUE)
x <- X[O, , drop = FALSE]
y <- M[O]
w <- rep(1, length(y))
s <- summary(rlm(x, y, weights = w, method = method, maxit=maxit),
method = "XtWX",
correlation = FALSE)
beta0 <- s$coefficients[, 1]
covbeta0 <- s$cov * s$stddev^2
beta <- array(1, c(ngrids, 1)) %*% array(beta0, c(1, df))
covbeta <- array(0, c(ngrids, df, df))
spots <- 1:nspots
for (i in 1:ngrids) {
o <- O[spots]
y <- M[spots][o]
if (length(y)) {
x <- X[spots, ][o, , drop = FALSE]
r <- qr(x)$rank
if (r < df)
x <- x[, 1:r, drop = FALSE]
w <- rep(1, length(y))
s <- summary(rlm(x, y, weights = w, method = method,
maxit=maxit),
method = "XtWX", correlation = FALSE)
beta[i, 1:r] <- s$coefficients[, 1]
covbeta[i, 1:r, 1:r] <- s$cov * s$stddev^2
}
spots <- spots + nspots
}
res.cov <- cov(beta) - apply(covbeta, c(2, 3), mean)
Sigma0 <- covbeta0 * max(0, sum(diag(res.cov))/sum(diag(covbeta0)))
spots <- 1:nspots
for (i in 1:ngrids) {
beta[i, ] <- beta0 + Sigma0 %*% solve(Sigma0 + covbeta[i,
, ], beta[i, ] - beta0)
o <- O[spots]
x <- X[spots, ][o, ]
M[spots][o] <- M[spots][o] - x %*% beta[i, ]
M[spots][!o] <- NA
spots <- spots + nspots
}
M
}
### END CODE ###
More information about the Bioconductor
mailing list