[R] looping over lapply calls

Ramon Diaz-Uriarte rdiazuri at students.wisc.edu
Sat Jun 3 15:39:37 CEST 2000


Dear All,

I am writing a function to analyze simulated data. For each subset of data
(those with the same simulation counter), I have to fit a linear model and
output the coefficients and the F's from drop1 (marginal F tests). I have
tried three approaches: using lapply, using a for loop, and looping over
blocks, where within each block I use lapply (following the suggestion in "S
programming", pp. 156 and 174). The later is often the fastest method
(execution time can be less than half of the other methods).  I am wondering: 

a) why exactly is that the case? (Is it related to the "split" in lapply or
the "matrix(unlist(etc))" in my function)

b) is there some rule of thumb to choose the size of the block over
which to use lapply?

Thanks,

Ramon

P.S. For completeness, I include below the core of the function I am using;
comments most welcome.



lmx.0 <- function(formula,data,max.num=0,lapply.size=100){ # looping over lapply
   # remeber that sim.counter starts at 0
  if(max.num) data<-data[data$sim.counter<max.num+1,]  else max.num<-max(data$sim.counter)
  names.vars<-drop.scope(formula)
 
 terms.in.model<-names(lm(formula=formula,data=data,subset=data$sim.counter==0)[[1]])
# there must be a simpler way...

  loop.counter<-(max.num+1)%/%lapply.size
  rest.of.data<-(max.num+1)%%lapply.size

  tmp<-matrix(nrow=max.num+1,ncol=length(names.vars)+length(terms.in.model))
  i<-0
  if (loop.counter) { #only enter in the loop if needed
  for(i in 1:loop.counter){
  datai<-data[data$sim.counter<=((i*lapply.size)-1) & data$sim.counter>=((i-1)*lapply.size),]
  # obtain output of interest ---fm[[1]],my.drop(fm)--- by applying that function
  # over the subset of data within loop; as result is list, unlist and turn into a matrix
  # it is a little bit hard to read, but I am trying to minimize creating large
  # intermediate objects.

  tmp[(((i-1)*lapply.size)+1):(i*lapply.size),]<-matrix(unlist(lapply(split(datai,datai$sim.counter),
                            function(datos,formula){
                              fm<-lm(formula=formula,data=datos);
                              c(fm[[1]],my.drop(fm))},
                            formula=formula)),
                     nrow=lapply.size,byrow=T)
}}
  # here we deal with the other cases
  datai<-data[data$sim.counter>=(loop.counter*lapply.size),]
  tmp[(((i*lapply.size)+1):(max.num+1)),] <- 
       matrix(unlist(lapply(split(datai,datai$sim.counter),                     
        function(datos,formula){fm<-lm(formula=formula,data=datos); c(fm[[1]],my.drop(fm))}, formula=formula)),        
             nrow=rest.of.data,byrow=T)

  tmp
}


my.drop<- function (object) 
{
  # This is from drop1 (by BDR) after eliminating things I didn't need;
  # this function is severely crippled, so use only here
  # components are NOT named      
    x <- model.matrix(object)     iswt <-
    !is.null(wt <- object$weights)     n <- nrow(x)
    asgn <- attr(x, "assign")
    tl <- attr(object$terms, "term.labels")
    scope <- drop.scope(object)
    ndrop <- match(scope, tl)
    ns <- length(scope)
    rdf <- object$df.resid
    chisq <- deviance.lm(object)
    dfs <- numeric(ns)
    RSS <- numeric(ns)
    y <- object$residuals + predict(object)
    rank <- object$rank
    for (i in 1:ns) {
        ii <- seq(along = asgn)[asgn == ndrop[i]]
        jj <- setdiff(seq(ncol(x)), ii)        
        z <- if (iswt) 
            lm.wfit(x[, jj, drop = FALSE], y, wt)
        else lm.fit(x[, jj, drop = FALSE], y)
        dfs[i] <- z$rank
        RSS[i] <- deviance.lm(z)
    }
    dfs <- c(object$rank, dfs)
    RSS <- c(chisq, RSS)
    dfs <- (dfs[1] - dfs)[-1]
    rdf <- object$df.resid
    dev<-RSS[-1]-RSS[1]
    rms<-RSS[1]/rdf
    Fs<-(dev/dfs)/rms
    Fs
}

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._



More information about the R-help mailing list