[R] Efficient coding

Daniele Amberti daniele.amberti at ors.it
Tue Aug 4 16:07:32 CEST 2009


I have a piece of code as the one at the bottom, unfortunately since it involves time series from a db it's not easy to give to mailing list a working script.
It becomes very slow after few hundred iterations over variable sp (must process several thousands).
The Rprof() indicates that the problem is the use of gc(). Can someone indicate what I have to take care of not to call gc() so often?

Thanks in advance


-----
toProcess <- unique(tsr$cSpillingPointID)
first <- 1
for(sp in toProcess)
{
  tsrSUB <- tsr[tsr$cSpillingPointID == sp,]
  dmnd <- tsrSUB[is.na(tsrSUB$cDeviceClass),]
  if(nrow(dmnd) != 1)
    warning(paste("cSpillingPointID", sp, "has more than one demand time Series. Used first."))

  tsrValCount <- getTimeSeries(chn, tsrSUB[!is.na(tsrSUB$cDeviceClass),]$cTimeSeriesID, nDaysBack = 366*nyears)
  tsrValDmnd  <- getTimeSeries(chn, dmnd$cTimeSeriesID[1], nDaysBack = 366*nyears)

  sequence <- timeSequence(from = format(start(tsrValDmnd), '%Y-%m-%d'), to = format(end(tsrValDmnd), '%Y-%m-%d'), by = 'day')
  if(format(start(tsrValDmnd), '%H') != '00')
    tsrValDmnd <- window(tsrValDmnd, sequence[3], end(tsrValDmnd))
  if(format(end(tsrValDmnd), '%H') != '23')
    tsrValDmnd <- window(tsrValDmnd, start(tsrValDmnd), sequence[length(sequence)-2])

  sequence <- sequence - 3600
  tsrValDmnd <- aggregate(tsrValDmnd, by = sequence, sum)
  sequence <- seriesPositions(tsrValDmnd)
  newPositions(tsrValDmnd) <- sequence - 23*3600
  #head(tsrValDmnd2)
  #sum(head(tsrValDmnd, 24))
  tsrValSub <- cbind(tsrValDmnd, tsrValCount)
  tsrValSub <- na.omit(tsrValSub)
  head(tsrValSub)

  if(nrow(tsrValSub) > 1)
  {
  dif <- na.omit(tsrValSub[,-1] -lag(tsrValSub[,-1],1))
  head(dif)

  costant <- is.costant(dif)
  if(any(costant == FALSE))
  {
    a <- (dif != 0) *1:nrow(dif)
    a <- seriesPositions(dif[abs(a[a!=0]),])
    from <- min(a) - ndays *24*60*60
    to <- max(a) + ndays *24*60*60
    tsrValSub <- window(tsrValSub, from, to)
  }  else
  {
    if(nrow(tsrValSub) > ndays) tsrValSub <- sample(tsrValSub, ndays)
  }

  if(nrow(tsrValSub) > 1)
  {
    row.names(tsrSUB) <- tsrSUB$cTimeSeriesID
    if(any(is.na(tsrSUB$cDeviceClass)))
      tsrSUB[is.na(tsrSUB$cDeviceClass),]$cDeviceClass <- 'DMND'

    tsrValSub at units <- tsrSUB[tsrValSub at units,]$cDeviceClass

    if(first == 1)
      dat <- data.frame(cSpillingPointID = sp,
                        year = format(seriesPositions(tsrValSub), '%Y'),
                        month = format(seriesPositions(tsrValSub), '%m'),
                        day = format(seriesPositions(tsrValSub), '%d'),
                        tsrValSub,
                        stringsAsFactors = F)
    else
      dat <- merge(dat,
                   data.frame(cSpillingPointID = sp,
                              year = format(seriesPositions(tsrValSub), '%Y'),
                              month = format(seriesPositions(tsrValSub), '%m'),
                              day = format(seriesPositions(tsrValSub), '%d'),
                              tsrValSub,
                              stringsAsFactors = F),
                   all.x = T, all.y = T)

    cat(paste('Added', nrow(tsrValSub), 'rows.\n'))
    cat(paste('...', round(first/length(toProcess)*100, 2), '%...\n'))
  }
  }
  else cat(paste('...', round(first/length(toProcess)*100, 2), '%...\n'))

  if(any(first %in% seq(100, length(toProcess), 250) )) gc()
  first <- first +1
}
----

ORS Srl

Via Agostino Morando 1/3 12060 Roddi (Cn) - Italy
Tel. +39 0173 620211
Fax. +39 0173 620299 / +39 0173 433111
Web Site www.ors.it

------------------------------------------------------------------------------------------------------------------------
Qualsiasi utilizzo non autorizzato del presente messaggio e dei suoi allegati ? vietato e potrebbe costituire reato.
Se lei avesse ricevuto erroneamente questo messaggio, Le saremmo grati se provvedesse alla distruzione dello stesso
e degli eventuali allegati.
Opinioni, conclusioni o altre informazioni riportate nella e-mail, che non siano relative alle attivit? e/o
alla missione aziendale di O.R.S. Srl si intendono non  attribuibili alla societ? stessa, n? la impegnano in alcun modo.




More information about the R-help mailing list