[R] nested for loops too slow

Sun Apr 12 15:47:21 CEST 2015

The small example below works lighting-fast; however, when I run the same
script on my real problem, a 1Gb text file, the for loops have been running
for over 24 hrs and I have no idea if the processing is 10% done or 90%
done.  I have not been able to figure out a betteR way to code up the
material within the for loops at the end of the example below.  The
contents of divChng, the final product, are exactly what I'm after, but I
need help formulating more efficient R script, I've got two more 1Gb files
to process after the current one finishes, whenever that is...

I appreciate any insights/solutions, Eric

dat <- read.table(textConnection("ISEG  IRCH  div  gw
1  1  265  229
1  2  260  298
1  3  234  196
54  1  432  485
54  39  467  485
54  40  468  468
54  41  460  381
54  42  489  502
1  1  265  317
1  2  276  225
1  3  217  164
54  1  430  489
54  39  456  495
54  40  507  607
54  41  483  424
54  42  457  404
1  1  265  278
1  2  287  370
1  3  224  274
54  1  412  585
54  39  473  532
54  40  502  595
54  41  497  441
54  42  447  467
1  1  230  258
1  2  251  152
1  3  199  179
54  1  412  415
54  39  439  538
54  40  474  486
54  41  477  484
54  42  413  346
1  1  230  171
1  2  262  171
1  3  217  263
54  1  432  485
54  39  455  482
54  40  493  419
54  41  489  536
54  42  431  504
1  1  1002  1090
1  2  1222  1178
1  3  1198  1177
54  1  1432  1485
54  39  1876  1975
54  40  1565  1646
54  41  1455  1451
54  42  1427  1524
1  1  1002  968
1  2  1246  1306
1  3  1153  1158
54  1  1532  1585
54  39  1790  1889
54  40  1490  1461
54  41  1518  1536
54  42  1486  1585
1  1  1002  1081
1  2  1229  1262
1  3  1142  1241
54  1  1632  1659
54  39  1797  1730
54  40  1517  1466
54  41  1527  1589
54  42  1514  1612"),header=TRUE)

dat$seq <- ifelse(dat$ISEG==1 & dat$IRCH==1, 1, 0)
tmp <- diff(dat[dat$seq==1,]$div)!=0
dat$idx <- 0
dat[dat$seq==1,][c(TRUE,tmp),]$idx <- 1
dat$ts <- cumsum(dat$idx)
dat$iter <- ave(dat$seq, dat$ts,FUN=cumsum)
dat$ct <- seq(1:length(dat[,1]))

timeStep <- unique(dat$ts)
SEG <- unique(dat$ISEG)
divChng <- data.frame(ts=NA, ISEG=NA, divChng=NA, gwChng=NA, iter=NA)

#Can the following be rescripted for better harnessing R's processing power?

for (i in 1:length(timeStep)){
  for (j in 1:length(SEG)){
    datTS <- subset(dat,ts==timeStep[i] & ISEG==SEG[j] & IRCH==1)
    datGW <- subset(dat,ts==timeStep[i] & ISEG==SEG[j])
    grw <- aggregate(gw ~ iter, datGW, sum)

    DC <- max(datTS$div)-min(datTS$div)
    GRW <- max(grw$gw) - min(grw$gw)
    divChng <- rbind(divChng,c(datTS$ts[1], SEG[j], DC, GRW,
max(datTS$iter)))
  }
}
divChng <- divChng[!is.na(divChng$ISEG),]

	[[alternative HTML version deleted]]