[R] index instead of loop?

Thu Mar 8 19:40:51 CET 2012

Hello,

> Humm.... If I understand what you are saying, you are correct. I get
> 144.138 for 2009-03-20 for column C. Maybe I posted the wrong code?  If
> so,
> sorry. 

I think I have the fastest so far solution, and it checks with your
corrected,last one.

I've made just a change: to transform it into a function I renamed the
parameters
(only for use inside the function) 'zdates', without the period, 'rddata'
and 'uadata'.

'fun1' is yours, 'fun2', mine. Here it goes.

fun1 <- function(zdates, rddata, uadata){
    fix = function(x)
    {
      year = substring(x, 1, 4)
      mo = substring(x, 5, 6)
      day = substring(x, 7, 8)
      ifelse(year=="--", "--", paste(year, mo, day, sep = "-"))

    }
    rd = apply(rddata, 2, fix)
    dimnames(rd) = dimnames(rd)

    wd1 <- seq(from =as.Date(min(zdates)), to = Sys.Date(), by = "day")
    #wd1 = wd1[weekdays(wd1) == "Friday"] # uncomment to go weekly
    wd = sapply(wd1, as.character)
    mat = matrix(NA,nrow=length(wd),ncol=ncol(uadata))
    rownames(mat) = wd
    nms = as.Date(rownames(uadata))

    for(i in 1:length(wd)){
      d = as.Date(wd[i])
      diff = abs(nms - d)
      rd_row_idx = max(which(diff == min(diff)))
      rd_col_idx = which(as.Date(rd[rd_row_idx,], format="%Y-%m-%d")  < d)
      rd_col_idx_lag = which(as.Date(rd[rd_row_idx - 1,], format="%Y-%m-%d") 
< d)
      rd_col_idx_lag2 = which(as.Date(rd[rd_row_idx - 2,],
format="%Y-%m-%d")  < d)

      if(length(rd_col_idx_lag2) && (rd_row_idx - 2) > 0){
        mat[i,rd_col_idx_lag2] = uadata[rd_row_idx - 2,rd_col_idx_lag2]
      }
      if(length(rd_col_idx_lag)){
        mat[i,rd_col_idx_lag] = uadata[rd_row_idx - 1,rd_col_idx_lag]
      }
      if( length(rd_col_idx)){
        mat[i,rd_col_idx] = uadata[rd_row_idx,rd_col_idx]
      }
    }
    colnames(mat)=colnames(uadata)
    mat
}

fun2 <- function(zdates, rddata, uadata){

	fdate <- function(x, format="%Y%m%d"){
		DF <- data.frame(x)
		for(i in colnames(DF)){
			DF[, i] <- as.Date(DF[, i], format=format)
			class(DF[, i]) <- "Date"
		}
		DF
	}

	rddata <- fdate(rddata)
	wd1 <- seq(from = as.Date(zdates[1]), to = Sys.Date(), by = "day")
	nwd1 <- length(wd1)

	fin1 <- matrix(NA, nrow=length(wd1), ncol=ncol(uadata))
	nr <- nrow(rddata)
	xstart <- c(integer(nr), nwd1)
	for(j in 1:ncol(uadata)){
		x <- xstart
		for(i in 1:nr)
			x[i] <- if(!is.na(rddata[i, j]) & !is.nan(rddata[i, j]))
					which(wd1 == rddata[i, j])
					else NA
		ix <- which(!is.na(x))
		for(i in seq_len(length(ix) - 1)){
			from <- x[ ix[i] ] + 1
			to   <- x[ ix[i + 1] ]
			fin1[ from:to, j ] <- uadata[ ix[i], j ]
		}
	}
	colnames(fin1) <- colnames(uadata)
	rownames(fin1) <- as.character(wd1)
	fin1
}

t1 <- system.time(m1 <- fun1(z.dates, rd1, ua))
t2 <- system.time(m2 <- fun2(z.dates, rd1, ua))

all.equal(m1, m2)
[1] TRUE

rbind(t1, t2)
   user.self sys.self elapsed user.child sys.child
t1      1.50        0    1.50         NA        NA
t2      0.02        0    0.01         NA        NA

And the better news is that I believe it scales up without degrading
performance,
like my first did.

See if it works.

Rui Barradas

--
View this message in context: http://r.789695.n4.nabble.com/index-instead-of-loop-tp4447672p4457290.html
Sent from the R help mailing list archive at Nabble.com.