# [R] Speeding up "accumulation" code in large matrix calc?

Berend Hasselman bhh at xs4all.nl
Fri Feb 24 21:06:28 CET 2012

```On 24-02-2012, at 20:02, Petr Savicky wrote:

> On Fri, Feb 24, 2012 at 08:59:44AM -0800, robertfeldt wrote:
>> Hi,
>>
>> I have R code like so:
>>
>> num.columns.back.since.last.occurence <- function(m, outcome) {
>> 	nrows <- dim(m)[1];
>> 	ncols <- dim(m)[2];
>> 	res <- matrix(rep.int(0, nrows*ncols), nrow=nrows);
>> 	for(row in 1:nrows) {
>> 		for(col in 2:ncols) {
>> 			res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
>> 		}
>> 	}
>> 	res;
>> }
>>
>> but on the very large matrices I apply this the execution times are a
>> problem. I would appreciate any help to rewrite this with more
>> "standard"/native R functions to speed things up.
>
> Hi.
>
> If the number of rows is large and the number of columns is not,
> then try the following.
>
>  # random matrix
>  A <- matrix((runif(49) < 0.2) + 0, nrow=7)
>  outcome <- 1
>
>  # transformation
>  B <- array(0, dim=dim(A))
>  curr <- B[, 1]
>  for (i in seq.int(from=2, length=ncol(A)-1)) {
>      curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
>      B[, i] <- curr
>  }

I've done some speed tests.

t1 <- function(m, outcome) {
nrows <- dim(m)[1]
ncols <- dim(m)[2]
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows)
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res
}

# by row
t2 <- function(A, outcome) {
oneRow <- function(x, outcome)
{
n <- length(x)
y <- c(0, cumsum(x[-n] == outcome))
ave(x, y, FUN = function(z) seq.int(along=z) - 1)
}

t(apply(A, 1, oneRow, outcome=1))
}

# transformation
t3 <- function(A, outcome) {
B <- array(0, dim=dim(A))
curr <- B[, 1]
for (i in seq.int(from=2, length=ncol(A)-1)) {
curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
B[, i] <- curr
}
B
}

# Compile functions to byte code
library(compiler)
t1.c <- cmpfun(t1)
t2.c <- cmpfun(t2)
t3.c <- cmpfun(t3)

Nrow <- 100
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)

z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))

Nrow <- 1000
Ncol <- 100
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)

z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))

Nrow <- 1000
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)

z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))

------------------------------------------

Summary of results

> Nrow <- 100
> Ncol <- 1000
> A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
> print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child
z1     0.543    0.005   0.551          0         0
z2     0.299    0.004   0.304          0         0
z3     0.070    0.012   0.082          0         0
z4     0.112    0.002   0.114          0         0
z5     0.263    0.007   0.271          0         0
z6     0.062    0.009   0.070          0         0

> Nrow <- 1000
> Ncol <- 100
> A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
>
> z1 <- system.time(res1 <- t1(A,outcome=1))
> z2 <- system.time(res2 <- t2(A,outcome=1))
> z3 <- system.time(res3 <- t3(A, outcome=1))
> z4 <- system.time(res4 <- t1.c(A,outcome=1))
> z5 <- system.time(res5 <- t2.c(A,outcome=1))
> z6 <- system.time(res6 <- t3.c(A, outcome=1))
user.self sys.self elapsed user.child sys.child
z1     0.543    0.005   0.551          0         0
z2     0.299    0.004   0.304          0         0
z3     0.070    0.012   0.082          0         0
z4     0.112    0.002   0.114          0         0
z5     0.263    0.007   0.271          0         0
z6     0.062    0.009   0.070          0         0

> Nrow <- 1000
> Ncol <- 1000
> A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
> print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child
z1     5.381    0.019   5.417          0         0
z2     2.812    0.044   2.858          0         0
z3     0.307    0.049   0.357          0         0
z4     1.176    0.015   1.191          0         0
z5     2.686    0.038   2.728          0         0
z6     0.305    0.049   0.355          0         0

<End of timing results>

The original function (t1) benefits a lot from compiling to byte code.
The function that operates on columns (t3) seems to be always the quickest in this experiment.

Berend

```