[R] My surprising experience in trying out REvolution's R

Tue Apr 21 19:32:19 CEST 2009

Jason:

Thanks for posting this ... interesting.

A guess: Depends on the problem, the hardware, the matrix libraries,...

e.g. in relatively small problems, Revolution's overhead may consume more
time and resources than the problem warrants. In others, you may see many
fold improvements. Very dangerous to generalize from an example or two (as I
recently experienced to my own chagrin).

Cheers,
Bert 

Bert Gunter
Genentech Nonclinical Biostatistics
650-467-7374

-----Original Message-----
From: r-help-bounces at r-project.org [mailto:r-help-bounces at r-project.org] On
Behalf Of Jason Liao
Sent: Tuesday, April 21, 2009 7:26 AM
To: r-help at r-project.org
Subject: [R] My surprising experience in trying out REvolution's R

I care a lot about R's speed. So I decided to give REvolution's R
(http://revolution-computing.com/) a try, which bills itself as an
optimized R. Note that I used the free version.

My machine is a Intel core 2 duo under Windows XP professional. The code
I run is in the end of this post.

First, the regular R 1.9. It takes 2 minutes and 6 seconds, CPU usage
50%

Next, REvolution's R. It takes 2 minutes and 10 seconds, CPU usage 100%.

In other words, REvolution's R consumes double the CPU with slightly
less speed.

The above has been replicated a few times (as a statistician of course).

Anyone has any insight on this? Anyway, my high hope was dashed.

  rm(list=ls(all=TRUE))

  library(MASS);

 ###small and common functions##################################

  glmm.sample.y <- function(b, D.u, x, z)

  {

    pp <- matrix(0, nrow = n, ncol = m);

    zero <- numeric(n.random);

    ran <- t( mvrnorm(m, zero, D.u) );

    for(j in 1:m) pp[, j] <- as.vector( x[, , j] %*% b + z[, , j] %*%
ran[ ,j] );

    pp <- exp(pp)/( 1+exp(pp) );

    y <- runif(m*n);

    ifelse(y<pp, 1, 0);

  }

  #################

   quadratic.form <- function(A, x) { return( as.vector(x %*% A %*% x) )
}

   quadratic.form2 <- function(A, x)

   {

      x <- chol(A) %*% x;

      colSums(x*x)

    }

 #######################################################################

  REML.b.D.u <- function(b, D.u, x, y, z, n.iter)

  {

     u.mean.initial <- array( 0, c(n.random, m) );

     TT <- matrix(0, n.random, n.random);

     for(i in 1:n.iter)

     {

        TT <- TT*(1-1/i) + bias(b, D.u, x, z)/i;

        obj <- sample.u(b, D.u, x, y, z, u.mean.initial);

        b <- b - solve(obj$Hessian, obj$score);

        D.u <- obj$uu + TT;

        u.mean.initial <- obj$u.mean.initial;

        print(i); print(date());

        print("inside REML"); print(TT);

        if(i==n.iter)  write(TT, file="c:/liao/reml/simu50_8.dat",
append=T);

        print(D.u);

     }

      list(b=b, D.u=D.u);

  }

   bias <- function(b, D.u, x, z)

   {

      yy <- glmm.sample.y(b, D.u, x, z);  #this is the sampling stage

      obj1 <- mle(b, D.u, x, yy, z, F, F, n.iter=50);

      obj2 <- mle(b, D.u, x, yy, z, T, F, n.iter=50);

      obj1$uu - obj2$uu

    }

   ##################################################

   mle <- function(b, D.u, x, y, z, indx1, indx2, n.iter)

   {

      u.mean.initial <- array( 0, c(n.random, m) );

      for(i in 1:n.iter)

      {

        obj <- sample.u(b, D.u, x, y, z, u.mean.initial);

        if(indx1) b <- b - solve(obj$Hessian, obj$score);

        if(indx2) D.u <- obj$uu;

        u.mean.initial <- obj$u.mean.initial;

      }

      list(b=b, D.u=D.u, uu=obj$uu, u.mean.initial=u.mean.initial);

     }

##############################################

   sample.u <- function(b, D.u, x, y, z, u.mean.initial)

   {

     D.u.inv <- solve(D.u);

     uu <- matrix(0, n.random, n.random);

     score <- numeric(n.fixed);

     Hessian <- matrix(0, n.fixed, n.fixed);

     for(i in 1:m)

     {

       ada.part <- as.vector(x[, , i] %*% b);

       obj <-  sample.u.one( D.u.inv, ada.part, x[, , i], y[, i], z[,
,i], u.mean.initial[, i] );

       uu <- uu + obj$uu;

       score <- score + obj$score;

       Hessian <- Hessian + obj$Hessian;

       u.mean.initial[, i] <- obj$u.mean.initial;

     }

     uu <- uu/m;

     list(uu=uu, score=score, Hessian=Hessian,
u.mean.initial=u.mean.initial);

  }

 ##########################################

   sample.u.one <- function(D.u.inv, ada.part, x, y, z, u.mean.initial)
#ada.part, x, y, z for one subject only

   {

      fn <- log.like(y, z, D.u.inv, ada.part);

      obj <- optim(u.mean.initial, fn, control=list(fnscale=-1), hessian
= T)

      u.mean.initial <- obj$par;

      var.root <- solve(-obj$hessian);

      var.root <- t( chol(var.root) );

      u <- matrix( rnorm(n.random*n.simu), nrow=n.random, ncol=n.simu );

      log.prob1 <- -colSums(u*u)/2;

      u <- u.mean.initial + var.root %*% u;

      ada <- exp( ada.part + z %*% u );

      ada <- ada/(1+ada); #it is probability now

      log.prob2 <- colSums( y*log(ada) + (1-y)*log(1-ada) );

      log.prob2 <- -quadratic.form2(D.u.inv, u)/2 + log.prob2;

      weight <- exp(log.prob2 - log.prob1);

      weight <- weight/sum(weight);

      ada <- t(ada);

      pi <- colSums(ada*weight);

      product <- colSums( ada*(1-ada)*weight );

      score <- as.vector( (y - pi) %*% x );

      Hessian <- -t(x) %*% ( rep(product, n.fixed)*x );

      obj <- cov.wt( t(u), weight, center=T );

      uu <- obj$cov + obj$center %*% t(obj$center);

      list(uu=uu, score=score, Hessian=Hessian,
u.mean.initial=obj$center);

    }

  #########################################

  log.like <- function(y, z, D.u.inv, ada.part)

  {

     function(u)

     {

        ada <- exp( ada.part + z %*% u );

        ada <- ada/(1+ada);

        sum( y*log(ada) + (1-y)*log(1-ada) ) -quadratic.form(D.u.inv,
u)/2;

     }

  }

  main <- function()

  {

    b <- c(.5, .5 , -1, -.5);

    b <- c(.5, .5 , -1, -.5, 0, 0, 0, 0);

    D.u <- diag( c(.5, .5) );

    x <- array(0, c(n, n.fixed, m) );

    x[ ,1, ] <- 1;

    for(i in 1:n) x[i, 2, ] <- (i-5)/4;

    x[, 3, 1:trunc(m/2)] <- 0;

    x[, 3, trunc(m/2+1):m] <- 1;

    x[, 4, ] <- x[, 2, ]*x[, 3, ];

    x[1, 5:n.fixed, ] <- rnorm(4*m);

    for(i in 2:n) x[i, 5:n.fixed,] <- x[1, 5:n.fixed, ];

    z <- x[, 1:2, ];

    for(i in 1:200)

    {

      print(i); print(date());

      y <- glmm.sample.y(b, D.u, x, z);

      obj <- mle(b, D.u, x, y, z, F, T, n.iter=50);

      print("estimate with b known")

      print(obj$D.u);

      obj <- mle(b, D.u, x, y, z, T, T, n.iter=50);

      print("profile MLE");

      print(obj$D.u);

      obj <- REML.b.D.u(b, D.u, x, y, z, n.iter=50);

      print("REML type estimate")

      print(obj$D.u);

    }

  }

###################################################

  n <- 10;  #number of observation for each cluster

  m <- 50;  #number of clusters

  n.fixed <- 8;

  n.random <- 2;

  n.simu <- 2000;

  main();

	[[alternative HTML version deleted]]

______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.