[R] fastest R platform
Jason Liao
jg_liao at yahoo.com
Mon Apr 9 04:46:14 CEST 2001
Hello, everyone! I picked up R several months ago and have adopted it
as my choice for statistical programming. Coming from a Java
background, I can honestly say that R is not only free, it is better
tha S-plus: the lexical scope in R makes it very simple to simulate
Java's object model. For this, I encourage everyone to read the artcle:
Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical
Computing", Journal of Computational and Graphical Statistics, 9,
491-508.
I have coded a simulation program which runs at 1/5 the speed of
minimum usability on a Pentium 700 PC (windows me). I have spent quite
some time trying to optimize the program. So my question is: on which
platform does R run the fastest? I would be willing to invest on a
faster platform. Thanks in advance.
My program follows in case someone can help me improve it.
Thanks.
Jason
# Liao and Lipsitz (2001)
# A REML type estimate of variance components in generalized linear
mixed models
# This program runs on R. It does not run on S-plus.
rm(list=ls(all=TRUE))
###small and common functions##################################
glmm.sample.y <- function(x, z, b, D.u)
{
pp <- matrix(0, nrow = m, ncol = n);
D.u.half <- t( chol(D.u) );
zero <- numeric(n.random);
for(i in 1:m)
{
obj <- rmulti.norm(zero, D.u.half);
pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran );
}
pp <- exp(pp)/( 1+exp(pp) );
y <- runif(m*n);
dim(y) <- c(m, n);
ifelse(y < pp, 1, 0); #returning vector
}
################
rmulti.norm <- function(mean, var.half)
{
ran <- rnorm( length(mean) );
log.prob <- -sum(ran*ran)/2;
ran <- mean + as.vector( var.half %*% ran );
return(ran, log.prob);
}
#################
my.outer <- function(x) #this is much faster than R's outer()
function
{
dim(x) <- c(length(x), 1);
x %*% t(x);
}
quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }
#######################################################################
REML.b.D.u <- function(b, D.u, y, x, z)
{
TT <- matrix(0, n.random, n.random);
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
yy <- glmm.sample.y(x, z, b, D.u); #this is the sampling stage
obj <- mle.b(b, D.u, yy, x, z); #given D.u, solves for the
mle of b
TT <- TT*(1-1/i) + obj$uu.diff/i;
D.u <- D.u + TT;
print(i); print(date());
print("inside REML");
print(D.u);print("TT"); print(TT);
}
return(b, D.u);
}
mle.b <- function(b, D.u, y, x, z) #b here is the initial value
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
if(i==1) uu.first <- obj$uu;
}
uu.diff <- uu.first - obj$uu;
return(b, uu=obj$uu, uu.diff);
}
mle.b.D.u <- function(b, D.u, y, x, z)
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
}
return(b, D.u);
}
sample.u <- function(b, D.u, x, y, z)
{
D.u.inv <- solve(D.u);
uu <- matrix(0, n.random, n.random);
score <- numeric(n.fixed);
Hessian <- matrix(0, n.fixed, n.fixed);
for(i in 1:m)
{
obj <- sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b),
x[i, ,], y[i,], z[i, ,]);
uu <- uu + obj$uu;
score <- score + obj$score;
Hessian <- Hessian + obj$Hessian;
}
uu <- uu/m;
return(uu, score, Hessian);
}
sample.u.one <- function(D.u.inv, ada.part, x, y, z)#ada.part, x, y,
z for one subject only
{
func <- one.log.like(D.u.inv, ada.part, y, z); #function to be
maximized
obj <- optim( numeric(n.random), func, control=list(fnscale=-1) )
obj <- optim(obj$par, func, method = c("BFGS"),
control=list(fnscale=-1), hessian = T )
mean <- obj$par;
var.half <- solve(-obj$hessian)*1.2;
var.half <- t( chol(var.half) );
n.simu <- 1000 #sample size of importance sampling
sum.weight <- 0;
uu <- matrix(0, n.random, n.random);
pi <- numeric(n);
product <- numeric(n);
func <- one.score.Hessian(D.u.inv, ada.part, x, y, z);
for(i in 1:n.simu)
{
obj1 <- rmulti.norm(mean, var.half);
obj2 <- func(obj1$ran);
weight <- exp(obj2$log.likelihood - obj1$log.prob);
uu <- uu + my.outer(obj1$ran) * weight;
pi <- pi + obj2$pi * weight;
product <- product + obj2$pi*(1-obj2$pi) * weight;
sum.weight <- sum.weight + weight;
}
uu <- uu/sum.weight;
pi <- pi/sum.weight;
product <- product/sum.weight;
score <- as.vector( (y - pi) %*% x );
Hessian <- -t(x) %*% diag(product) %*% x;
return(uu, score, Hessian);
}
one.log.like <- function(D.u.inv, ada.part, y, z)
{
function(u)
{
pp <- exp( ada.part + as.vector( z %*% u ) );
pp <- pp/(1+pp);
-quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 -
y)*log(1-pp) );
}
}
one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
{
function(u)
{
pi <- exp( ada.part + as.vector( z %*% u ) );
pi <- pi/(1 + pi);
log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum(
y*log(pi) + (1-y)*log(1-pi) );
return(pi, log.likelihood);
}
}
main <- function()
{
b <- c(.5 , 1, -1, -.5);
if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) );
D.u <- c(.5, 0, 0, .25);
D.u <- matrix(D.u, nrow=2);
x <- array(0, c(m, n, n.fixed) );
x[, , 1] <- 1;
for(j in 1:n) x[, j, 2] <- j-5;
x[1:trunc(m/2), , 3] <- 0;
x[trunc(m/2+1):m, , 3] <- 1;
x[, , 4] <- x[, , 2]*x[, , 3];
if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) );
z <- x[, , 1:2];
for(i in 1:50)
{
y <- glmm.sample.y(x, z, b, D.u);
obj <- mle.b.D.u(b, D.u, y, x, z); print("mle"); print(obj$D.u);
write(obj$D.u, file = "simu1.dat", append=T);
obj <- REML.b.D.u(b, D.u, y, x, z); print("REML");
print(obj$D.u);
write(obj$D.u, file = "simu2.dat", append=T);
}
}
###################################################
#global variable m, n, n.fixed, n.random
n <- 10;
m <- 25;
n.fixed <- 4;
n.random <- 2;
main();
print(date());
=====
Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126
http://www.geocities.com/jg_liao/index.html
__________________________________________________
Do You Yahoo!?
Get email at your own domain with Yahoo! Mail.
http://personal.mail.yahoo.com/
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
More information about the R-help
mailing list