[R] Problem to remove loops in a routine

Sébastien pomchip at free.fr
Wed Aug 1 23:57:07 CEST 2007


Dear R-users,

I have written the following code to generate some trellis plots. It 
works perfectly fine except that it is quite slow when it is apply to my 
typical datasets (over several thousands of lines). I believe the 
problem comes from the loops I am using to subset my data.frame. I read 
in the archives that the tapply function is often more efficient than a 
loop in R. Unfortunately , it seems that I am not enough familiar with 
the "philosophy" of this function to implement it in my code. Would you 
have some suggestions to speed up the whole thing?

Thanks in advance

Sebastien

PS: the rationale behind these loops is to split the trellis plots on 
different pages, all plots on a page (or a group of pages) having a 
given combination of values for the PLOT, DVID, PER and GRP parameters.

###############################################

library(lattice)
rm(list=ls(all=TRUE))

# Generate a dummy dataset with
# - 20 individuals (ID)
# - individuals 1 to 10 belong to group (GRP) 1, 11 to 20 belong to group 2
# - measurements (DV) done at 10 time points (TIME) per individuals on 2 
occassions (OCC)
# - modelisation of the DV versus TIME relationships with 4 different 
models (MODEL)
# - predicted values (Y)
# - the PLOT column serves as a flag to plot together the models (A and 
B) and (C and D)

PLOT<-rep(1:2,each=40,times=20)
ID<-rep(1:20,each=80)
OCC<-rep(1:2,each=10,times=80)
GRP<-as.numeric(rep(gl(2,80),times=10))
MODEL<-as.vector(rep(gl(4,20,label=c("A","B","C","D")),times=20))
TIME<-rep.int(1:10,160)
DV<-OCC*(1:10)*rep(rnorm(20,50,10),each=80)+rep(rnorm(20,10,1),each=80)
Y<-jitter(DV)

mydata<-data.frame(PLOT,ID,OCC,GRP,MODEL,TIME,DV,Y)

mydata$DVID<-rep.int(1,1600)   #in a real dataset, DVID could have 
typically 2 to 3 levels

#############
# Plotting routine
#############

myPath<-"C:/"                    #TO BE MODIFIED

nTrellisCol<-2                                    #number of columns per 
Trellis plot
nTrellisRow<-3                                    #number of lines per 
Trellis plot

nDVID<-nlevels(factor(mydata$DVID))                #number of 
DVID=observations types
nidPlot<-nlevels(factor(mydata$PLOT))            #number of items in the 
PLOT column
nPer<-nlevels(factor(mydata$OCC))                  #number of occassions 
(OCC, PER, etc...)
nGRP<-nlevels(factor(mydata$GRP))         #number of groups

pdf(file=paste(myPath,"test.pdf",sep=""))         

trellis.par.set(par.main.text=list(cex=1))
trellis.par.set(par.ylab.text=list(font=2))
trellis.par.set(par.xlab.text=list(font=2))

for (i in 1:nidPlot) {                                               
#loop on PLOT id
#    i=1
    idata<-subset(mydata,mydata$PLOT==i)   
   
for (j in 1:nDVID) {                                                 
#loop on DVID
#    j=1       
    ijdata<-subset(idata,idata$DVID==j)
   
        for (k in 1:nPer) {                                             
#loop on Period
#    k=1

    ijkdata<-subset(ijdata,ijdata$OCC==k)
   
         for (l in 1:nGRP) {                           #loop on Group
# l=1

  subdata<-subset(ijkdata,ijkdata$GRP==l)
 
  nModel<-nlevels(factor(subdata$MODEL))                           
#number of models to be plotted in this loop
    mylegend<-c("Raw data",levels(factor(subdata$MODEL)))
    subID<-nlevels(factor(subdata$ID))                       #number of 
ID in the new dataset

  myplot<-xyplot(Y ~ TIME | ID,                               #creates plot
            data = subdata, type = "l",
            groups = MODEL,
            observed = subdata$DV,
            as.table=TRUE,
            panel = function(x, y, ..., subscripts, observed) {
                panel.points(x, pch=3,col=1,observed[subscripts])
                panel.xyplot(x, y, ..., 
col=2:nlevels(subdata$MODEL),subscripts = subscripts)},
       
            strip=function (which.panel,...){
                col<-rep("Black",subID)
                llines(c(0,1,1,0,0),c(0,0,1,1,0),col.line=1)
                ltext(rep(0.5,subID),rep(0.5,subID),
                paste("Subject 
",levels(factor(subdata$ID))[which.panel],sep=""),cex=trellis.par.get("axis.text")[2])},
       
            key=list(space="bottom",
               lines = list(pch = as.integer(c(3,rep("",nModel))), type 
= c("p", gl(1,nModel,label="l")),
                             col = 
1:(nModel+1),"cex"=trellis.par.get("axis.text")[2]),
                     text=list(mylegend, 
"cex"=trellis.par.get("axis.text")[2])),
       
            xlab="Time (hr)",
            ylab="Concentration (ng/mL)",
            layout=c(nTrellisCol,nTrellisRow),

            main=paste(paste(paste("Plot ",i,sep=""),
                             paste(paste(", DVID ",j,sep=""),
                                   paste(paste(", Occasion ",k,sep=""),
                                         paste(", Group 
",l,sep="")))),sep=""))
                                        

  trellis.par.set(par.xlab.text=list(cex=trellis.par.get("axis.text")[2]))
    trellis.par.set(par.ylab.text=list(cex=trellis.par.get("axis.text")[2]))
               
    
print(myplot,panel.width=list(x=(0.75/nTrellisCol),units="npc"),panel.height=list(x=(0.50/nTrellisRow),units="npc"))
       
        }}}}
dev.off()



More information about the R-help mailing list