[R] Problem to remove loops in a routine
Sébastien
pomchip at free.fr
Wed Aug 1 23:57:07 CEST 2007
Dear R-users,
I have written the following code to generate some trellis plots. It
works perfectly fine except that it is quite slow when it is apply to my
typical datasets (over several thousands of lines). I believe the
problem comes from the loops I am using to subset my data.frame. I read
in the archives that the tapply function is often more efficient than a
loop in R. Unfortunately , it seems that I am not enough familiar with
the "philosophy" of this function to implement it in my code. Would you
have some suggestions to speed up the whole thing?
Thanks in advance
Sebastien
PS: the rationale behind these loops is to split the trellis plots on
different pages, all plots on a page (or a group of pages) having a
given combination of values for the PLOT, DVID, PER and GRP parameters.
###############################################
library(lattice)
rm(list=ls(all=TRUE))
# Generate a dummy dataset with
# - 20 individuals (ID)
# - individuals 1 to 10 belong to group (GRP) 1, 11 to 20 belong to group 2
# - measurements (DV) done at 10 time points (TIME) per individuals on 2
occassions (OCC)
# - modelisation of the DV versus TIME relationships with 4 different
models (MODEL)
# - predicted values (Y)
# - the PLOT column serves as a flag to plot together the models (A and
B) and (C and D)
PLOT<-rep(1:2,each=40,times=20)
ID<-rep(1:20,each=80)
OCC<-rep(1:2,each=10,times=80)
GRP<-as.numeric(rep(gl(2,80),times=10))
MODEL<-as.vector(rep(gl(4,20,label=c("A","B","C","D")),times=20))
TIME<-rep.int(1:10,160)
DV<-OCC*(1:10)*rep(rnorm(20,50,10),each=80)+rep(rnorm(20,10,1),each=80)
Y<-jitter(DV)
mydata<-data.frame(PLOT,ID,OCC,GRP,MODEL,TIME,DV,Y)
mydata$DVID<-rep.int(1,1600) #in a real dataset, DVID could have
typically 2 to 3 levels
#############
# Plotting routine
#############
myPath<-"C:/" #TO BE MODIFIED
nTrellisCol<-2 #number of columns per
Trellis plot
nTrellisRow<-3 #number of lines per
Trellis plot
nDVID<-nlevels(factor(mydata$DVID)) #number of
DVID=observations types
nidPlot<-nlevels(factor(mydata$PLOT)) #number of items in the
PLOT column
nPer<-nlevels(factor(mydata$OCC)) #number of occassions
(OCC, PER, etc...)
nGRP<-nlevels(factor(mydata$GRP)) #number of groups
pdf(file=paste(myPath,"test.pdf",sep=""))
trellis.par.set(par.main.text=list(cex=1))
trellis.par.set(par.ylab.text=list(font=2))
trellis.par.set(par.xlab.text=list(font=2))
for (i in 1:nidPlot) {
#loop on PLOT id
# i=1
idata<-subset(mydata,mydata$PLOT==i)
for (j in 1:nDVID) {
#loop on DVID
# j=1
ijdata<-subset(idata,idata$DVID==j)
for (k in 1:nPer) {
#loop on Period
# k=1
ijkdata<-subset(ijdata,ijdata$OCC==k)
for (l in 1:nGRP) { #loop on Group
# l=1
subdata<-subset(ijkdata,ijkdata$GRP==l)
nModel<-nlevels(factor(subdata$MODEL))
#number of models to be plotted in this loop
mylegend<-c("Raw data",levels(factor(subdata$MODEL)))
subID<-nlevels(factor(subdata$ID)) #number of
ID in the new dataset
myplot<-xyplot(Y ~ TIME | ID, #creates plot
data = subdata, type = "l",
groups = MODEL,
observed = subdata$DV,
as.table=TRUE,
panel = function(x, y, ..., subscripts, observed) {
panel.points(x, pch=3,col=1,observed[subscripts])
panel.xyplot(x, y, ...,
col=2:nlevels(subdata$MODEL),subscripts = subscripts)},
strip=function (which.panel,...){
col<-rep("Black",subID)
llines(c(0,1,1,0,0),c(0,0,1,1,0),col.line=1)
ltext(rep(0.5,subID),rep(0.5,subID),
paste("Subject
",levels(factor(subdata$ID))[which.panel],sep=""),cex=trellis.par.get("axis.text")[2])},
key=list(space="bottom",
lines = list(pch = as.integer(c(3,rep("",nModel))), type
= c("p", gl(1,nModel,label="l")),
col =
1:(nModel+1),"cex"=trellis.par.get("axis.text")[2]),
text=list(mylegend,
"cex"=trellis.par.get("axis.text")[2])),
xlab="Time (hr)",
ylab="Concentration (ng/mL)",
layout=c(nTrellisCol,nTrellisRow),
main=paste(paste(paste("Plot ",i,sep=""),
paste(paste(", DVID ",j,sep=""),
paste(paste(", Occasion ",k,sep=""),
paste(", Group
",l,sep="")))),sep=""))
trellis.par.set(par.xlab.text=list(cex=trellis.par.get("axis.text")[2]))
trellis.par.set(par.ylab.text=list(cex=trellis.par.get("axis.text")[2]))
print(myplot,panel.width=list(x=(0.75/nTrellisCol),units="npc"),panel.height=list(x=(0.50/nTrellisRow),units="npc"))
}}}}
dev.off()
More information about the R-help
mailing list