[R] How can I make my functions run faster
Laz
lmramba at ufl.edu
Mon Aug 19 11:28:32 CEST 2013
Dear R users,
I have written a number of R functions, some of them with the help of
members of this list. However, running them takes days rather than minutes
or a few hours. I am wondering whether there is a way to make them run faster.
Here are all my R functions. The last one calls almost all of the
previous functions. It is the one I am interested in most. It gives me
the correct output but it takes several days to run only 1000 or 2000
simulations!
For example:
system.time(test1<-finalF(designs=5,swaps=20));test1 takes about 20 minutes,
system.time(test1<-finalF(designs=5,swaps=50));test1 takes about 10 hours, and
system.time(test1<-finalF(designs=25,swaps=2000));test1 takes about 3 days
to run.
Here are my functions
#####################################################################
# Workspace reset: the script starts by emptying the global environment.
ls() # list all existing objects
rm(list = ls()) # remove them all
# NOTE(review): once the line above has run, ls() is empty, so this selective
# removal (every object whose name does not match "global.var.A") has nothing
# left to act on -- confirm which of the two rm() calls was intended.
rm(list = ls()[!grepl("global.var.A", ls())])
# refresh memory
gc()
ls() # list what is left after the clean-up
### Define a function that requires useful input from the user
#b=4;g=seq(1,20,1);rb=5;cb=4;s2e=1; r=10;c=8
#####################################
####################################
# Heritability from a genetic variance and a residual variance.
#   varG : genetic (treatment) variance
#   varR : residual variance, defaults to 1
# Returns 4 * varG / (varG + varR) as a vector named "heritability".
herit <- function(varG, varR = 1) {
  total_variance <- varG + varR
  c(heritability = 4 * varG / total_variance)
}
###################################
# Residual (random error) variance from a genetic variance and a heritability.
#   varG : genetic (treatment) variance
#   h2   : heritability
# Returns varG * (4 - h2) / h2 as a vector named "random_error".
varR <- function(varG, h2) {
  residual <- varG * (4 - h2) / h2
  c(random_error = residual)
}
##########################################
# Genetic (treatment) variance from a residual variance and a heritability.
#   varR : residual variance, defaults to 1
#   h2   : heritability
# Returns varR * h2 / (4 - h2) as a vector named "treatment_variance".
varG <- function(varR = 1, h2) {
  c(treatment_variance = varR * h2 / (4 - h2))
}
###############################
# Inverse of the separable AR(1) x AR(1) spatial correlation matrix.
#
# Reads three globals that setup() creates with `<<-`:
#   x, y : coordinates of every plot
#   N    : number of plots
#
#   rhox, rhoy : correlation parameters along x and y
#
# Entry (i, j) of R is s2e * rhox^|x_i - x_j| * rhoy^|y_i - y_j| with s2e = 1.
# Returns solve(R), an N x N matrix.
#
# The whole matrix is built with outer() instead of filling it element by
# element in a double loop; the values are the same, but the work is done in
# vectorised code.
rspat <- function(rhox = 0.6, rhoy = 0.6) {
  s2e <- 1
  plots <- seq_len(N)
  px <- unname(x[plots])
  py <- unname(y[plots])
  lag_x <- abs(outer(px, px, "-")) # |x_i - x_j| for every pair of plots
  lag_y <- abs(outer(py, py, "-")) # |y_i - y_j| for every pair of plots
  R <- s2e * (rhox^lag_x) * (rhoy^lag_y) # Core AR(1)*AR(1)
  solve(R)
}
# Pedigree table read from "ped2new.txt" (no header row, so the columns are
# named V1, V2, ...). ZGped() reads its first three columns as
# (row index, column index, value) triples.
# Plain `<-` is used: at top level `<<-` adds nothing.
ped <- read.table("ped2new.txt", header = FALSE)
# Now work on the pedigree
## Build the inverse genetic covariance matrix and the random-effect design
## matrix from a pedigree.
##
##   ped : pedigree table whose first three columns are read as
##         (row index, column index, value) triples; column V1 is also used
##         to count the genotypes in the pedigree.
##
## Reads the globals `g`, `matdf` and `s2g` that setup() creates.
##
## Returns list(IG = solve(s2g * M), Z = Z) where M is filled from the
## pedigree triples and Z is the genotype incidence matrix of `matdf`, padded
## on the left with zero columns for pedigree genotypes that are not in `g`.
ZGped <- function(ped) {
  ped2 <- data.frame(ped)
  n_ped <- length(unique(ped2$V1)) # genotypes in the pedigree (e.g. 40)
  n_trial <- length(g)             # genotypes in the trial (e.g. 30)
  n_extra <- n_ped - n_trial       # pedigree-only genotypes (e.g. 10)
  if (n_extra < 0) {
    stop("the pedigree has fewer genotypes than ", sQuote("g"))
  }

  # Design matrix for the random effect (genotypes), e.g. 180 by 40:
  # zero columns first, then the incidence matrix of the trial genotypes.
  Zped <- model.matrix(~ matdf$genotypes - 1)
  zero_block <- matrix(0, nrow = nrow(matdf), ncol = n_extra)
  Z <- cbind(zero_block, Zped)

  # Fill M in one step with (row, column) matrix indexing instead of looping
  # over the pedigree rows.
  M <- matrix(0, n_ped, n_ped)
  M[cbind(ped2[[1]], ped2[[2]])] <- ped2[[3]]

  G <- s2g * M # genetic variance-covariance matrix, e.g. 40 by 40
  list(IG = solve(G), Z = Z)
}
##########################
## Required packages #
############################
library(gmp)
library(knitr) # load this packages for publishing results
library(matlab)
library(Matrix)
library(psych)
library(foreach)
library(epicalc)
library(ggplot2)
library(xtable)
library(gdata)
library(gplots)
#b=6;g=seq(1,30,1);rb=5;cb=6;r=15;c=12;h2=0.3;rhox=0.6;rhoy=0.6;ped=0
# Generate a randomised block layout and publish the model matrices that the
# other functions read as globals.
#
#   b          : number of blocks (a single whole number)
#   g          : vector of treatments (genotypes); at least 3, and
#                length(g) must equal rb * cb
#   rb, cb     : number of rows / columns per block
#   r, c       : total number of rows / columns of the layout
#   h2         : heritability
#   rhox, rhoy : spatial correlation parameters; if either is 0 the residual
#                inverse IR is the identity, otherwise it comes from rspat()
#   ped        : "F" for no pedigree, otherwise a pedigree passed to ZGped()
#
# Side effects (assigned with `<<-`): g, x, y, matdf, N, X, h2, rhox, rhoy,
# s2g, IG, Z, IR, K.
#
# Returns list(matdf, summary = summ(matdf), description = des(matdf)).
setup <- function(b, g, rb, cb, r, c, h2, rhox = 0.6, rhoy = 0.6, ped = "F") {
  ### Check points
  # identical() keeps this test scalar, so a vector or NA `b` reaches the
  # dedicated checks below instead of failing inside if().
  if (identical(b, " ")) {
    stop(paste(sQuote("block"), " cannot be missing"))
  }
  if (!is.vector(g) || length(g) < 3) {
    stop(paste(sQuote("treatments"), " should be a vector and more than 2"))
  }
  if (!is.numeric(b)) {
    stop(paste(sQuote("block"), "is not of class", sQuote("numeric")))
  }
  if (length(b) > 1) {
    stop(paste(sQuote("block"), "has to be only 1 numeric value"))
  }
  if (!is.whole(b)) {
    stop(paste(sQuote("block"), "has to be an", sQuote("integer")))
  }
  ## Compatibility check
  if (rb * cb != length(g)) {
    stop(paste(sQuote("rb x cb"), " should be equal to number of treatment",
               sQuote("g")))
  }

  ## Generate the design: an independent random order of g in every block
  g <<- g
  genotypes <- times(b) %do% sample(g, length(g))
  block <- rep(1:b, each = length(g))
  genotypes <- factor(genotypes)
  block <- factor(block)

  ### generate the base design
  k <- c / cb # number of blocks on the x-axis
  x <<- rep(rep(1:r, each = cb), k) # X-coordinate
  l <- cb
  p <- r / rb
  m <- l + 1
  d <- l * b / p
  y <<- c(rep(1:l, r), rep(m:d, r)) # Y-coordinate

  ## compact
  matdf <<- data.frame(x, y, block, genotypes)
  N <<- nrow(matdf)
  mm <- summ(matdf)
  ss <- des(matdf)

  ## Fixed-effect (block) design matrix and variance parameters
  X <<- model.matrix(~block - 1)
  h2 <<- h2
  rhox <<- rhox
  rhoy <<- rhoy
  s2g <<- varG(varR = 1, h2)

  ## calculate G and Z
  # Plain if/else instead of ifelse(): the condition is a scalar and the
  # branches are run for their side effects. ZGped() is called once.
  no_pedigree <- is.character(ped) && length(ped) == 1L && ped == "F"
  if (no_pedigree) {
    IG <<- (1 / s2g) * eye(length(g))
    Z <<- model.matrix(~matdf$genotypes - 1)
  } else {
    ped_mats <- ZGped(ped)
    IG <<- ped_mats[[1]]
    Z <<- ped_mats[[2]]
  }

  ## calculate R and IR
  s2e <- 1
  if (rhox == 0 || rhoy == 0) {
    IR <<- (1 / s2e) * eye(N)
  } else {
    IR <<- rspat(rhox = rhox, rhoy = rhoy)
  }

  C11 <- t(X) %*% IR %*% X
  C11inv <- solve(C11)
  K <<- IR %*% X %*% C11inv %*% t(X) %*% IR

  return(list(matdf = matdf, summary = mm, description = ss))
}
#setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
#system.time(out3<-setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out3
#system.time(out4<-setup(b=16,g=seq(1,196,1),rb=14,cb=14,r=56,c=56,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out4
####################################################
# The function below uses shortcuts from textbook by Harville 1997
# uses inverse of a partitioned matrix technique
####################################################
# Optimality criteria of the current design.
#
# Reads the globals Z, IR, IG and K (created by setup() / swapsimple()) and
# computes C22 = solve(Z' IR Z + IG - Z' K Z).
#
#   criteria : "A" returns the trace of C22 (A-optimality),
#              "D" returns the log-determinant of C22 (D-optimality),
#              anything else returns c(trace, log-determinant).
#              Only the first element is used, so the default c("A", "D")
#              behaves like "A".
#
# Side effects: stores both values in the globals traceI and doptimI.
mainF <- function(criteria = c("A", "D")) {
  # if() needs a scalar condition; the default has length 2.
  criteria <- criteria[1]

  ### Variance covariance matrices
  # Z' IR Z - Z' K Z == Z' (IR - K) Z, which needs one product fewer.
  info <- crossprod(Z, (IR - K) %*% Z) + IG
  C22 <- solve(info)

  ##########################
  ## Optimality Criteria
  #########################
  trace_value <- sum(diag(C22)) ## A-Optimality
  # D-Optimality: minimize the det of the inverse of the information matrix.
  # determinant() returns log|det| directly, so a very small determinant does
  # not underflow to 0 before the log is taken.
  logdet_value <- as.numeric(determinant(C22, logarithm = TRUE)$modulus)

  traceI <<- trace_value
  doptimI <<- logdet_value

  if (criteria == "A") {
    trace_value
  } else if (criteria == "D") {
    logdet_value
  } else {
    c(trace_value, logdet_value)
  }
}
# system.time(res1<-mainF(criteria="A"));res1
# system.time(res2<-mainF(criteria="D"));res2
#system.time(res3<-mainF(criteria="both"));res3
##############################################
### Swap function that takes matdf and returns
## global values newnatdf and design matrices
### Z and IG
##############################################
# Swap two genotypes inside one randomly chosen block and refresh the global
# design matrices.
#
#   matdf : design with columns `block` and `genotypes` (as built by setup())
#   ped   : "F" for no pedigree, otherwise a pedigree passed to ZGped()
#
# Reads the globals s2g, g, X and IR.
# Side effects (assigned with `<<-`): newmatdf, IG, Z, K.
#
# Returns list(newmatdf, summary = summ(newmatdf), description = des(newmatdf))
# where newmatdf is matdf plus a factor column NewG holding the swapped
# genotype codes.
#
# The columns are referenced as matdf$... rather than through attach():
# attach() was never undone, so every call added another copy of the data
# frame to the search path.
# NOTE(review): summ() and des() still run on every swap -- confirm whether
# the callers need them, since this function is called inside loops.
swapsimple <- function(matdf, ped = "F") {
  matdf <- as.data.frame(matdf)

  # Pick one block, then two of the genotypes planted in it.
  b1 <- sample(matdf$block, 1, replace = TRUE)
  in_block <- matdf$block == b1
  pair <- as.integer(sample(matdf$genotypes[in_block], 2))

  # Work on integer codes so the swap does not depend on how c() combines
  # factors. Both tests use the original codes, which makes this a true swap.
  geno_codes <- as.integer(matdf$genotypes)
  swapped <- geno_codes
  swapped[in_block & geno_codes == pair[1]] <- pair[2]
  swapped[in_block & geno_codes == pair[2]] <- pair[1]
  NewG <- factor(swapped)

  ## now, new design after swapping is
  newmatdf <- cbind(matdf, NewG)
  newmatdf <<- as.data.frame(newmatdf)
  mm <- summ(newmatdf)
  ss <- des(newmatdf)

  ## calculate G and Z (plain if/else: scalar condition, side effects)
  no_pedigree <- is.character(ped) && length(ped) == 1L && ped == "F"
  if (no_pedigree) {
    IG <<- (1 / s2g) * eye(length(g))
    Z <<- model.matrix(~newmatdf$NewG - 1)
  } else {
    ped_mats <- ZGped(ped)
    IG <<- ped_mats[[1]]
    Z <<- ped_mats[[2]]
  }

  ## K from the block design matrix X and the residual inverse IR
  C11 <- t(X) %*% IR %*% X
  C11inv <- solve(C11)
  K <<- IR %*% X %*% C11inv %*% t(X) %*% IR

  return(list(newmatdf = newmatdf, summary = mm, description = ss))
}
#swapsimple(matdf,ped="F")[c(2,3)]
#which(newmatdf$genotypes != newmatdf$NewG)
###########################################
# for one design, swap pairs of treatments
# several times and store the traces
# of the successive swaps
##########################################
# Run setup() + swapsimple() + mainF(criteria = "A") `iterations` times and
# keep only the iterations whose trace did not increase over the last one
# kept.
#
#   iterations : number of setup/swap/evaluate rounds
#   verbose    : if TRUE, print the full trace matrix before filtering
#
# Returns a matrix with columns "trace" and "iterations".
#
# NOTE(review): setup() is called with fixed arguments inside the loop, so
# every round starts from a freshly generated design -- confirm this is
# intended rather than swapping the same design repeatedly.
optmF <- function(iterations = 2, verbose = FALSE) {
  n_rounds <- iterations
  trace <- numeric(n_rounds) # preallocated, not grown inside the loop
  for (k in seq_len(n_rounds)) {
    setup(b = 6, g = seq(1, 30, 1), rb = 5, cb = 6, r = 15, c = 12, h2 = 0.3,
          rhox = 0.6, rhoy = 0.6, ped = "F")
    swapsimple(matdf, ped = "F")
    trace[k] <- mainF(criteria = "A")
  }
  mat <- cbind(trace, iterations = seq_len(n_rounds))

  if (verbose) {
    cat("***starting matrix\n")
    print(mat)
  }

  # Repeatedly deleting the first row whose trace exceeds the previous row
  # leaves exactly the rows that equal the running minimum, so select them in
  # one pass.
  mat[trace == cummin(trace), , drop = FALSE]
}
#system.time(test1<-optmF(iterations=10));test1
################################################
###############################################
# Swap two genotypes inside one randomly chosen block and return the
# optimality criterion of the resulting design.
#
#   matdf    : design with columns `block` and `genotypes`
#   ped      : "F" for no pedigree, otherwise a pedigree passed to ZGped()
#   criteria : "A" returns the trace of C22, "D" its log-determinant,
#              anything else returns c(trace, log-determinant). Only the
#              first element is used, so the default c("A", "D") acts as "A".
#
# Reads the globals s2g, g, X and IR.
# Side effects (assigned with `<<-`): newmatdf, IG, Z.
#
# The columns are referenced as matdf$... rather than through attach():
# attach() was never undone, so every call added another copy of the data
# frame to the search path.
# NOTE(review): summ() and des() are still called (their results are not
# used here) -- confirm whether their printed output is wanted.
swap <- function(matdf, ped = "F", criteria = c("A", "D")) {
  # if() needs a scalar condition; the default has length 2.
  criteria <- criteria[1]
  matdf <- as.data.frame(matdf)

  # Pick one block, then two of the genotypes planted in it.
  b1 <- sample(matdf$block, 1, replace = TRUE)
  in_block <- matdf$block == b1
  pair <- as.integer(sample(matdf$genotypes[in_block], 2))

  # Work on integer codes so the swap does not depend on how c() combines
  # factors. Both tests use the original codes, which makes this a true swap.
  geno_codes <- as.integer(matdf$genotypes)
  swapped <- geno_codes
  swapped[in_block & geno_codes == pair[1]] <- pair[2]
  swapped[in_block & geno_codes == pair[2]] <- pair[1]
  NewG <- factor(swapped)

  ## now, new design after swapping is
  newmatdf <- cbind(matdf, NewG)
  newmatdf <<- as.data.frame(newmatdf)
  mm <- summ(newmatdf)
  ss <- des(newmatdf)

  ## calculate G and Z (plain if/else: scalar condition, side effects)
  no_pedigree <- is.character(ped) && length(ped) == 1L && ped == "F"
  if (no_pedigree) {
    IG <<- (1 / s2g) * eye(length(g))
    Z <<- model.matrix(~newmatdf$NewG - 1)
  } else {
    ped_mats <- ZGped(ped)
    IG <<- ped_mats[[1]]
    Z <<- ped_mats[[2]]
  }

  ## K is local here (unlike in swapsimple, the global K is left untouched)
  C11 <- t(X) %*% IR %*% X
  C11inv <- solve(C11)
  K <- IR %*% X %*% C11inv %*% t(X) %*% IR

  # Z' IR Z - Z' K Z == Z' (IR - K) Z, which needs one product fewer.
  info <- crossprod(Z, (IR - K) %*% Z) + IG
  C22 <- solve(info)

  ##########################
  ## Optimality Criteria
  #########################
  traceI <- sum(diag(C22)) ## A-Optimality
  # determinant() returns log|det| directly, so a very small determinant does
  # not underflow to 0 before the log is taken.
  doptimI <- as.numeric(determinant(C22, logarithm = TRUE)$modulus)

  if (criteria == "A") {
    traceI
  } else if (criteria == "D") {
    doptimI
  } else {
    c(traceI, doptimI)
  }
}
#swap(matdf,ped="F",criteria="both")
###########################################
### Generate 25 initial designs
###########################################
#rspatf<-function(design){
# arr = array(1, dim=c(nrow(matdf),ncol(matdf)+1,design))
# l<-list(length=dim(arr)[3])
# for (i in 1:dim(arr)[3]){
# l[[i]]<-swapsimple(matdf,ped="F")[[1]][,,i]
# }
# l
#}
#matd<-rspatf(design=5)
#matd
#which(matd[[1]]$genotypes != matd[[1]]$NewG)
#which(matd[[2]]$genotypes != matd[[2]]$NewG)
###############################################
###############################################
# Run setup() + swap(criteria = "A") `iterations` times and keep only the
# iterations whose trace did not increase over the last one kept.
#
#   iterations : number of setup/swap rounds
#   verbose    : if TRUE, print the full trace matrix before filtering
#
# Returns a matrix with columns "trace" and "iterations".
#
# NOTE(review): setup() is called with fixed arguments inside the loop, so
# every round starts from a freshly generated design -- confirm this is
# intended rather than swapping the same design repeatedly.
optm <- function(iterations = 2, verbose = FALSE) {
  n_rounds <- iterations
  trace <- numeric(n_rounds) # preallocated, not grown inside the loop
  for (k in seq_len(n_rounds)) {
    setup(b = 6, g = seq(1, 30, 1), rb = 5, cb = 6, r = 15, c = 12, h2 = 0.3,
          rhox = 0.6, rhoy = 0.6, ped = "F")
    trace[k] <- swap(matdf, ped = "F", criteria = "A")
  }
  mat <- cbind(trace, iterations = seq_len(n_rounds))

  if (verbose) {
    cat("***starting matrix\n")
    print(mat)
  }

  # Repeatedly deleting the first row whose trace exceeds the previous row
  # leaves exactly the rows that equal the running minimum, so select them in
  # one pass.
  mat[trace == cummin(trace), , drop = FALSE]
}
#system.time(res<-optm(iterations=10));res
#################################################
################################################
# Run the swap search for several designs and collect the results.
#
#   designs : number of designs to generate
#   swaps   : number of iterations passed to optmF() for each design
#
# Returns a list with, per design:
#   OP           : the matrix returned by optmF()
#   Miny, Maxy   : min / max of its first column (trace)
#   Minx, Maxx   : min / max of its second column (iteration number)
#   Nmatdf       : column 5 of the global newmatdf after the search
# This gives both the optimal conditions and the designs.
#
# optmF(iterations = swaps) already performs `swaps` rounds. It used to be
# wrapped in another `for (j in 1:swaps)` loop that overwrote the same slots
# on every pass, so only the last of `swaps` identical searches was kept and
# the cost grew with swaps^2. The search now runs once per design.
finalF <- function(designs, swaps) {
  Nmatdf <- vector("list", designs)
  OP <- vector("list", designs)
  Miny <- numeric(designs)
  Maxy <- numeric(designs)
  Minx <- numeric(designs)
  Maxx <- numeric(designs)

  for (i in seq_len(designs)) {
    setup(b = 4, g = seq(1, 20, 1), rb = 5, cb = 4, r = 10, c = 8, h2 = 0.3,
          rhox = 0.6, rhoy = 0.6, ped = "F")[1]
    mainF(criteria = "A")

    OP[[i]] <- optmF(iterations = swaps)
    Nmatdf[[i]] <- newmatdf[, 5]

    traces <- OP[[i]][, 1]
    kept_iterations <- OP[[i]][, 2]
    Miny[i] <- min(traces)
    Maxy[i] <- max(traces)
    Minx[i] <- min(kept_iterations)
    Maxx[i] <- max(kept_iterations)
  }

  return(list(OP = OP, Miny = Miny, Maxy = Maxy, Minx = Minx, Maxx = Maxx,
              Nmatdf = Nmatdf))
}
#################################################
# Run the full simulation while mirroring console output to a timestamped
# text file (split = TRUE keeps it on the console as well).
sink(file = paste(format(Sys.time(), "Final_%a_%b_%d_%Y_%H_%M_%S"), "txt",
                  sep = "."), split = TRUE)
# `finally` closes the sink even if the run stops with an error; otherwise
# later console output would keep going to the file. Values are printed
# explicitly because nothing auto-prints inside tryCatch().
tryCatch({
  print(system.time(test1 <- finalF(designs = 25, swaps = 2000)))
  print(test1)
}, finally = sink())
I expect results like the ones below:
>sink()
>finalF<-function(designs,swaps)
+{
+ Nmatdf<-list()
+ OP<-list()
+ Miny<-NULL
+ Maxy<-NULL
+ Minx<-NULL
+ Maxx<-NULL
+ for (i in 1:designs)
+ {
+ setup(b=4,g=seq(1,20,1),rb=5,cb=4,r=10,c=8,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
+ mainF(criteria="A")
+ for (j in 1:swaps)
+ {
+ OP[[i]]<- optmF(iterations=swaps)
+ Nmatdf[[i]]<-newmatdf[,5]
+ Miny[i]<-min(OP[[i]][,1])
+ Maxy[i]<-max(OP[[i]][,1])
+ Minx[i]<-min(OP[[i]][,2])
+ Maxx[i]<-max(OP[[i]][,2])
+ }
+ }
+ return(list(OP=OP,Miny=Miny,Maxy=Maxy,Minx=Minx,Maxx=Maxx,Nmatdf=Nmatdf)) # gives us both the Optimal conditions and designs
+}
>sink(file= paste(format(Sys.time(), "Final_%a_%b_%d_%Y_%H_%M_%S"),"txt",sep="."),split=TRUE)
>system.time(test1<-finalF(designs=5,swaps=5));test1
user system elapsed
37.88 0.00 38.04
$OP
$OP[[1]]
trace iterations
[1,] 0.8961335 1
[2,] 0.8952822 3
[3,] 0.8934649 4
$OP[[2]]
trace iterations
[1,] 0.893955 1
$OP[[3]]
trace iterations
[1,] 0.9007225 1
[2,] 0.8971837 4
[3,] 0.8902474 5
$OP[[4]]
trace iterations
[1,] 0.8964726 1
[2,] 0.8951722 4
$OP[[5]]
trace iterations
[1,] 0.8973285 1
[2,] 0.8922594 4
$Miny
[1] 0.8934649 0.8939550 0.8902474 0.8951722 0.8922594
$Maxy
[1] 0.8961335 0.8939550 0.9007225 0.8964726 0.8973285
$Minx
[1] 1 1 1 1 1
$Maxx
[1] 4 1 5 4 4
$Nmatdf
$Nmatdf[[1]]
[1] 30 8 5 28 27 29 1 26 24 22 13 6 17 18 2 19 14 11 3 23 10 15 21 9 25 4 7 20 12 16 14 17 15 5 8 6 19
[38] 4 1 10 11 3 24 20 13 2 27 12 16 28 21 23 30 25 29 7 26 18 9 22 24 21 26 2 13 30 5 28 20 11 3 7 18 25
[75] 22 16 4 17 19 27 29 10 23 6 12 15 14 1 9 8 12 11 3 8 5 20 23 22 7 15 19 29 24 27 13 2 6 1 21 26 25
[112] 10 16 14 18 4 30 17 9 28 29 9 7 27 11 2 30 18 8 14 19 20 15 21 4 3 16 24 13 28 26 10 12 6 5 25 1 17
[149] 23 22 21 2 23 16 4 10 9 22 30 24 1 27 3 20 12 5 26 17 28 11 7 14 8 25 19 13 18 29 15 6
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
$Nmatdf[[2]]
[1] 5 13 30 2 21 23 6 27 16 19 8 26 18 4 20 9 22 28 7 3 15 10 11 17 25 24 29 1 14 12 28 18 23 19 21 16 17
[38] 29 13 7 15 27 25 22 10 1 2 5 30 9 20 3 14 24 26 4 6 12 11 8 8 18 25 12 5 23 21 4 9 17 20 1 2 6
[75] 22 7 16 26 30 29 3 15 19 14 13 11 24 28 27 10 16 21 26 23 25 4 9 24 15 14 22 1 20 27 2 7 17 18 13 8 12
[112] 5 6 19 28 3 10 30 11 29 11 30 14 9 26 5 1 10 29 28 4 18 8 24 20 13 3 23 27 6 15 16 21 2 17 7 25 12
[149] 19 22 7 28 8 11 26 24 12 29 9 16 21 27 22 23 18 19 13 6 15 3 1 30 2 17 14 5 25 20 4 10
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
$Nmatdf[[3]]
[1] 7 25 4 30 12 11 14 13 26 1 10 21 15 22 29 19 27 16 2 24 28 20 3 5 23 8 18 6 17 9 6 21 9 15 11 17 13
[38] 29 24 4 20 7 23 14 2 16 18 26 19 25 8 1 12 10 28 27 22 30 5 3 20 12 8 2 11 18 24 19 9 22 15 7 30 27
[75] 17 29 6 3 5 1 21 25 28 14 23 4 16 26 13 10 20 29 26 25 15 22 9 10 28 17 18 21 6 16 7 1 3 24 11 2 4
[112] 14 8 5 13 27 23 30 19 12 6 30 1 2 7 28 18 8 20 10 4 25 14 19 27 11 13 29 12 9 3 26 22 21 16 15 17 24
[149] 5 23 17 6 25 11 21 29 5 26 13 7 15 2 9 4 18 30 3 8 20 24 27 22 19 16 28 12 1 23 14 10
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
$Nmatdf[[4]]
[1] 24 8 17 30 10 20 4 28 25 16 14 13 7 12 26 29 21 19 1 22 11 6 23 18 15 5 27 2 3 9 1 24 27 15 26 14 28
[38] 20 8 5 4 29 2 25 9 13 6 21 7 22 30 17 3 10 12 19 11 18 16 23 25 18 3 29 1 4 8 6 9 30 2 14 11 16
[75] 23 13 10 12 7 19 17 5 21 28 24 20 15 27 26 22 14 5 7 6 17 3 1 29 25 23 19 11 21 18 4 30 20 8 2 12 9
[112] 16 10 15 27 26 13 24 28 22 19 7 17 1 12 8 18 16 14 22 3 28 27 25 10 6 4 15 30 9 11 5 20 26 24 29 21 2
[149] 23 13 2 16 10 25 18 15 26 22 12 19 30 17 23 8 3 7 20 14 13 28 9 21 11 29 6 5 4 24 27 1
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
$Nmatdf[[5]]
[1] 12 18 8 22 9 21 2 1 29 13 30 25 17 6 16 5 26 7 3 14 23 15 28 27 10 24 20 11 19 4 20 30 14 27 25 4 6
[38] 28 23 8 9 29 26 19 24 7 5 1 11 22 21 2 10 18 12 15 3 17 13 16 16 22 6 9 21 5 14 2 30 10 3 25 27 15
[75] 28 7 17 20 11 8 19 29 12 26 24 13 1 4 18 23 4 16 10 25 5 13 18 19 22 7 28 30 23 21 11 2 14 9 20 24 8
[112] 17 1 15 29 6 12 27 3 26 14 8 26 6 20 9 15 23 3 22 7 30 25 24 1 10 19 21 4 11 2 18 17 13 28 29 27 16
[149] 12 5 19 2 4 5 15 21 17 7 25 8 6 16 20 29 10 18 1 12 26 28 27 11 14 23 22 9 3 13 30 24
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
More information about the R-help
mailing list