[R] How can I make my functions run faster
Michael Dewey
info at aghmed.fsnet.co.uk
Mon Aug 19 12:55:47 CEST 2013
At 10:28 19/08/2013, Laz wrote:
>Dear R users,
>
>I have written a couple of R functions, some are through the help of
>the R group members. However, running them takes days instead of
>minutes or a few hours. I am wondering whether there is a quick way
>of doing that.
Your example code is rather long for humans to profile. Have you
thought of getting R to tell where it is spending most time? The R
extensions manual tells you how to do this.
>Here are all my R functions. The last one calls almost all of the
>previous functions. It is the one I am interested in most. It gives
>me the correct output but it takes several days to run only 1000 or
>2000 simulations!
>e.g. system.time(test1<-finalF(designs=5,swaps=20));test1
>will take about 20 minutes to run but
>system.time(test1<-finalF(designs=5,swaps=50));test1 takes about 10
>hours and system.time(test1<-finalF(designs=25,swaps=2000));test1
>takes about 3 days to run
>
>Here are my functions
>
>
>#####################################################################
>
>ls() # list all existing objects
>rm(list = ls()) # remove them all
>rm(list = ls()[!grepl("global.var.A", ls())])
># refresh memory
>gc()
>ls()
>
>### Define a function that requires useful input from the user
>#b=4;g=seq(1,20,1);rb=5;cb=4;s2e=1; r=10;c=8
>
>#####################################
>####################################
># function to calculate heritability
>herit<-function(varG,varR=1)
>{
> h<-4*varG/(varG+varR)
> return(c(heritability=h))
>}
>
>###################################
># function to calculate random error
>varR<-function(varG,h2)
>{
> varR<- varG*(4-h2)/h2
> return(c(random_error=varR))
>}
>
>##########################################
># function to calculate treatment variance
>varG<-function(varR=1,h2)
>{
> varG<-varR*h2/(4-h2)
> return(c(treatment_variance=varG))
>}
>
>
>###############################
>
># calculating R inverse from spatial data
>rspat<-function(rhox=0.6,rhoy=0.6)
>{
> s2e<-1
> R<-s2e*eye(N)
> for(i in 1:N) {
> for (j in i:N){
> y1<-y[i]
> y2<-y[j]
> x1<-x[i]
> x2<-x[j]
> R[i,j]<-s2e*(rhox^abs(x2-x1))*(rhoy^abs(y2-y1)) # Core AR(1)*AR(1)
> R[j,i]<-R[i,j]
> }
> }
> IR<-solve(R)
> IR
>}
>
>ped<<-read.table("ped2new.txt",header=FALSE)
># Now work on the pedigree
>## A function to return Zinverse from pedigree
>
>ZGped<-function(ped)
>{
> ped2<-data.frame(ped)
> lenp2<-length(unique(ped2$V1));lenp2 # how many Genotypes in
> total in the pedigree =40
> ln2<-length(g);ln2#ln2=nrow(matdf)=30
> # calculate the new Z
> Zped<-model.matrix(~ matdf$genotypes -1)# has order N*t = 180 by 30
> dif<-(lenp2-ln2);dif # 40-30=10
> #print(c(lenp2,ln2,dif))
> zeromatrix<-zeros(nrow(matdf),dif);zeromatrix # 180 by 10
> Z<-cbind(zeromatrix,Zped) # Design Matrix for random effect
> (Genotypes): 180 by 40
> # calculate the new G
> M<-matrix(0,lenp2,lenp2) # 40 by 40
> for (i in 1:nrow(ped2)) { M[ped2[i, 1], ped2[i, 2]] <- ped2[i, 3] }
> G<-s2g*M # Genetic Variance covariance matrix for pedigree 2: 40 by 40
> IG<-solve(G)
> return(list(IG=IG, Z=Z))
>}
>
>##########################
>## Required packages #
>############################
>library(gmp)
>library(knitr) # load this packages for publishing results
>library(matlab)
>library(Matrix)
>library(psych)
>library(foreach)
>library(epicalc)
>library(ggplot2)
>library(xtable)
>library(gdata)
>library(gplots)
>
>#b=6;g=seq(1,30,1);rb=5;cb=6;r=15;c=12;h2=0.3;rhox=0.6;rhoy=0.6;ped=0
>
>setup<-function(b,g,rb,cb,r,c,h2,rhox=0.6,rhoy=0.6,ped="F")
> {
> # where
> # b = number of blocks
> # t = number of treatments per block
> # rb = number of rows per block
> # cb = number of columns per block
> # s2g = variance within genotypes
> # h2 = heritability
> # r = total number of rows for the layout
> # c = total number of columns for the layout
>
> ### Check points
> if(b==" ")
> stop(paste(sQuote("block")," cannot be missing"))
> if(!is.vector(g) | length(g)<3)
> stop(paste(sQuote("treatments")," should be a vector and
> more than 2"))
> if(!is.numeric(b))
> stop(paste(sQuote("block"),"is not of class", sQuote("numeric")))
> if(length(b)>1)
> stop(paste(sQuote("block"),"has to be only 1 numeric value"))
> if(!is.whole(b))
> stop(paste(sQuote("block"),"has to be an", sQuote("integer")))
>
> ## Compatibility checks
> if(rb*cb !=length(g))
> stop(paste(sQuote("rb x cb")," should be equal to number of
> treatment", sQuote("g")))
> if(length(g) != rb*cb)
> stop(paste(sQuote("the number of treatments"), "is not equal
> to", sQuote("rb*cb")))
>
> ## Generate the design
> g<<-g
> genotypes<-times(b) %do% sample(g,length(g))
> #genotypes<-rep(g,b)
> block<-rep(1:b,each=length(g))
> genotypes<-factor(genotypes)
> block<-factor(block)
>
> ### generate the base design
> k<-c/cb # number of blocks on the x-axis
> x<<-rep(rep(1:r,each=cb),k) # X-coordinate
>
> #w<-rb
> l<-cb
> p<-r/rb
> m<-l+1
> d<-l*b/p
> y<<-c(rep(1:l,r),rep(m:d,r)) # Y-coordinate
>
> ## compact
> matdf<<-data.frame(x,y,block,genotypes)
> N<<-nrow(matdf)
> mm<-summ(matdf)
> ss<-des(matdf)
>
> ## Identity matrices
> X<<-model.matrix(~block-1)
> h2<<-h2;rhox<<-rhox;rhoy<<-rhoy
> s2g<<-varG(varR=1,h2)
> ## calculate G and Z
> ifelse(ped == "F",
> c(IG<<-(1/s2g)*eye(length(g)),Z<<-model.matrix(~matdf$genotypes-1)),
> c(IG<<- ZGped(ped)[[1]],Z<<-ZGped(ped)[[2]]))
> ## calculate R and IR
> s2e<-1
> ifelse(rhox==0 | rhoy==0, IR<<-(1/s2e)*eye(N),
> IR<<-rspat(rhox=rhox,rhoy=rhoy))
> C11<-t(X)%*%IR%*%X
> C11inv<-solve(C11)
> K<<-IR%*%X%*%C11inv%*%t(X)%*%IR
> return(list(matdf=matdf,summary=mm,description=ss))
>
> }
>
>#setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
>#system.time(out3<-setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out3
>#system.time(out4<-setup(b=16,g=seq(1,196,1),rb=14,cb=14,r=56,c=56,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out4
>
>
>####################################################
># The function below uses shortcuts from textbook by Harville 1997
># uses inverse of a partitioned matrix technique
>####################################################
>
>mainF<-function(criteria=c("A","D"))
>{
> ### Variance covariance matrices
> temp<-t(Z)%*%IR%*%Z+IG - t(Z)%*%K%*%Z
> C22<-solve(temp)
> ##########################
> ## Optimality Criteria
> #########################
> traceI<<-sum(diag(C22)) ## A-Optimality
> doptimI<<-log(det(C22)) # D-Optimality: minimize the det of the
> inverse of Inform Matrix
> #return(c(traceI,doptimI))
> if(criteria=="A") return(traceI)
> if(criteria=="D") return(doptimI)
> else{return(c(traceI,doptimI))}
>}
>
># system.time(res1<-mainF(criteria="A"));res1
># system.time(res2<-mainF(criteria="D"));res2
>#system.time(res3<-mainF(criteria="both"));res3
>
>
>##############################################
>### Swap function that takes matdf and returns
>## global values newnatdf and design matrices
>### Z and IG
>##############################################
>
>swapsimple<-function(matdf,ped="F")
>{
> # dataset D =mat1 generated from the above function
> ## now, new design after swapping is
> matdf<-as.data.frame(matdf)
> attach(matdf,warn.conflict=FALSE)
> b1<-sample(matdf$block,1,replace=TRUE);b1
> gg1<-matdf$genotypes[block==b1];gg1
> g1<-sample(gg1,2);g1
> samp<-Matrix(c(g1=g1,block=b1),nrow=1,ncol=3,
> dimnames=list(NULL,c("gen1","gen2","block")));samp
> newGen<-matdf$genotypes
> newG<-ifelse(matdf$genotypes==samp[,1] &
> block==samp[,3],samp[,2],matdf$genotypes)
> NewG<-ifelse(matdf$genotypes==samp[,2] & block==samp[,3],samp[,1],newG)
> NewG<-factor(NewG)
>
> ## now, new design after swapping is
> newmatdf<-cbind(matdf,NewG)
> newmatdf<<-as.data.frame(newmatdf)
> mm<-summ(newmatdf)
> ss<-des(newmatdf)
>
> ## Identity matrices
> ifelse(ped == "F",
> c(IG<<-(1/s2g)*eye(length(g)),Z<<-model.matrix(~newmatdf$NewG-1)),
> c(IG<<- ZGped(ped)[[1]],Z<<-ZGped(ped)[[2]]))
> ## calculate R and IR
> C11<-t(X)%*%IR%*%X
> C11inv<-solve(C11)
> K<<-IR%*%X%*%C11inv%*%t(X)%*%IR
> return(list(newmatdf=newmatdf,summary=mm,description=ss))
>}
>#swapsimple(matdf,ped="F")[c(2,3)]
>#which(newmatdf$genotypes != newmatdf$NewG)
>###########################################
># for one design, swap pairs of treatments
># several times and store the traces
># of the successive swaps
>##########################################
>
>optmF<-function(iterations=2,verbose=FALSE)
>{
> trace<-c()
>
> for (k in 1:iterations){
>setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")
> swapsimple(matdf,ped="F")
> trace[k]<-mainF(criteria="A")
> iterations[k]<-k
> mat<-cbind(trace, iterations= seq(iterations))
> }
>
> if (verbose){
> cat("***starting matrix\n")
> print(mat)
> }
> # iterate till done
> while(nrow(mat) > 1){
> high <- diff(mat[, 'trace']) > 0
> if (!any(high)) break # done
> # find which one to delete
> delete <- which.max(high) + 1L
> #mat <- mat[-delete, ]
> mat <- mat[-delete,, drop=FALSE]
> }
> mat
>}
>
>#system.time(test1<-optmF(iterations=10));test1
>
>################################################
>###############################################
>
>swap<-function(matdf,ped="F",criteria=c("A","D"))
>{
> # dataset D =mat1 generated from the above function
> ## now, new design after swapping is
> matdf<-as.data.frame(matdf)
> attach(matdf,warn.conflict=FALSE)
> b1<-sample(matdf$block,1,replace=TRUE);b1
> gg1<-matdf$genotypes[block==b1];gg1
> g1<-sample(gg1,2);g1
> samp<-Matrix(c(g1=g1,block=b1),nrow=1,ncol=3,
> dimnames=list(NULL,c("gen1","gen2","block")));samp
> newGen<-matdf$genotypes
> newG<-ifelse(matdf$genotypes==samp[,1] &
> block==samp[,3],samp[,2],matdf$genotypes)
> NewG<-ifelse(matdf$genotypes==samp[,2] & block==samp[,3],samp[,1],newG)
> NewG<-factor(NewG)
>
> ## now, new design after swapping is
> newmatdf<-cbind(matdf,NewG)
> newmatdf<<-as.data.frame(newmatdf)
> mm<-summ(newmatdf)
> ss<-des(newmatdf)
>
> ## Identity matrices
> #X<<-model.matrix(~block-1)
> #s2g<<-varG(varR=1,h2)
> ## calculate G and Z
> ifelse(ped == "F",
> c(IG<<-(1/s2g)*eye(length(g)),Z<<-model.matrix(~newmatdf$NewG-1)),
> c(IG<<- ZGped(ped)[[1]],Z<<-ZGped(ped)[[2]]))
> ## calculate R and IR
> C11<-t(X)%*%IR%*%X
> C11inv<-solve(C11)
> K<-IR%*%X%*%C11inv%*%t(X)%*%IR
> temp<-t(Z)%*%IR%*%Z+IG - t(Z)%*%K%*%Z
> C22<-solve(temp)
> ##########################
> ## Optimality Criteria
> #########################
> traceI<-sum(diag(C22)) ## A-Optimality
> doptimI<-log(det(C22)) #
> #return(c(traceI,doptimI))
> if(criteria=="A") return(traceI)
> if(criteria=="D") return(doptimI)
> else{return(c(traceI,doptimI))}
>}
>
>#swap(matdf,ped="F",criteria="both")
>
>###########################################
>### Generate 25 initial designs
>###########################################
>#rspatf<-function(design){
># arr = array(1, dim=c(nrow(matdf),ncol(matdf)+1,design))
># l<-list(length=dim(arr)[3])
># for (i in 1:dim(arr)[3]){
># l[[i]]<-swapsimple(matdf,ped="F")[[1]][,,i]
># }
># l
>#}
>#matd<-rspatf(design=5)
>#matd
>
>#which(matd[[1]]$genotypes != matd[[1]]$NewG)
>#which(matd[[2]]$genotypes != matd[[2]]$NewG)
>
>
>###############################################
>###############################################
>
>optm<-function(iterations=2,verbose=FALSE)
>{
> trace<-c()
>
> for (k in 1:iterations){
>setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")
> trace[k]<-swap(matdf,ped="F",criteria="A")
> iterations[k]<-k
> mat<-cbind(trace, iterations= seq(iterations))
> }
>
> if (verbose){
> cat("***starting matrix\n")
> print(mat)
> }
> # iterate till done
> while(nrow(mat) > 1){
> high <- diff(mat[, 'trace']) > 0
> if (!any(high)) break # done
> # find which one to delete
> delete <- which.max(high) + 1L
> #mat <- mat[-delete, ]
> mat <- mat[-delete,, drop=FALSE]
> }
> mat
>}
>
>#system.time(res<-optm(iterations=10));res
>#################################################
>################################################
>finalF<-function(designs,swaps)
>{
> Nmatdf<-list()
> OP<-list()
> Miny<-NULL
> Maxy<-NULL
> Minx<-NULL
> Maxx<-NULL
> for (i in 1:designs)
> {
>setup(b=4,g=seq(1,20,1),rb=5,cb=4,r=10,c=8,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
> mainF(criteria="A")
> for (j in 1:swaps)
> {
> OP[[i]]<- optmF(iterations=swaps)
> Nmatdf[[i]]<-newmatdf[,5]
> Miny[i]<-min(OP[[i]][,1])
> Maxy[i]<-max(OP[[i]][,1])
> Minx[i]<-min(OP[[i]][,2])
> Maxx[i]<-max(OP[[i]][,2])
> }
> }
>return(list(OP=OP,Miny=Miny,Maxy=Maxy,Minx=Minx,Maxx=Maxx,Nmatdf=Nmatdf))
># gives us both the Optimal conditions and designs
>}
>
>#################################################
>sink(file= paste(format(Sys.time(),
>"Final_%a_%b_%d_%Y_%H_%M_%S"),"txt",sep="."),split=TRUE)
>system.time(test1<-finalF(designs=25,swaps=2000));test1
>sink()
>
>
>I expect results like this below
>
>>sink()
>>finalF<-function(designs,swaps)
>+{
>+ Nmatdf<-list()
>+ OP<-list()
>+ Miny<-NULL
>+ Maxy<-NULL
>+ Minx<-NULL
>+ Maxx<-NULL
>+ for (i in 1:designs)
>+ {
>+
>setup(b=4,g=seq(1,20,1),rb=5,cb=4,r=10,c=8,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
>+ mainF(criteria="A")
>+ for (j in 1:swaps)
>+ {
>+ OP[[i]]<- optmF(iterations=swaps)
>+ Nmatdf[[i]]<-newmatdf[,5]
>+ Miny[i]<-min(OP[[i]][,1])
>+ Maxy[i]<-max(OP[[i]][,1])
>+ Minx[i]<-min(OP[[i]][,2])
>+ Maxx[i]<-max(OP[[i]][,2])
>+ }
>+ }
>+
>return(list(OP=OP,Miny=Miny,Maxy=Maxy,Minx=Minx,Maxx=Maxx,Nmatdf=Nmatdf))
># gives us both the Optimal conditions and designs
>+}
>>sink(file= paste(format(Sys.time(),
>>"Final_%a_%b_%d_%Y_%H_%M_%S"),"txt",sep="."),split=TRUE)
>>system.time(test1<-finalF(designs=5,swaps=5));test1
> user system elapsed
> 37.88 0.00 38.04
>$OP
>$OP[[1]]
> trace iterations
>[1,] 0.8961335 1
>[2,] 0.8952822 3
>[3,] 0.8934649 4
>
>$OP[[2]]
> trace iterations
>[1,] 0.893955 1
>
>$OP[[3]]
> trace iterations
>[1,] 0.9007225 1
>[2,] 0.8971837 4
>[3,] 0.8902474 5
>
>$OP[[4]]
> trace iterations
>[1,] 0.8964726 1
>[2,] 0.8951722 4
>
>$OP[[5]]
> trace iterations
>[1,] 0.8973285 1
>[2,] 0.8922594 4
>
>
>$Miny
>[1] 0.8934649 0.8939550 0.8902474 0.8951722 0.8922594
>
>$Maxy
>[1] 0.8961335 0.8939550 0.9007225 0.8964726 0.8973285
>
>$Minx
>[1] 1 1 1 1 1
>
>$Maxx
>[1] 4 1 5 4 4
>
>$Nmatdf
>$Nmatdf[[1]]
> [1] 30 8 5 28 27 29 1 26 24 22 13 6 17 18 2 19 14 11 3 23
> 10 15 21 9 25 4 7 20 12 16 14 17 15 5 8 6 19
> [38] 4 1 10 11 3 24 20 13 2 27 12 16 28 21 23 30 25 29 7 26
> 18 9 22 24 21 26 2 13 30 5 28 20 11 3 7 18 25
> [75] 22 16 4 17 19 27 29 10 23 6 12 15 14 1 9 8 12 11
> 3 8 5 20 23 22 7 15 19 29 24 27 13 2 6 1 21 26 25
>[112] 10 16 14 18 4 30 17 9 28 29 9 7 27 11 2 30 18 8 14 19 20
>15 21 4 3 16 24 13 28 26 10 12 6 5 25 1 17
>[149] 23 22 21 2 23 16 4 10 9 22 30 24 1 27 3 20 12 5 26 17 28
>11 7 14 8 25 19 13 18 29 15 6
>Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
>24 25 26 27 28 29 30
>
>$Nmatdf[[2]]
> [1] 5 13 30 2 21 23 6 27 16 19 8 26 18 4 20 9 22 28
> 7 3 15 10 11 17 25 24 29 1 14 12 28 18 23 19 21 16 17
> [38] 29 13 7 15 27 25 22 10 1 2 5 30 9 20 3 14 24 26
> 4 6 12 11 8 8 18 25 12 5 23 21 4 9 17 20 1 2 6
> [75] 22 7 16 26 30 29 3 15 19 14 13 11 24 28 27 10 16 21 26 23
> 25 4 9 24 15 14 22 1 20 27 2 7 17 18 13 8 12
>[112] 5 6 19 28 3 10 30 11 29 11 30 14 9 26 5 1 10 29 28 4 18
>8 24 20 13 3 23 27 6 15 16 21 2 17 7 25 12
>[149] 19 22 7 28 8 11 26 24 12 29 9 16 21 27 22 23 18 19 13 6 15
>3 1 30 2 17 14 5 25 20 4 10
>Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
>24 25 26 27 28 29 30
>
>$Nmatdf[[3]]
> [1] 7 25 4 30 12 11 14 13 26 1 10 21 15 22 29 19 27 16 2 24
> 28 20 3 5 23 8 18 6 17 9 6 21 9 15 11 17 13
> [38] 29 24 4 20 7 23 14 2 16 18 26 19 25 8 1 12 10 28 27 22
> 30 5 3 20 12 8 2 11 18 24 19 9 22 15 7 30 27
> [75] 17 29 6 3 5 1 21 25 28 14 23 4 16 26 13 10 20 29 26 25
> 15 22 9 10 28 17 18 21 6 16 7 1 3 24 11 2 4
>[112] 14 8 5 13 27 23 30 19 12 6 30 1 2 7 28 18 8 20 10 4 25
>14 19 27 11 13 29 12 9 3 26 22 21 16 15 17 24
>[149] 5 23 17 6 25 11 21 29 5 26 13 7 15 2 9 4 18 30 3 8 20
>24 27 22 19 16 28 12 1 23 14 10
>Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
>24 25 26 27 28 29 30
>
>$Nmatdf[[4]]
> [1] 24 8 17 30 10 20 4 28 25 16 14 13 7 12 26 29 21 19 1 22
> 11 6 23 18 15 5 27 2 3 9 1 24 27 15 26 14 28
> [38] 20 8 5 4 29 2 25 9 13 6 21 7 22 30 17 3 10 12 19 11
> 18 16 23 25 18 3 29 1 4 8 6 9 30 2 14 11 16
> [75] 23 13 10 12 7 19 17 5 21 28 24 20 15 27 26 22 14
> 5 7 6 17 3 1 29 25 23 19 11 21 18 4 30 20 8 2 12 9
>[112] 16 10 15 27 26 13 24 28 22 19 7 17 1 12 8 18 16 14 22 3 28
>27 25 10 6 4 15 30 9 11 5 20 26 24 29 21 2
>[149] 23 13 2 16 10 25 18 15 26 22 12 19 30 17 23 8 3 7 20 14 13
>28 9 21 11 29 6 5 4 24 27 1
>Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
>24 25 26 27 28 29 30
>
>$Nmatdf[[5]]
> [1] 12 18 8 22 9 21 2 1 29 13 30 25 17 6 16 5 26 7 3 14
> 23 15 28 27 10 24 20 11 19 4 20 30 14 27 25 4 6
> [38] 28 23 8 9 29 26 19 24 7 5 1 11 22 21 2 10 18 12 15
> 3 17 13 16 16 22 6 9 21 5 14 2 30 10 3 25 27 15
> [75] 28 7 17 20 11 8 19 29 12 26 24 13 1 4 18 23 4 16 10 25
> 5 13 18 19 22 7 28 30 23 21 11 2 14 9 20 24 8
>[112] 17 1 15 29 6 12 27 3 26 14 8 26 6 20 9 15 23 3 22 7 30
>25 24 1 10 19 21 4 11 2 18 17 13 28 29 27 16
>[149] 12 5 19 2 4 5 15 21 17 7 25 8 6 16 20 29 10 18 1 12 26
>28 27 11 14 23 22 9 3 13 30 24
>Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
>24 25 26 27 28 29 30
>
>
Michael Dewey
info at aghmed.fsnet.co.uk
http://www.aghmed.fsnet.co.uk/home.html
More information about the R-help
mailing list