[R] speeding up likelihood computation

DEEPANKAR BASU basu.15 at osu.edu
Sun Dec 2 18:49:34 CET 2007


R Users:

I am trying to estimate a model of fertility behaviour using birth history data with maximum likelihood. My code works but is extremely slow (because of several for loops and my programming inefficiencies); when I use the genetic algorithm to optimize the likelihood function, it takes several days to complete (on a machine with Intel Core 2 processor [2.66GHz] and 2.99 GB RAM). Computing the hessian and using it to calculate the standard errors takes a large chunk of this time. 

I am copying the code for my likelihood function below; it would be great if someone could suggest any method to speed up the code (by avoiding the for loops or by any other method).

I am not providing details of my model or what exactly I am trying to do in each step of the computation below; i would be happy to provide these details if they are deemed necessary for re-working the code.

Thanks.
Deepankar


--------- begin code -----------------------

LLK1 <- function(paramets, data.frame, ...) {  # DEFINING THE LOGLIKELIHOOD FUNCTION

# paramets IS A 1x27 VECTOR OF PARAMETERS OVER WHICH THE FUNCTION WILL BE MAXIMISED
# data.frame IS A DATA FRAME. THE DATA FRAME CONTAINS OBSERVATIONS ON SEVERAL VARIABLES
# (LIKE EDUCATION, AGE, ETC.) FOR EACH RESPONDENT. COLUMNS REFER TO VARIABLES AND ROWS REFER
# TO OBSERVATIONS.

########## PARAMETERS ###############################

# alpha: interaction between son targeting and family size
# beta : son targeting
# gamma : family size
# delta : a 1x6 vector of probabilities of male birth at various parities (q1, q2, q3, q4, q5, q6)
# zeta : a 1x11 vector of conditional probabilities with zeta[1]=1 always

alpha <- paramets[1]      # FIRST PARAMETER
beta <- paramets[2:9]     # SECOND TO SEVENTH PARAMETER
gamma <- paramets[10:16]
delta <- paramets[17]
zeta <- paramets[18:27]   # LAST 10 PARAMETERS

################# VARIABLES ###############################
# READING IN THE VARIABLES IN THE DATA FRAME
# AND RENAMING THEM FOR USE IN THE LIKELIHOOD FUNCTION

everborn <- data.frame$v201
alive <- data.frame$alive
age <- data.frame$age
edu <- data.frame$edu
rural <- data.frame$rur
rich <- data.frame$rich
middle <- data.frame$middle
poor <- data.frame$poor
work <- data.frame$work
jointfam <- data.frame$jfam
contracep <- data.frame$contra
hindu <- data.frame$hindu
muslim <- data.frame$muslim
scaste <- data.frame$scaste
stribe <- data.frame$stribe
obc <- data.frame$obc
ucaste <- data.frame$ucaste
N <- data.frame$dfsize
indN <- data.frame$dfsize1  # INDICATOR FUNCTION THAT dfsize==alive
nb <- data.frame$nboy
ng <- data.frame$ngirl
ncord1 <- data.frame$ncord1  # FIRST CHILD: BOY=0; GIRL=1  
ncord2 <- data.frame$ncord2  #SECOND CHILD: BOY=0; GIRL=1
ncord3 <- data.frame$ncord3
ncord4 <- data.frame$ncord4
ncord5 <- data.frame$ncord5
ncord6 <- data.frame$ncord6  # SIXTH CHILD: BOY=0; GIRL=1



######### POSITION OF i^th BOY ################################################
boy1 <- data.frame$boy1     # BIRTH POSITION OF FIRST BOY (ZERO IF THE FAMILY HAS NO BOYS)
boy2 <- data.frame$boy2     # BIRTH POSITION OF SECOND BOY (ZERO IF THE FAMILY HAS ONLY ONE BOY)
boy3 <- data.frame$boy3
boy4 <- data.frame$boy4
boy5 <- data.frame$boy5
boy6 <- data.frame$boy6     # BIRTH POSITION OF SIXTH BOY (ZERO IF THE FAMILY HAS ONLY FIVE BOYS)


######################## CONDITIONAL PROBABILITIES ##########################
qq21 <- 1

qq31 <- 1/(1+exp(zeta[1]))
qq32 <- exp(zeta[1])/(1+exp(zeta[1]))

qq41 <- 1/(1+exp(zeta[2])+exp(zeta[3]))
qq42 <- exp(zeta[2])/(1+exp(zeta[2])+exp(zeta[3]))
qq43 <- exp(zeta[3])/(1+exp(zeta[2])+exp(zeta[3]))

qq51 <- 1/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq52 <- exp(zeta[4])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq53 <- exp(zeta[5])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq54 <- exp(zeta[6])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))

qq61 <- 1/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq62 <- exp(zeta[7])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq63 <- exp(zeta[8])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq64 <- exp(zeta[9])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq65 <- exp(zeta[10])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))

zeta1 <- c(qq21,qq31,qq32,qq41,qq42,qq43,qq51,qq52,qq53,qq54,qq61,qq62,qq63,qq64,qq65)

#########################################################################

n <- length(N)         # LENGTH OF SAMPLE; SIZE OF THE MAIN LOOP

lglik <- numeric(n)    # INITIALIZING THE LOGLIKELIHOOD FUNCTION
                       # CREATES A 1xn VECTOR OF ZEROS

 for (j in 1:n) {      # START OF MAIN LOOP

    S <- matrix(0, 6, 6)  # CREATE A 6x6 MATRIX OF ZEROS
    y <- numeric(15)      # CREATE A 1x15 VECTOR OF ZEROS  
    N1 <- N[j]       # DESIRED FAMILY SIZE

      
      q <- 1/(1+exp(delta))   # PROBABILITY OF MALE BIRTH
      

    if (alive[j]==N1) {
 
	for (i in 1:(N1-1)) {
		S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j]))  
	}
    }

    else {
	for (i in 1:(N1-1)) {
		S[N1,i] <- 0
	}

    }	

######### CREATE A 1x6 VECTOR WITH POSITION OF BOYS WITHIN FAMILY
      x <- c(boy1[j], boy2[j], boy3[j], boy4[j], boy5[j]) 

      if (N1>1) {    
                     for (i in 1:(N1-1)) {
     		               if (alive[j]>x[i] & x[i]>0) { 
        	                   S[N1,i] <- 0
        	               } 
     		               if (x[i] == alive[j] ) { 
        	                   S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j]))
        	               }
                     }
      }

   y <- c(S[2,1],S[3,1],S[3,2],S[4,1],S[4,2],S[4,3],S[5,1],S[5,2],S[5,3],S[5,4],S[6,1],S[6,2],S[6,3],S[6,4],S[6,5])

   
   z1 <- c(age[j],edu[j],work[j],rural[j],poor[j],middle[j],hindu[j])         # DETERMINANTS OF FAMILY SIZE
   z2 <- c(1,age[j],edu[j],work[j],contracep[j],rural[j],jointfam[j],hindu[j])  # DETERMINANTS OF SON TARGETING

   t1 <- (indN[j])*((q^(nb[j]))*((1-q)^(ng[j])))*(exp(-exp(sum(z1*gamma)))*((exp(sum(z1*gamma)))^N1)*pnorm(-sum(z2*beta)))/factorial(N1)
   t2 <- (sum(y*zeta1))*(exp(-exp(sum(z1*gamma) + alpha))*((exp(sum(z1*gamma) + alpha))^N1)*(1-pnorm(-sum(z2*beta)))/factorial(N1))
   lglik[j] <- log(t1+t2)
 }

 return(-sum(lglik)) # RETURNING THE NEGATIVE OF THE LOGLIKELIHOOD
                     # SUMMED OVER ALL OBSERVATIONS


} 

------------ end code ----------------------



More information about the R-help mailing list