[R] speeding up likelihood computation
DEEPANKAR BASU
basu.15 at osu.edu
Sun Dec 2 18:49:34 CET 2007
R Users:
I am trying to estimate a model of fertility behaviour using birth history data with maximum likelihood. My code works but is extremely slow (because of several for loops and my programming inefficiencies); when I use the genetic algorithm to optimize the likelihood function, it takes several days to complete (on a machine with Intel Core 2 processor [2.66GHz] and 2.99 GB RAM). Computing the hessian and using it to calculate the standard errors takes a large chunk of this time.
I am copying the code for my likelihood function below; it would be great if someone could suggest any method to speed up the code (by avoiding the for loops or by any other method).
I am not providing details of my model or what exactly I am trying to do in each step of the computation below; i would be happy to provide these details if they are deemed necessary for re-working the code.
Thanks.
Deepankar
--------- begin code -----------------------
LLK1 <- function(paramets, data.frame, ...) { # DEFINING THE LOGLIKELIHOOD FUNCTION
# paramets IS A 1x27 VECTOR OF PARAMETERS OVER WHICH THE FUNCTION WILL BE MAXIMISED
# data.frame IS A DATA FRAME. THE DATA FRAME CONTAINS OBSERVATIONS ON SEVERAL VARIABLES
# (LIKE EDUCATION, AGE, ETC.) FOR EACH RESPONDENT. COLUMNS REFER TO VARIABLES AND ROWS REFER
# TO OBSERVATIONS.
########## PARAMETERS ###############################
# alpha: interaction between son targeting and family size
# beta : son targeting
# gamma : family size
# delta : a 1x6 vector of probabilities of male birth at various parities (q1, q2, q3, q4, q5, q6)
# zeta : a 1x11 vector of conditional probabilities with zeta[1]=1 always
alpha <- paramets[1] # FIRST PARAMETER
beta <- paramets[2:9] # SECOND TO SEVENTH PARAMETER
gamma <- paramets[10:16]
delta <- paramets[17]
zeta <- paramets[18:27] # LAST 10 PARAMETERS
################# VARIABLES ###############################
# READING IN THE VARIABLES IN THE DATA FRAME
# AND RENAMING THEM FOR USE IN THE LIKELIHOOD FUNCTION
everborn <- data.frame$v201
alive <- data.frame$alive
age <- data.frame$age
edu <- data.frame$edu
rural <- data.frame$rur
rich <- data.frame$rich
middle <- data.frame$middle
poor <- data.frame$poor
work <- data.frame$work
jointfam <- data.frame$jfam
contracep <- data.frame$contra
hindu <- data.frame$hindu
muslim <- data.frame$muslim
scaste <- data.frame$scaste
stribe <- data.frame$stribe
obc <- data.frame$obc
ucaste <- data.frame$ucaste
N <- data.frame$dfsize
indN <- data.frame$dfsize1 # INDICATOR FUNCTION THAT dfsize==alive
nb <- data.frame$nboy
ng <- data.frame$ngirl
ncord1 <- data.frame$ncord1 # FIRST CHILD: BOY=0; GIRL=1
ncord2 <- data.frame$ncord2 #SECOND CHILD: BOY=0; GIRL=1
ncord3 <- data.frame$ncord3
ncord4 <- data.frame$ncord4
ncord5 <- data.frame$ncord5
ncord6 <- data.frame$ncord6 # SIXTH CHILD: BOY=0; GIRL=1
######### POSITION OF i^th BOY ################################################
boy1 <- data.frame$boy1 # BIRTH POSITION OF FIRST BOY (ZERO IF THE FAMILY HAS NO BOYS)
boy2 <- data.frame$boy2 # BIRTH POSITION OF SECOND BOY (ZERO IF THE FAMILY HAS ONLY ONE BOY)
boy3 <- data.frame$boy3
boy4 <- data.frame$boy4
boy5 <- data.frame$boy5
boy6 <- data.frame$boy6 # BIRTH POSITION OF SIXTH BOY (ZERO IF THE FAMILY HAS ONLY FIVE BOYS)
######################## CONDITIONAL PROBABILITIES ##########################
qq21 <- 1
qq31 <- 1/(1+exp(zeta[1]))
qq32 <- exp(zeta[1])/(1+exp(zeta[1]))
qq41 <- 1/(1+exp(zeta[2])+exp(zeta[3]))
qq42 <- exp(zeta[2])/(1+exp(zeta[2])+exp(zeta[3]))
qq43 <- exp(zeta[3])/(1+exp(zeta[2])+exp(zeta[3]))
qq51 <- 1/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq52 <- exp(zeta[4])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq53 <- exp(zeta[5])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq54 <- exp(zeta[6])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq61 <- 1/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq62 <- exp(zeta[7])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq63 <- exp(zeta[8])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq64 <- exp(zeta[9])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq65 <- exp(zeta[10])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
zeta1 <- c(qq21,qq31,qq32,qq41,qq42,qq43,qq51,qq52,qq53,qq54,qq61,qq62,qq63,qq64,qq65)
#########################################################################
n <- length(N) # LENGTH OF SAMPLE; SIZE OF THE MAIN LOOP
lglik <- numeric(n) # INITIALIZING THE LOGLIKELIHOOD FUNCTION
# CREATES A 1xn VECTOR OF ZEROS
for (j in 1:n) { # START OF MAIN LOOP
S <- matrix(0, 6, 6) # CREATE A 6x6 MATRIX OF ZEROS
y <- numeric(15) # CREATE A 1x15 VECTOR OF ZEROS
N1 <- N[j] # DESIRED FAMILY SIZE
q <- 1/(1+exp(delta)) # PROBABILITY OF MALE BIRTH
if (alive[j]==N1) {
for (i in 1:(N1-1)) {
S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j]))
}
}
else {
for (i in 1:(N1-1)) {
S[N1,i] <- 0
}
}
######### CREATE A 1x6 VECTOR WITH POSITION OF BOYS WITHIN FAMILY
x <- c(boy1[j], boy2[j], boy3[j], boy4[j], boy5[j])
if (N1>1) {
for (i in 1:(N1-1)) {
if (alive[j]>x[i] & x[i]>0) {
S[N1,i] <- 0
}
if (x[i] == alive[j] ) {
S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j]))
}
}
}
y <- c(S[2,1],S[3,1],S[3,2],S[4,1],S[4,2],S[4,3],S[5,1],S[5,2],S[5,3],S[5,4],S[6,1],S[6,2],S[6,3],S[6,4],S[6,5])
z1 <- c(age[j],edu[j],work[j],rural[j],poor[j],middle[j],hindu[j]) # DETERMINANTS OF FAMILY SIZE
z2 <- c(1,age[j],edu[j],work[j],contracep[j],rural[j],jointfam[j],hindu[j]) # DETERMINANTS OF SON TARGETING
t1 <- (indN[j])*((q^(nb[j]))*((1-q)^(ng[j])))*(exp(-exp(sum(z1*gamma)))*((exp(sum(z1*gamma)))^N1)*pnorm(-sum(z2*beta)))/factorial(N1)
t2 <- (sum(y*zeta1))*(exp(-exp(sum(z1*gamma) + alpha))*((exp(sum(z1*gamma) + alpha))^N1)*(1-pnorm(-sum(z2*beta)))/factorial(N1))
lglik[j] <- log(t1+t2)
}
return(-sum(lglik)) # RETURNING THE NEGATIVE OF THE LOGLIKELIHOOD
# SUMMED OVER ALL OBSERVATIONS
}
------------ end code ----------------------
More information about the R-help
mailing list