[R] need help EM algorithm to find MLE of coeff in mixed effects model

Jie jimmycloud at gmail.com
Tue Jul 3 19:56:14 CEST 2012

Dear All,

have a general question about coefficients estimation of the mixed model.

I simulated a very basic model:
$Y_i \mid b_i \sim N(X_i\beta + Z_i b_i,\ \sigma^2 I_{n_i})$, with
$b_i \sim N(0, \Psi)$  # i.e. bivariate normal
where b_i is the latent variable, Z_i and X_i are ni*2 design matrices,
sigma^2 is the error variance,
and Y are longitudinal data, i.e. there are ni measurements for subject i.
The parameters are \beta, \sigma, \Psi; call them \theta.

I wrote a regular EM, the M step is to maximize the log(f(Y,b;\theta))  by
taking first order derivatives, setting to 0 and solving the equation.
The E step involves evaluating the conditional expectations, using
Gauss-Hermite approximation (10 points per dimension).

All are simulated data. X and Z are naive like cbind(rep(1,m),1:m)
After 200 iterations, the estimated \beta converges to the true value while
\sigma and \psi do not. Even after one iteration, the \sigma estimate jumps
well above its starting value.
I am confused, since \hat{\beta} requires the \sigma and \psi from the
previous iteration: if something were wrong there, all of the estimates
should be incorrect...

Another question is that I calculated the logf(Y;\theta) to see if it
increases after updating \theta.
Seems decreasing.....

I thought that X and Z being linearly dependent might cause some issue, but
I also changed X and Z to random numbers drawn from a normal distribution.

I also tried ECM, which converges fast but sigma and psi are not close to
the desired values.
Is this due to the limitation of some methods that I used?

Can any one give some help? I am stuck for a week. I can send the code to
you.
First time to raise a question here. Not sure if it is proper to post all
code.
Below is the same as the zip file.
##########################################################################
# Simulation setup.
# Model: y_i | b_i ~ N(Xi %*% beta + Zi %*% b_i, vvar * I_ni),
#        b_i ~ N(0, psi)  (bivariate normal).
library(MASS)  # for mvrnorm()

set.seed(1)  # make the simulated data reproducible

n <- 100                                 # number of subjects
beta <- c(-0.5, 1)                       # true fixed effects
vvar <- 2                                # true error variance sigma^2 = 2
psi <- matrix(c(1, 0.2, 0.2, 1), 2, 2)   # true random-effect covariance

# 100 x 2 matrix; each row is one b_i.
b.m.true <- mvrnorm(n = n, mu = c(0, 0), Sigma = psi)

# Common design matrices, ni = 7 measurements per subject.
Xi <- cbind(rnorm(7, mean = 2, sd = 0.5), log(2:8))  # Xi=cbind(rep(1,7),1:7)
Zi <- Xi

# BUG FIX: y.m was allocated full of NA and never filled, so the EM below
# operated on missing responses.  Simulate
#   y_i = Xi beta + Zi b_i + e_i,  e_i ~ N(0, vvar * I).
y.m <- matrix(NA_real_, nrow = n, ncol = nrow(Xi))  # 100 x 7; row i is y_i
for (i in seq_len(n)) {
  y.m[i, ] <- c(Xi %*% beta + Zi %*% b.m.true[i, ]) +
    rnorm(nrow(Xi), mean = 0, sd = sqrt(vvar))
}

b.list <- as.list(data.frame(t(b.m.true)))

# Starting values for psi, beta and sigma^2 -- deliberately not equal to the
# true values.
psi.old <- matrix(c(0.5, 0.4, 0.4, 0.5), 2, 2)
beta.old <- c(-0.3, 0.7)
var.old <- 1.7

# Per-subject data bundles, without the latent b ("wob" = without b).
data.list.wob <- list()
for (i in seq_len(n)) {
  data.list.wob[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi)
}

# Log-likelihood at the true parameters, for reference.
truelog <- 0
for (i in seq_along(data.list.wob)) {
  truelog <- truelog + loglike(data.list.wob[[i]], vvar, beta, psi)
}
truelog

# Log-likelihood at the starting values.  Arguments are passed explicitly
# rather than relying on loglike()'s global defaults.
loglikeseq <- c()
loglikeseq[1] <- sum(sapply(data.list.wob, loglike, vvar = var.old,
                            beta = beta.old, psi = psi.old))

ECM <- FALSE  # use TRUE/FALSE, not the reassignable T/F

# ---- EM iterations ---------------------------------------------------------
# E step: 2-D Gauss-Hermite quadrature (all node pairs) for the conditional
# expectations of b_i | y_i; M step: closed-form updates for sigma^2, beta,
# psi.  Requires gausspar$nodes / gausspar$weights (1-D Gauss-Hermite rule)
# and gtools::permutations() -- defined outside this chunk.
for (m in 1:300) {

  # Marginal covariance of y_i and conditional law of b_i | y_i under the
  # current parameters.  All subjects share Xi/Zi, so compute these once.
  Sig.hat <- Zi %*% psi.old %*% t(Zi) + var.old * diag(nrow(Zi))
  Sig.hat.inv <- solve(Sig.hat)
  B <- psi.old %*% t(Zi) %*% Sig.hat.inv     # E(b_i|y_i) = B (y_i - Xi beta)
  # Conditional covariance Var(b_i | y_i).  (The original computed this
  # twice, as W.hat and Sigb, plus a dead det(Sigb)^(-0.5) expression.)
  W.hat <- psi.old - B %*% Zi %*% psi.old

  # Conditional means: row i of miu.m is miu_i = B (y_i - Xi beta).
  Y.minus.X.beta <- t(t(y.m) - c(Xi %*% beta.old))
  miu.m <- t(apply(Y.minus.X.beta, MARGIN = 1, function(s) B %*% s))

  # All ordered pairs of 1-D Gauss-Hermite nodes (a_j, a_k), and the matching
  # weight pairs (w_j, w_k), one pair per row.
  idx <- permutations(length(gausspar$nodes), 2, repeats.allowed = TRUE)
  a.mat <- matrix(gausspar$nodes[c(idx)], nrow = nrow(idx))
  idx <- permutations(length(gausspar$weights), 2, repeats.allowed = TRUE)
  weights.mat <- matrix(gausspar$weights[c(idx)], nrow = nrow(idx))

  # Map the standard nodes onto the N(miu_i, W.hat) scale:
  # b = miu_i + sqrt(2) * L^{-1} %*% a, where L'L = W.hat^{-1} (chol gives
  # the upper-triangular factor).
  L <- chol(solve(W.hat))
  halfb <- t(sqrt(2) * solve(L) %*% t(a.mat))

  # Per-subject quadrature points: each row of b.list[[i]] is one value of
  # (b_{i1}, b_{i2}).
  b.list <- list()
  for (i in seq_len(n)) {
    b.list[[i]] <- t(t(halfb) + miu.m[i, ])
  }

  # Per-subject bundles consumed by the E-step helper functions.
  data.list <- list()
  for (i in seq_len(n)) {
    data.list[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi, b = b.list[[i]])
  }

  # ---- M step: sigma^2 ----
  tempbb <- numeric(n)
  for (j in seq_len(n)) {
    tempbb[j] <- Eh4newv2(datai = data.list[[j]], weights.m = weights.mat,
                          beta = beta.old)
  }
  # BUG FIX: tempbb[j] approximates E||y_j - Xj beta - Zj b_j||^2, whose
  # expectation is ni * sigma^2.  The original used mean(tempbb), which
  # inflates sigma^2 by a factor of ni -- the likely reason sigma "goes up"
  # and the log-likelihood decreases.  Divide by ni = nrow(Xi):
  #   sigma^2 = sum_j E||r_j||^2 / (n * ni).
  var.new <- mean(tempbb) / nrow(Xi)
  if (isTRUE(ECM)) var.old <- var.new  # ECM: use the update immediately

  # ---- M step: beta = (sum X'X)^{-1} sum E[X'(y - Z b)] ----
  sumXiXi <- matrix(rowSums(sapply(data.list,
                                   function(s) t(s$Xi) %*% s$Xi)),
                    ncol = ncol(Xi))
  tempbb <- c()
  for (j in seq_len(n)) {
    tempbb <- cbind(tempbb, Eh2new(data.list[[j]], weights.m = weights.mat))
  }
  beta.new <- solve(sumXiXi) %*% rowSums(tempbb)
  if (isTRUE(ECM)) beta.old <- beta.new

  # ---- M step: psi = (1/n) sum E(b_i b_i') ----
  sumtempcc <- matrix(0, 2, 2)
  for (j in seq_len(n)) {
    sumtempcc <- sumtempcc + Eh1new(data.list[[j]], weights.m = weights.mat)
  }
  psi.new <- sumtempcc / n

  # ---- Convergence: total absolute change in all parameters ----
  if (sum(abs(beta.old - beta.new)) + sum(abs(psi.old - psi.new)) +
      sum(abs(var.old - var.new)) < 0.01) {
    print("converge, stop")
    break
  }

  # Accept the updates and track the observed-data log-likelihood, which
  # should be non-decreasing for a correct EM.
  var.old <- var.new
  psi.old <- psi.new
  beta.old <- beta.new
  loglikeseq[m + 1] <- sum(sapply(data.list, loglike))
}  # end of EM loop

########################################################################
# Observed-data log-likelihood contribution of one subject, up to the
# additive constant -ni/2 * log(2*pi).
#
# Marginally, y_i ~ N(Xi beta, V) with V = vvar * I + Zi psi Zi'.
#
# datai: list with Xi (ni x p design), yi (length-ni response), Zi (ni x q)
# vvar : error variance sigma^2
# beta : fixed-effect coefficient vector
# psi  : q x q random-effect covariance
# Returns a 1 x 1 matrix (as the original did).
loglike <- function(datai = data.list[[i]], vvar = var.old,
                    beta = beta.old, psi = psi.old) {
  # Build the marginal covariance once (the original constructed it twice,
  # on one mangled line).
  V <- vvar * diag(nrow(datai$Zi)) + datai$Zi %*% psi %*% t(datai$Zi)
  r <- datai$yi - datai$Xi %*% beta
  -0.5 * log(det(V)) - 0.5 * t(r) %*% solve(V) %*% r
}

#######################################################################
# E-step helpers (Efun v2.R): Gauss-Hermite quadrature over the 2-D random
# effect.  Each takes datai = list(Xi, yi, Zi, b), where b holds one
# quadrature point per row (already shifted/scaled to the N(miu_i, W.hat)
# scale), and weights.m, whose two columns are the paired 1-D Gauss-Hermite
# weights (w_j, w_k).  Division by pi normalizes the 2-D weights, since
# sum_{j,k} w_j w_k = pi for the Gauss-Hermite rule.
#
# Eh1new = E(b_i b_i')
# Eh2new = E(X'(y - Z b_i))
# Eh3new = E(b_i' Z' Z b_i)
# Eh4new = E(y - X beta - Z b_i)'(y - X beta - Z b_i)
# Eh5new = E(X beta - y)' Z b_i
# Eh6new = E(b_i)

# Eh1new = E(b_i b_i'), returned as a 2 x 2 matrix.
Eh1new <- function(datai = data.list[[i]], weights.m = weights.mat) {
  bi <- datai$b
  # The expectation is quadratic in b, so scale each row of quadrature
  # points by sqrt(w_j * w_k); the cross-product then carries w_j * w_k.
  tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), 2)
  t(tempb) %*% tempb / pi
}  # end of Eh1

# Eh2new = E(X'(y - Z b_i)), returned as a plain vector.
Eh2new <- function(datai = data.list[[i]], weights.m = weights.mat) {
  bi <- datai$b
  # Linear in b: weight each row by w_j * w_k directly.
  tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2)
  tt <- t(datai$Xi) %*% datai$yi -
    t(datai$Xi) %*% datai$Zi %*% colSums(tempb) / pi
  c(tt)
}  # end of Eh2

# Eh3new = E(b_i' Z' Z b_i), a scalar.
Eh3new <- function(datai = data.list[[i]], weights.m = weights.mat) {
  bi <- datai$b
  # Quadratic in b, so use sqrt of the product weights again.
  tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), 2)
  # Each data.frame column is Z b for one quadrature point; sum the squared
  # entries over all points.
  sum(sapply(as.list(data.frame(datai$Zi %*% t(tempb))),
             function(s) sum(s^2))) / pi
}  # end of Eh3

# Eh4new = E(y - X beta - Z b_i)'(y - X beta - Z b_i), a scalar.
# Vectorized form: each data.frame column is the residual at one quadrature
# point.
Eh4new <- function(datai = data.list[[i]], weights.m = weights.mat,
                   beta = beta.old) {
  bi <- datai$b
  # BUG FIX: the original called "ata.frame" (typo for data.frame), which
  # would error at run time.
  tt <- sapply(as.list(data.frame(c(datai$yi - datai$Xi %*% beta) -
                                    datai$Zi %*% t(bi))),
               function(s) sum(s^2)) * (weights.m[, 1] * weights.m[, 2])
  sum(tt) / pi
}  # end of Eh4

# Eh4newv2: same expectation as Eh4new, computed with an explicit loop over
# the quadrature points.
Eh4newv2 <- function(datai = data.list[[i]], weights.m = weights.mat,
                     beta = beta.old) {
  bi <- datai$b
  v <- weights.m[, 1] * weights.m[, 2]  # combined weight per point
  temp <- numeric(length(v))            # preallocate instead of growing
  for (k in seq_along(v)) {
    temp[k] <- sum(((datai$yi - datai$Xi %*% beta -
                       datai$Zi %*% bi[k, ]) * sqrt(v[k]))^2)
  }
  sum(temp) / pi
}  # end of Eh4v2

# Eh5new = E(X beta - y)' Z b_i, a scalar.
Eh5new <- function(datai = data.list[[i]], weights.m = weights.mat,
                   beta = beta.old) {
  bi <- datai$b
  tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2)
  # (The original evaluated this cross-product once without assigning it;
  # that dead expression has been removed.)
  sum(t(datai$Xi %*% beta - datai$yi) %*% (datai$Zi %*% t(tempb))) / pi
}