[R-sig-ME] Problems with convergence

Ken Beath ken.beath at mq.edu.au
Thu Apr 2 11:27:52 CEST 2015


It would be nice to have this work properly, as I need it for certain
things and it seems that other people are having similar problems.

Getting it to work by increasing the quadrature points is a bit of an
aberration, it is not what typically happens, and I've put an example at
the end. At least in this one the profiling works which means the maximum
must be fairly close to that obtained from the optimisation.

My feeling on this, is that possibly the problem is not with the optimiser,
seeing that it fails with so many optimisers, but rather with the
calculation of the marginal likelihood. These optimisers don't tend to stop
with 0.001 gradients. When I have time I will find in the code how the node
locations are calculated and see what is happening.

Anyway, here is one that fails irrespective of  nAGQ value.

thedata <- structure(list(nEvents = c(10L, 53L, 17L, 18L, 22L, 6L, 16L,
14L, 13L, 18L, 15L, 19L, 52L, 19L, 8L, 16L, 50L, 8L, 9L, 4L,
26L, 45L, 18L, 20L, 5L, 16L, 18L, 7L, 3L, 19L, 30L, 26L, 66L,
23L, 29L, 18L, 72L, 25L, 9L, 2L), total = c(200, 200, 200, 200,
200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
200, 200, 200, 200, 200, 200, 200, 200, 200, 200), trt = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), id = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20"), class = "factor"),
trt12 = c(-0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5,
-0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5,
-0.5, -0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5)), .Names =
c("nEvents",
"total", "trt", "id", "trt12"), row.names = c(NA, 40L), class =
"data.frame")

glmer1a <- glmer(cbind(nEvents,total-nEvents) ~ -1 + trt + factor(id) +
(0+trt12|id), data=thedata, family=binomial, nAGQ=7)

glmer1b <- glmer(cbind(nEvents,total-nEvents) ~ -1 + trt + factor(id) +
(0+trt12|id), data=thedata, family=binomial, nAGQ=21)





On 1 April 2015 at 02:25, Viechtbauer Wolfgang (STAT) <
wolfgang.viechtbauer at maastrichtuniversity.nl> wrote:

> This discussion piqued my interest. The model that Ken was fitting is in
> essence one of the models that is fitted by the rma.glmm() function in the
> metafor package. This is sometimes called the unconditional model with
> fixed study effects. To illustrate:
>
> ### original data
>
> thedata <- structure(list(nEvents=c(10L,53L,17L,18L,22L,6L,16L,
> 14L,13L,18L,15L,19L,52L,19L,8L,16L,50L,8L,9L,4L,
> 26L,45L,18L,20L,5L,16L,18L,7L,3L,19L,30L,26L,66L,
> 23L,29L,18L,72L,25L,9L,2L),total=c(200,200,200,200,
> 200,200,200,200,200,200,200,200,200,200,200,200,200,
> 200,200,200,200,200,200,200,200,200,200,200,200,200,
> 200,200,200,200,200,200,200,200,200,200),trt=c(0,
> 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
> 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),id=structure(c(1L,
> 2L,3L,4L,5L,6L,7L,8L,9L,10L,11L,12L,13L,14L,15L,
> 16L,17L,18L,19L,20L,1L,2L,3L,4L,5L,6L,7L,8L,9L,
> 10L,11L,12L,13L,14L,15L,16L,17L,18L,19L,20L),.Label=c("1",
> "2","3","4","5","6","7","8","9","10","11","12","13",
> "14","15","16","17","18","19","20"),class="factor")),.Names=c("nEvents",
> "total","trt","id"),row.names=c(NA,40L),class="data.frame")
>
> ### restructure data as needed for input into rma.glmm()
>
> dat <- cbind(thedata[1:20,], thedata[21:40,])
> dat$id <- dat$id <- dat$trt <- dat$trt <- NULL
> colnames(dat) <- c("ci", "n2i", "ai", "n1i")
>
> library(metafor)
> library(lme4)
>
> ### model fitted by Ken
> res1 <- glmer(cbind(nEvents,total-nEvents) ~ trt + factor(id) +
> (0+trt|id), data=thedata, family=binomial)
>
> ### fit unconditional model with fixed study effects via rma.glmm()
> res2 <- rma.glmm(measure="OR", ai=ai, n1i=n1i, ci=ci, n2i=n2i, data=dat,
> nAGQ=1)
>
> ### to get exact equivalence, use +-1/2 coding for the random effects
> thedata$trt12 <- thedata$trt - 1/2
> res3 <- glmer(cbind(nEvents,total-nEvents) ~ -1 + trt + factor(id) +
> (0+trt12|id), data=thedata, family=binomial)
>
> summary(res1)
> summary(res2)
> summary(res3)
>
> ### end example
>
> A few notes:
>
> 1) rma.glmm() uses nAGQ=7 by default, so I switched that to 1 for the
> comparison.
>
> 2) Some discussion of the 0/1 versus +-1/2 coding can be found in Turner
> et al. (2000) and Higgins et al. (2001). I tend to prefer the +-1/2 coding,
> so that is also what is currently implemented in rma.glmm(), but I may add
> the 0/1 coding as an option.
>
> 3) A nice discussion of the model is provided by Senn (2000). He also
> discusses a variety of other modeling options, including a model using
> random study effects.
>
> 4) In fact, the unconditional model with random study effects can be
> fitted with:
>
> rma.glmm(measure="OR", ai=ai, n1i=n1i, ci=ci, n2i=n2i, data=dat, model="
> UM.RS")
>
> (which makes use of glmer() underneath). As discussed by Senn, this model
> may violate what he calls the 'concurrent control principle', but his
> wording is cautious ('may violate', 'may be regarded as undesirable'),
> which reflects the lack of a thorough discussion in the literature
> comparing the various models.
>
> 5) Yet another option is the (mixed-effects) conditional logistic model.
> See, for example, Stijnen et al. (2010). This model is obtained when
> conditioning on the total number of events within each study and leads to
> non-central hypergeometric distributions for the data within each study.
> This model can be fitted with:
>
> rma.glmm(measure="OR", ai=ai, n1i=n1i, ci=ci, n2i=n2i, data=dat,
> model="CM.EL")
>
> Sorry, it's slow (I haven't found a clever way of speeding up the
> integration over the non-central hypergeometric distributions). Much
> faster, thanks to lme4, is:
>
> rma.glmm(measure="OR", ai=ai, n1i=n1i, ci=ci, n2i=n2i, data=dat, model="
> CM.AL")
>
> which uses an approximation to the exact conditional likelihood.
>
> 6) And of course there are Bayesian implementations of such models.
>
> 7) With respect to the model fitted by Ken, it's maybe interesting to note
> that NOT using the Laplace approximation, but something like 7 quadrature
> points, does not cause any convergence warnings:
>
> glmer(cbind(nEvents,total-nEvents) ~ trt + factor(id) + (0+trt|id),
> data=thedata, family=binomial, nAGQ=7)
>
> Alright, I'll shut up now.
>
> References mentioned above:
>
> Higgins, J. P. T., Whitehead, A., Turner, R. M., Omar, R. Z., & Thompson,
> S. G. (2001). Meta-analysis of continuous outcome data from individual
> patients. Statistics in Medicine, 20(15), 2219-2241.
>
> Senn, S. (2000). The many modes of meta. Drug Information Journal, 34,
> 535-549.
>
> Stijnen, T., Hamza, T. H., & Ozdemir, P. (2010). Random effects
> meta-analysis of event outcome in the framework of the generalized linear
> mixed model with applications in sparse data. Statistics in Medicine,
> 29(29), 3046-3067.
>
> Turner, R. M., Omar, R. Z., Yang, M., Goldstein, H., & Thompson, S. G.
> (2000). A multilevel model framework for meta-analysis of clinical trials
> with binary outcomes. Statistics in Medicine, 19(24), 3417-3432.
>
> Best,
> Wolfgang
>
> --
> Wolfgang Viechtbauer, Ph.D., Statistician
> Department of Psychiatry and Neuropsychology
> School for Mental Health and Neuroscience
> Faculty of Health, Medicine, and Life Sciences
> Maastricht University, P.O. Box 616 (VIJV1)
> 6200 MD Maastricht, The Netherlands
> +31 (43) 388-4170 | http://www.wvbauer.com
>
> > -----Original Message-----
> > From: R-sig-mixed-models [mailto:r-sig-mixed-models-bounces at r-
> > project.org] On Behalf Of Ben Bolker
> > Sent: Monday, March 30, 2015 22:21
> > To: r-sig-mixed-models at r-project.org
> > Subject: Re: [R-sig-ME] Problems with convergence
> >
> > Ken Beath <ken.beath at ...> writes:
> >
> > > Yes, I was demonstrating that it fails convergence and then as a
> > > consequence fails to profile. I have my doubts about
> > > convergence for the bobyqa
> > > algorithm, I have other applications where it doesn't converge
> > properly.
> > > For some of my own work I've used nlminb followed by Nelder-Mead if
> > there
> > > is a convergence failure. Not optimal but it seems to work.
> >
> >    I'm still not sure whether you expect it to converge (I think you
> > do), or whether you are just pointing out that the convergence warning
> > in this case is probably justified (in the face of so many convergence
> > warnings that turn out to be false positives, this is a useful piece
> > of information).
> >
> > > While it is fairly heavily parameterised it is a real model,
> > >  a frequentist
> > > implementation of Smith, T. C., Spiegelhalter, D. J., & Thomas, a.
> > (1995).
> > > Bayesian approaches to random-effects meta-analysis: a comparative
> > study.
> > > Statistics in Medicine, 14(24), 2685–99. The reason for having studies
> > as
> > > fixed effects is probably philosophical, the overall success rates are
> > not
> > > likely to be given by normally distributed random effects, and are in
> > many
> > > cases specifically chosen.
> >
> >   I can appreciate that, but I still think it's unrealistic to expect
> > to be able to fit 22 parameters to 40 observations except under very
> > special circumstances.  One point about switching from the Bayesian
> > to the frequentist world is that the Bayesians (by definition) put
> > priors on their parameters, which provides a degree of regularization
> > that is not by default available to frequentist methods.  What priors
> > did Smith et al. use?  It might be worth trying this in blme with
> > priors on the fixed effects ...
> >
> > > I did find that one of the data sets that I have also failed, but
> > fitted
> > > with a commercial program that is based on the EM algorithm. For this
> > type
> > > of problem it is actually faster, as any type of quasi-Newton needs to
> > > calculate lots of derivatives.
> >
> >   I could whine about the difficulty of finding globally robust,
> > reliable, and fast optimization algorithms, but I won't.  I can certainly
> > appreciate that there are more reliable methods for particular
> > sub-classes of problems.
> >
> > > Anyway, I'm going to keep looking at the methods, and eventually the
> > code
> > > for glmer and may eventually have some suggestions.
> >
> >   Would be happy to hear them.
> >
> >   It's worth pointing out that lme4 is using a preliminary "nAGQ=0"
> > step, which ignores the terms contributed by the integrals over the
> > distributions of the conditional modes and as a result is able to
> > fit both the fixed-effect parameters and the conditional modes in
> > a single linear-algebra step, reducing the dimensionality of the
> > nonlinear optimization to the length of the variance-covariance
> > parameter vector ...
> >
> > > On 19 March 2015 at 14:45, Ben Bolker <bbolker <at> gmail.com> wrote:
> > >
> > > > Ken Beath <ken.beath <at> ...> writes:
> > > >
> > > > > The following code shows that there are convergence problem
> > messages
> > > > > where there is a problem with convergence. The profiling shows that
> > > > > the maximum found is not the correct one. This is simulated data
> > for
> > > > > a binary meta-analysis with fixed effect for study and random
> > effect
> > > > > for treatment.
> > > >
> >
> >  [paragraph snipped to try to make Gmane happy]
> >
> > > >   However, may I comment that this is a slightly ridiculous scenario?
> > > > The data set here has 40 observations, and the model tries to fit 22
> > > > parameters.  The model that treats id as a random effect works much
> > > > better.  I can believe there are scenarios where you really do
> > > > want study as a fixed effect, but did you expect it to be practical
> > > > here?
> > > >
> > > > But maybe you're just trying to show that this is a "true positive"
> > > > case for the convergence warnings.
> > > >
> > > > Some random code I wrote while diagnosing what was going on:
> > > >
> > > > library(ggplot2); theme_set(theme_bw())
> > > >
> > > > ## proportion + weights is a little easier to handle
> > > > thedata <- transform(thedata,prop=nEvents/total)
> > > >
> > > > ggplot(thedata,aes(trt,prop))+geom_point(aes(size=total))+
> > > >     geom_line(aes(group=id),colour="gray")
> > > > glmer1 <- glmer(prop~trt+factor(id)+(0+trt|id),
> > > >                 weights=total,data=thedata,family=binomial)
> > > >
> > > > ## id as RE
> > > > glmer2 <- glmer(prop~trt+(1|id)+(0+trt|id),
> > > >                 weights=total,data=thedata,family=binomial)
> > > >
> > > > dd <- update(glmer1,devFunOnly=TRUE)
> > > > pars <- unlist(getME(glmer1,c("theta","fixef")))
> > > > library("bbmle")
> > > > ss <- slice2D(pars,dd)
> > > > library("lattice")
> > > > plot(ss)
> > > > ## too complex, but too much work to cut down significantly
> > > >
> > > > > library(lme4)
> > > > >
> > > > > thedata <- structure(list(nEvents=c(10L,53L,17L,18L,22L,6L,16L,
> > > > > 14L,13L,18L,15L,19L,52L,19L,8L,16L,50L,8L,9L,4L,
> > > > > 26L,45L,18L,20L,5L,16L,18L,7L,3L,19L,30L,26L,66L,
> > > > > 23L,29L,18L,72L,25L,9L,2L),total=c(200,200,200,200,
> > > > > 200,200,200,200,200,200,200,200,200,200,200,200,200,
> > > > > 200,200,200,200,200,200,200,200,200,200,200,200,200,
> > > > > 200,200,200,200,200,200,200,200,200,200),trt=c(0,
> > > > > 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
> > > > > 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),id=structure(c(1L,
> > > > > 2L,3L,4L,5L,6L,7L,8L,9L,10L,11L,12L,13L,14L,15L,
> > > > > 16L,17L,18L,19L,20L,1L,2L,3L,4L,5L,6L,7L,8L,9L,
> > > > > 10L,11L,12L,13L,14L,15L,16L,17L,18L,19L,20L),.Label=c("1",
> > > > > "2","3","4","5","6","7","8","9","10","11","12","13",
> > > > >
> > "14","15","16","17","18","19","20"),class="factor")),.Names=c("nEvents",
> > > > > "total","trt","id"),row.names=c(NA,40L),class="data.frame")
> > > > >
> > > > > glmer1<-glmer(cbind(nEvents,total-nEvents)~trt+factor(id)+
> > > > ##   (0+trt|id),data=thedata,family=binomial)
> > > > >
> > > > > # while glmer has problems with component 9 it is 8 with a problem
> > > > profile
> > > > > # I've use devtol so the discrepancy is printed
> > > > > prof.glmer1<-profile(glmer1,which=8,devtol=1.0e-3)
>



-- 

*Ken Beath*
Lecturer
Statistics Department
MACQUARIE UNIVERSITY NSW 2109, Australia

Phone: +61 (0)2 9850 8516

Building E4A, room 526
http://stat.mq.edu.au/our_staff/staff_-_alphabetical/staff/beath,_ken/

CRICOS Provider No 00002J
This message is intended for the addressee named and may...{{dropped:9}}



More information about the R-sig-mixed-models mailing list