[RsR] Robust Parameter Estimation: Negative Binomial

Peter Ruckdeschel peter@ruckde@che| @end|ng |rom web@de
Fri May 28 21:38:57 CEST 2010


Am 28.05.2010 14:40, schrieb Markus Kalisch:
> Dear R-SIG Robust
>
> I'd like to estimate the parameters of a negative binomial in a
> robust way. Could anyone give me a hint whether a solution of
> this problem is already implemented or point me to some
> reference?
>
>   
Package ROptEst on CRAN provides infrastructure to do this;
find below a script to do optimally robust estimation in negative
binomial models;

So far we had not yet defined a class for it;
so we do this in the script; the definition has entered into the
devel version of distrMod so will be on CRAN soon;
then you may skip parts of the code as indicated.

Basically this script can easily be adjusted to other smooth parametric
models as well.

Hth, best
Peter

=============================================================

require(ROptEst)
options("newDevice"=TRUE)

#### < - has entered into devel version of pkg distrMod

setClass("NbinomFamily", contains = "L2ParamFamily")

##################################################################
## NegBinomial family
##################################################################

## a generating function to produce an object of class NbinomFamily

NbinomFamily <- function(size = 1, prob = 0.5, trafo){
### simple namings ...
    name <- "Negative Binomial family"
    distribution <- Nbinom(size = size, prob = prob)
    distrSymm <- NoSymmetry()
### parameter of the family   
    param0 <- prob
    names(param0) <- "prob"
    param1 <- size
    names(param1) <- "size"
    if(missing(trafo)) trafo <- matrix(1, dimnames = list("prob","prob"))
    param <- ParamFamParameter(name = "probability of success",
                               main = param0,
                               fixed = param1,
                               trafo = trafo)
### howto move from one model distribution to the next   
    modifyParam <- function(theta){ Nbinom(size = size, prob = theta) }
    body(modifyParam) <- substitute({ Nbinom(size = size, prob = theta)
}, list(size = size))
    props <- ""

### helpers for estimating: search interval and what to do if estimate
is not in [0,1]
    startPar <- function(x,...) c(.Machine$double.eps,1-.Machine$double.eps)
    makeOKPar <- function(param) {if(param<=0) return(.Machine$double.eps)
                                  if(param>=1) return(1-.Machine$double.eps)
                                  return(param)}
### the scores function
    L2deriv.fct <- function(param){
                   prob <- main(param)
                   fct <- function(x){}
                   body(fct) <- substitute({ (size/prob- x/(1-prob)) },
                                list(size = size, prob = prob))
                   return(fct)}
### its distribution
    L2derivSymm <- FunSymmList(NonSymmetric())
    L2derivDistr <- UnivarDistrList((size/prob- distribution/(1-prob)))
    L2derivDistrSymm <- DistrSymmList(NoSymmetry())
### Fisher informaiton
    FisherInfo.fct <- function(param){
                       prob <- main(param)
                       PosDefSymmMatrix(matrix(size/(prob^2*(1-prob)),
                           dimnames=list("prob","prob")))}

    FisherInfo <- FisherInfo.fct(param)
### building the return object:
    res <- L2ParamFamily(name = name, distribution = distribution,
        distrSymm = distrSymm, param = param, modifyParam = modifyParam,
        props = props, L2deriv.fct = L2deriv.fct, L2derivSymm = L2derivSymm,
        L2derivDistr = L2derivDistr, L2derivDistrSymm = L2derivDistrSymm,
        FisherInfo.fct = FisherInfo.fct, FisherInfo = FisherInfo,
        startPar = startPar, makeOKPar = makeOKPar,
        .returnClsName = "NbinomFamily")
    if(!is.function(trafo))
       f.call <- substitute(NbinomFamily(size = s, prob = p,
                           trafo = matrix(Tr, dimnames =
list("prob","prob"))),
                           list(s = size, p = prob, Tr = trafo))
    else
       f.call <- substitute(NbnomFamily(size = s, prob = p,
                           trafo = Tr), list(s = size, p = prob, Tr =
trafo))

    res using fam.call <- f.call
    return(res)
}

####  end of code entered into distrMod ->

###### slightly modified from script BinomModel.R which ships out
###  with the installation of ROptEst / see scripts folder of ROptEst
###  in the corresponding library (where you installed package ROptEst to).

###############################################################################
## Example: Neg Binomial Family
###############################################################################


#-------------------------------------------------------------------------------
## Preparations
#-------------------------------------------------------------------------------
## generates Neg.Binomial Family with
## m = 25 and probability of success theta = 0.25
N <- NbinomFamily(size = 25, prob = 0.25)
N       # show N

#-------------------------------------------------------------------------------
## classical optimal IC
#-------------------------------------------------------------------------------
IC0 <- optIC(model = N, risk = asCov())
IC0       # show IC

plot(IC0) # plot IC
checkIC(IC0)

#-------------------------------------------------------------------------------
## L_2 family + infinitesimal neighborhood
#-------------------------------------------------------------------------------
## a neighborhood model is laid around NbinomFamily
RobN1 <- InfRobModel(center = N, neighbor = ContNeighborhood(radius = 0.5))
RobN1     # show RobN1

### similarly Total variation nbd
(RobN2 <- InfRobModel(center = N, neighbor = TotalVarNeighborhood(radius
= 0.5)))

#-------------------------------------------------------------------------------
## MSE solution
#-------------------------------------------------------------------------------
system.time(IC1 <- optIC(model=RobN1, risk=asMSE()))
IC1
checkIC(IC1)
plot(IC1)

## total variation
system.time(IC2 <- optIC(model=RobN2, risk=asMSE()))
IC2
checkIC(IC2)
plot(IC2)

#-------------------------------------------------------------------------------
## lower case / most bias robust solutions
#-------------------------------------------------------------------------------
(IC3 <- optIC(model=RobN1, risk=asBias()))

(IC4 <- optIC(model=RobN2, risk=asBias()))

#-------------------------------------------------------------------------------
## Hampel solution
#-------------------------------------------------------------------------------
(IC5 <- optIC(model=RobN1, risk=asHampel(bound=clip(IC1))))

(IC6 <- optIC(model=RobN2, risk=asHampel(bound=Risks(IC2)$asBias$value),
maxiter = 200))


#-------------------------------------------------------------------------------
## radius minimax IC
### --- to be used if radius is unknown
### H. Rieder, M. Kohl, P. Ruckdeschel (2008): The costs of not knowing
the radius.
### {\it Statistical Methods and Applications\/}, {\bf 17}(1), 13--40.
#-------------------------------------------------------------------------------
system.time(IC7 <- radiusMinimaxIC(L2Fam=N, neighbor=ContNeighborhood(),
                        risk=asMSE(), loRad=0.01, upRad=3.9))
IC7

system.time(IC8 <- radiusMinimaxIC(L2Fam=N, neighbor=TotalVarNeighborhood(),
                        risk=asMSE(), loRad=0.01, upRad=1.8))

IC8



###############################################################################
## k-step (k >= 1) estimation
###############################################################################

## one-step estimation
## 1. generate a contaminated sample
ind <- rbinom(100, size=1, prob=0.05)
x <- rnbinom(100, size=25, prob=(1-ind)*0.25 + ind*0.01)

### MLE:
(estML <- MLEstimator(x=x, NbinomFamily(size=25)))

## 2. Kolmogorov(-Smirnov) minimum distance estimator
(est0 <- MDEstimator(x=x, NbinomFamily(size=25)))

## 3.1. one-step estimation: radius known

### gross error
## ac) Define infinitesimal robust model
RobN3 <- InfRobModel(center=NbinomFamily(size=25, prob=estimate(est0)),
                     neighbor=ContNeighborhood(radius=0.5))
## bc) Compute optimally robust IC
IC9 <- optIC(model=RobN3, risk=asMSE())
## cc) Determine 1-step estimate
(est1c <- oneStepEstimator(x, IC=IC9, start=est0))


## instead of ac)-cc) you can also use function roptest
est1c1 <- roptest(x, NbinomFamily(size = 25), eps = 0.05, initial.est =
est0)
checkIC(pIC(est1c1))
## Using Kolmogorov MD estimator (default)
est1c2 <- roptest(x, NbinomFamily(size = 25), eps = 0.05, distance =
KolmogorovDist)
## Using Cramer-von-Mises MD estimator (default)
est1c3 <- roptest(x, NbinomFamily(size = 25), eps = 0.025)

## comparison of estimates
estimate(est1c)
estimate(est1c1)
estimate(est1c2)
estimate(est1c3)
## confidence intervals
confint(est1c, symmetricBias())
confint(est1c1, symmetricBias())
confint(est1c2, symmetricBias())
confint(est1c3, symmetricBias())

### total variation
## av) Define infinitesimal robust model
RobN4 <- InfRobModel(center=NbinomFamily(size=25, prob=estimate(est0)),
                neighbor=TotalVarNeighborhood(radius=0.25))
## bv) Compute optimally robust IC
IC10 <- optIC(model=RobN4, risk=asMSE())
## cv) Determine 1-step estimate
(est1v <- oneStepEstimator(x, IC=IC10, start=est0))

## instead of av)-cv) you can also use function roptest
est1v1 <- roptest(x, NbinomFamily(size = 25), eps = 0.025, initial.est =
est0,
                  neighbor = TotalVarNeighborhood())

## comparison of estimates
estimate(est1v)
estimate(est1v1)

## confidence intervals
confint(est1v, symmetricBias())
## ....
###############################################################################
## k-step (k >= 1) estimation
###############################################################################

## one-step estimation
## 1. generate a contaminated sample
ind <- rbinom(100, size=1, prob=0.05)
x <- rnbinom(100, size=25, prob=(1-ind)*0.25 + ind*0.01)

### MLE:
(estML <- MLEstimator(x=x, NbinomFamily(size=25)))

## 2. Kolmogorov(-Smirnov) minimum distance estimator
(est0 <- MDEstimator(x=x, NbinomFamily(size=25)))

## 3.1. one-step estimation: radius known

### gross error
## ac) Define infinitesimal robust model
RobN3 <- InfRobModel(center=NbinomFamily(size=25, prob=estimate(est0)),
                     neighbor=ContNeighborhood(radius=0.5))
## bc) Compute optimally robust IC
IC9 <- optIC(model=RobN3, risk=asMSE())
## cc) Determine 1-step estimate
(est1c <- oneStepEstimator(x, IC=IC9, start=est0))


## instead of ac)-cc) you can also use function roptest
est1c1 <- roptest(x, NbinomFamily(size = 25), eps = 0.05, initial.est =
est0)
checkIC(pIC(est1c1))
## Using Kolmogorov MD estimator (default)
est1c2 <- roptest(x, NbinomFamily(size = 25), eps = 0.05, distance =
KolmogorovDist)
## Using Cramer-von-Mises MD estimator (default)
est1c3 <- roptest(x, NbinomFamily(size = 25), eps = 0.025)

## comparison of estimates
estimate(est1c)
estimate(est1c1)
estimate(est1c2)
estimate(est1c3)
## confidence intervals
confint(est1c, symmetricBias())
confint(est1c1, symmetricBias())
confint(est1c2, symmetricBias())
confint(est1c3, symmetricBias())

### total variation
## av) Define infinitesimal robust model
RobN4 <- InfRobModel(center=NbinomFamily(size=25, prob=estimate(est0)),
                neighbor=TotalVarNeighborhood(radius=0.25))
## bv) Compute optimally robust IC
IC10 <- optIC(model=RobN4, risk=asMSE())
## cv) Determine 1-step estimate
(est1v <- oneStepEstimator(x, IC=IC10, start=est0))

## instead of av)-cv) you can also use function roptest
est1v1 <- roptest(x, NbinomFamily(size = 25), eps = 0.025, initial.est =
est0,
                  neighbor = TotalVarNeighborhood())

## comparison of estimates
estimate(est1v)
estimate(est1v1)

## confidence intervals
confint(est1v, symmetricBias())
## ....

## 3.2. k-step estimation: radius known
IC9 <- optIC(model=RobN3, risk=asMSE())
(est2c <- kStepEstimator(x, IC=IC9, start=est0, steps = 3L))
### or:
est2c1 <- roptest(x, NbinomFamily(size = 25), eps = 0.05, initial.est =
est0, steps = 3L)
## comparison of estimates
estimate(est2c)
estimate(est2c1)

## 4.1. one-step estimation: radius interval
IC11 <- radiusMinimaxIC(L2Fam=NbinomFamily(size=25, prob=estimate(est0)),
                neighbor=ContNeighborhood(), risk=asMSE(), loRad=0,
upRad=Inf)
(est3c <- oneStepEstimator(x, IC=IC11, start=est0))
## maximum radius for given sample size n: sqrt(n)*0.5

(est3c1 <- roptest(x, NbinomFamily(size = 25), eps.upper = 0.5))




More information about the R-SIG-Robust mailing list