[R] bootstrapping quantile regression

Frank Harrell f.harrell at vanderbilt.edu
Wed Oct 31 14:18:49 CET 2012


A piece of this is solved by the rms package's Rq and bootcov functions. 
-Frank

Roger Koenker-3 wrote
> There is no automatic "clustering" option for QR bootstrapping.
> You will have to roll your own.
> 
> 
> url:    www.econ.uiuc.edu/~roger            Roger Koenker
> email    

> rkoenker@

>             Department of Economics
> vox:     217-333-4558                University of Illinois
> fax:       217-244-6678                Urbana, IL 61801
> 
> On Oct 31, 2012, at 1:38 AM, Kay Cichini wrote:
> 
>> sry, I forgot to replace rlm() - but actually I tried both and the
>> question
>> applies to both approaches..
>> 
>> Am 31.10.2012 00:19 schrieb "Kay Cichini" <

> kay.cichini@

> >:
>>> 
>>> HI everyone,
>>> 
>>> I try to get some bootstrap CIs for coefficients obtained by quantile
>> regression. I have influencial values and thus switched to quantreg..
>>> The data is clustered and within clusters the variance of my DV = 0..
>>> 
>>> Is this sensible for the below data? And what about the warnings?
>>> 
>>> Thanks in advance for any guidance,
>>> Kay
>>> 
>>>> dput(d)
>>> structure(list(Porenfläche = c(4990L, 7002L, 7558L, 7352L, 7943L,
>>> 7979L, 9333L, 8209L, 8393L, 6425L, 9364L, 8624L, 10651L, 8868L,
>>> 9417L, 8874L, 10962L, 10743L, 11878L, 9867L, 7838L, 11876L, 12212L,
>>> 8233L, 6360L, 4193L, 7416L, 5246L, 6509L, 4895L, 6775L, 7894L,
>>> 5980L, 5318L, 7392L, 7894L, 3469L, 1468L, 3524L, 5267L, 5048L,
>>> 1016L, 5605L, 8793L, 3475L, 1651L, 5514L, 9718L), P.Perimeter =
>>> c(2791.9,
>>> 3892.6, 3930.66, 3869.32, 3948.54, 4010.15, 4345.75, 4344.75,
>>> 3682.04, 3098.65, 4480.05, 3986.24, 4036.54, 3518.04, 3999.37,
>>> 3629.07, 4608.66, 4787.62, 4864.22, 4479.41, 3428.74, 4353.14,
>>> 4697.65, 3518.44, 1977.39, 1379.35, 1916.24, 1585.42, 1851.21,
>>> 1239.66, 1728.14, 1461.06, 1426.76, 990.388, 1350.76, 1461.06,
>>> 1376.7, 476.322, 1189.46, 1644.96, 941.543, 308.642, 1145.69,
>>> 2280.49, 1174.11, 597.808, 1455.88, 1485.58), P.Form = c(0.0903296,
>>> 0.148622, 0.183312, 0.117063, 0.122417, 0.167045, 0.189651, 0.164127,
>>> 0.203654, 0.162394, 0.150944, 0.148141, 0.228595, 0.231623, 0.172567,
>>> 0.153481, 0.204314, 0.262727, 0.200071, 0.14481, 0.113852, 0.291029,
>>> 0.240077, 0.161865, 0.280887, 0.179455, 0.191802, 0.133083, 0.225214,
>>> 0.341273, 0.311646, 0.276016, 0.197653, 0.326635, 0.154192, 0.276016,
>>> 0.176969, 0.438712, 0.163586, 0.253832, 0.328641, 0.230081, 0.464125,
>>> 0.420477, 0.200744, 0.262651, 0.182453, 0.200447), Durchlässigkeit =
>> c(6.3,
>>> 6.3, 6.3, 6.3, 17.1, 17.1, 17.1, 17.1, 119, 119, 119, 119, 82.4,
>>> 82.4, 82.4, 82.4, 58.6, 58.6, 58.6, 58.6, 142, 142, 142, 142,
>>> 740, 740, 740, 740, 890, 890, 890, 890, 950, 950, 950, 950, 100,
>>> 100, 100, 100, 1300, 1300, 1300, 1300, 580, 580, 580, 580), Gebiete =
>> structure(c(1L,
>>> 1L, 1L, 1L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 3L,
>>> 3L, 3L, 3L, 7L, 7L, 7L, 7L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
>>> 11L, 11L, 11L, 11L, 5L, 5L, 5L, 5L, 12L, 12L, 12L, 12L, 8L, 8L,
>>> 8L, 8L), .Label = c("6.3", "17.1", "58.6", "82.4", "100", "119",
>>> "142", "580", "740", "890", "950", "1300"), class = "factor")), .Names =
>> c("Porenfläche",
>>> "P.Perimeter", "P.Form", "Durchlässigkeit", "Gebiete"), row.names =
>> c("1",
>>> "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
>>> "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
>>> "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
>>> "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
>>> "47", "48"), class = "data.frame")
>>> 
>>> ## do quantile regression and bootstrap the coefficients, allowing for
>> clustered data
>>> ## by putting "Gebiet" as strata argument (?),
>>> ## dv variation within clusters/Gebiet = 0!
>>> bs <- function(formula, data, indices) {
>>>  d <- data[indices, ] # allows boot to select sample
>>>  fit <- rlm(formula, data = d)
>>>  return(coef(fit))
>>> }
>>> 
>>> results <- boot(data = d, statistic = bs, strata = d$Gebiete,
>>>                R = 199, formula = Durchlässigkeit ~ P.Perimeter +
>>> P.Form)
>>> 
>>> # get 99% confidence intervals
>>> boot.ci(results, type="bca", index=1, conf = .99) # intercept
>>> boot.ci(results, type="bca", index=2, conf = .99) # P.Perimeter
>>> boot.ci(results, type="bca", index=3, conf = .99) # P.Form
>>> 
>>> --
>>> 
>>> Kay Cichini, MSc Biol
>>> 
>>> Grubenweg 22, 6071 Aldrans
>>> 
>>> E-Mail: 

> kay.cichini@

>>> --
>>> 
>>> 
>> 
>> 	[[alternative HTML version deleted]]
>> 
>> ______________________________________________
>> 

> R-help@

>  mailing list
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide
>> http://www.R-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
> 
> ______________________________________________

> R-help@

>  mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.





-----
Frank Harrell
Department of Biostatistics, Vanderbilt University
--
View this message in context: http://r.789695.n4.nabble.com/bootstrapping-quantile-regression-tp4647948p4648001.html
Sent from the R help mailing list archive at Nabble.com.




More information about the R-help mailing list