[R] extracting bootstrap statistics by group with loop

Rui Barradas ru|pb@rr@d@@ @end|ng |rom @@po@pt
Sun Oct 31 17:55:01 CET 2021


Hello,

Sorry, there was a bug in my previous message. In both `by` calls the function to apply is boot_mean_se, not bootprop (bootprop is only passed along as the statistic).


# Bootstrap mean and SE for each year (areas pooled). by() calls
# boot_mean_se() once per group and forwards statistic and R to it.
boot_year <- by(
  data = DaT,
  INDICES = DaT$Year,
  FUN = boot_mean_se,
  statistic = bootprop,
  R = R
)

# Same summary for every Year x Area combination.
boot_year_area <- by(
  data = DaT,
  INDICES = list(Year = DaT$Year, Area = DaT$Area),
  FUN = boot_mean_se,
  statistic = bootprop,
  R = R
)


Hope this helps,

Rui Barradas

Às 16:48 de 31/10/21, Rui Barradas escreveu:
> Hello,
> 
> Try to aggregate with ?by.
> 
> 
> # Statistic for boot::boot(): proportion sum(bothTimes) / sum(total)
> # computed on the rows of `data` selected by `index`; NAs are ignored.
> bootprop <- function(data, index) {
>   resampled <- data[index, ]
>   both_sum <- sum(resampled[["bothTimes"]], na.rm = TRUE)
>   total_sum <- sum(resampled[["total"]], na.rm = TRUE)
>   both_sum / total_sum
> }
> # Bootstrap mean and standard error of a statistic for one data set.
> #   data      - data frame to resample (by() passes one group's rows here)
> #   statistic - function(data, index) handed to boot::boot()
> #   R         - number of bootstrap replicates
> # Returns a named numeric vector c(bootMean = ..., bootSE = ...).
> boot_mean_se <- function(data, statistic, R){
>    # Resample the subset received as an argument, not the global DaT;
>    # otherwise every group reports the statistics of the full data set.
>    b <- boot::boot(data = data, statistic = statistic, R = R)
>    c(bootMean = mean(b$t), bootSE = sd(b$t))
> }
> 
> # Bootstrap mean and SE for each year (areas pooled). by() calls
> # boot_mean_se() once per group and forwards statistic and R to it.
> boot_year <- by(
>   data = DaT,
>   INDICES = DaT$Year,
>   FUN = boot_mean_se,
>   statistic = bootprop,
>   R = R
> )
> # Same summary for every Year x Area combination.
> boot_year_area <- by(
>   data = DaT,
>   INDICES = list(Year = DaT$Year, Area = DaT$Area),
>   FUN = boot_mean_se,
>   statistic = bootprop,
>   R = R
> )
> # Print both by objects.
> boot_year
> boot_year_area
> 
> # Stack the per-year results into a matrix, one row per year.
> boot_year <- do.call(rbind, boot_year)
> 
> # The Year x Area by object is an array whose first dimension (Year) varies
> # fastest, and that is the order in which do.call(rbind, ...) visits its
> # cells. expand.grid() also varies its first factor fastest, so
> # expand.grid(d) lists the labels in exactly the same order as the cells.
> d <- dimnames(boot_year_area)
> cell_labels <- expand.grid(d)
> # by() leaves NULL in combinations with no rows and rbind() drops NULLs,
> # so drop the matching label rows to keep labels and results aligned.
> has_result <- !vapply(boot_year_area, is.null, logical(1))
> boot_year_area <- cbind(cell_labels[has_result, , drop = FALSE],
>                          do.call(rbind, boot_year_area))
> names(boot_year_area)[1:2] <- names(d)
> boot_year_area
> 
> 
> Hope this helps,
> 
> Rui Barradas
> 
> Às 11:47 de 31/10/21, Marna Wagley escreveu:
>> Hi R users,
>> I was trying to extract the bootstrap mean and its SE by group but I have
>> been doing  it by separating the group manually. The data set is big so
>> doing it manually is a kind of tedious task. I am wondering whether there
>> is a possibility to do it by creating a loop. I am weak in writing loop
>> functions. I am attaching an example data and how I performed the
>> analysis, see below.
>> Thanks for your help.
>> Sincerely,
>> MW
>> ####
>> library(boot)
>> DaT<-structure(list(bothTimes = c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
>> 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L),
>> total = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
>> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Area = c("A",
>> "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A",
>> "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"), Year = c(2015L, 2015L,
>> 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
>> 2015L, 2015L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
>> 2016L, 2016L, 2016L, 2016L, 2016L)), class = "data.frame", row.names =
>> c(NA, -24L))
>>
>> head(DaT)
>> R=100
>> bootprop <- function(data, index){
>>    d <- data[index, ]
>>    sum(d[["bothTimes"]], na.rm = TRUE)/sum(d[["total"]], na.rm = TRUE)#
>> }
>>
>> ###################
>> #2015
>> ###################
>> #-----Year2015_pooled
>> Y2015_pooled<-subset(DaT, DaT$Year=="2015")
>> Y2015_pooled_boot <- boot(Y2015_pooled, bootprop, R)
>> boot_Y2015_pooled<-data.frame(Year="2015", Area= "Pooled", bootMean=
>> Y2015_pooled_boot$t0, SE=sd(Y2015_pooled_boot$t))
>> #-----Year2015_AreaA
>> Y2015_A<-subset(DaT, DaT$Year=="2015" & DaT$Area=="A")
>> Y2015_A_boot <- boot(Y2015_A, bootprop, R)
>> boot_Y2015_A<-data.frame(Year="2015", Area= "A", bootMean= 
>> Y2015_A_boot$t0,
>> SE=sd(Y2015_A_boot$t))
>> #----Year2015_AreaB
>> Y2015_B<-subset(DaT, DaT$Year=="2015" & DaT$Area=="B")
>> Y2015_B_boot <- boot(Y2015_B, bootprop, R)
>> boot_Y2015_B<-data.frame(Year="2015", Area= "B", bootMean= 
>> Y2015_B_boot$t0,
>> SE=sd(Y2015_B_boot$t))
>> ###################
>> #2016
>> ###################
>> #-----Year2016_pooled
>> Y2016_pooled<-subset(DaT, DaT$Year=="2016")
>> Y2016_pooled_boot <- boot(Y2016_pooled, bootprop, R)
>> boot_Y2016_pooled<-data.frame(Year="2016", Area= "Pooled", bootMean=
>> Y2016_pooled_boot$t0, SE=sd(Y2016_pooled_boot$t))
>>
>> #-----Year2016_AreaA
>> Y2016_A<-subset(DaT, DaT$Year=="2016" & DaT$Area=="A")
>> Y2016_A_boot <- boot(Y2016_A, bootprop, R)
>>
>> boot_Y2016_A<-data.frame(Year="2016", Area= "A", bootMean= 
>> Y2016_A_boot$t0,
>> SE=sd(Y2016_A_boot$t))
>> #----Year2016_AreaB
>> Y2016_B<-subset(DaT, DaT$Year=="2016" & DaT$Area=="B")
>> Y2016_B_boot <- boot(Y2016_B, bootprop, R)
>> boot_Y2016_B<-data.frame(Year="2016", Area= "B", bootMean= 
>> Y2016_B_boot$t0,
>> SE=sd(Y2016_B_boot$t))
>>
>> ## output data.matrix
>> BootMean_All<-rbind(boot_Y2015_pooled,boot_Y2015_A,boot_Y2015_B,boot_Y2016_pooled,boot_Y2016_A,boot_Y2016_B) 
>>
>> BootMean_All
>>
>>     [[alternative HTML version deleted]]
>>
>> ______________________________________________
>> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide 
>> http://www.R-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
>>
> 
> ______________________________________________
> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide 
> http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.



More information about the R-help mailing list