[R] extracting bootstrap statistics by group with loop

Rui Barradas ru|pb@rr@d@@ @end|ng |rom @@po@pt
Sun Oct 31 17:48:08 CET 2021


Hello,

Try to aggregate with ?by.


# Statistic function for boot::boot(): proportion of `bothTimes` relative to
# `total` in the resampled rows.
#
# data:  data frame with columns `bothTimes` and `total`.
# index: row indices of the bootstrap resample (supplied by boot::boot()).
#
# Returns sum(bothTimes) / sum(total) over the selected rows, ignoring NAs
# in either column.
bootprop <- function(data, index) {
  resampled <- data[index, ]
  numerator <- sum(resampled[["bothTimes"]], na.rm = TRUE)
  denominator <- sum(resampled[["total"]], na.rm = TRUE)
  numerator / denominator
}
# Bootstrap `statistic` on `data` and summarise the replicates.
#
# data:      data frame to resample (e.g. one group's subset handed in by by()).
# statistic: function(data, index) returning a single number, as required
#            by boot::boot().
# R:         number of bootstrap replicates.
#
# Returns a named numeric vector: `bootMean` is the mean of the bootstrap
# replicates and `bootSE` is their standard deviation.
boot_mean_se <- function(data, statistic, R) {
  # Use the arguments, not the globals `DaT`/`bootprop`: by() passes each
  # group's rows in `data`, so referring to the global data frame here would
  # make every group bootstrap the full data set.
  b <- boot::boot(data, statistic, R = R)
  c(bootMean = mean(b$t), bootSE = sd(b$t))
}

# Bootstrap mean and SE for each Year (pooled over Area).
boot_year <- by(DaT, DaT$Year, boot_mean_se, statistic = bootprop, R = R)
# Bootstrap mean and SE for each Year x Area combination.
boot_year_area <- by(DaT,
                      INDICES = list(Year = DaT$Year, Area = DaT$Area),
                      FUN = boot_mean_se,
                      statistic = bootprop, R = R)
boot_year
boot_year_area

# Stack the per-Year results into a matrix (one row per Year).
boot_year <- do.call(rbind, boot_year)

d <- dimnames(boot_year_area)
# The by() result is a Year x Area array stored with the first index (Year)
# varying fastest, and do.call(rbind, ...) walks it in that order.
# expand.grid() also varies its first factor fastest, so expand.grid(d)
# yields labels in exactly the same order, already named Year and Area.
groups <- expand.grid(d)
# Year/Area combinations with no rows are NULL in the by() result and would
# be dropped silently by rbind(); remove their label rows so that labels and
# statistics stay aligned.
cells <- unclass(boot_year_area)
has_data <- !vapply(cells, is.null, logical(1))
boot_year_area <- cbind(groups[has_data, , drop = FALSE],
                         do.call(rbind, cells[has_data]))
rownames(boot_year_area) <- NULL
boot_year_area


Hope this helps,

Rui Barradas

Às 11:47 de 31/10/21, Marna Wagley escreveu:
> Hi R users,
> I was trying to extract the bootstrap mean and its SE by group but I have
> been doing  it by separating the group manually. The data set is big so
> doing it manually is a kind of tedious task. I am wondering whether there
> is a possibility to do it by creating a loop. I am weak in writing loop
> functions. I am attaching an example data and how I performed the
> analysis, see below.
> Thanks for your help.
> Sincerely,
> MW
> ####
> library(boot)
> DaT<-structure(list(bothTimes = c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
> 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L),
> total = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Area = c("A",
> "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A",
> "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"), Year = c(2015L, 2015L,
> 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
> 2015L, 2015L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
> 2016L, 2016L, 2016L, 2016L, 2016L)), class = "data.frame", row.names =
> c(NA, -24L))
> 
> head(DaT)
> R=100
> bootprop <- function(data, index){
>    d <- data[index, ]
>    sum(d[["bothTimes"]], na.rm = TRUE)/sum(d[["total"]], na.rm = TRUE)#
> }
> 
> ###################
> #2015
> ###################
> #-----Year2015_pooled
> Y2015_pooled<-subset(DaT, DaT$Year=="2015")
> Y2015_pooled_boot <- boot(Y2015_pooled, bootprop, R)
> boot_Y2015_pooled<-data.frame(Year="2015", Area= "Pooled", bootMean=
> Y2015_pooled_boot$t0, SE=sd(Y2015_pooled_boot$t))
> #-----Year2015_AreaA
> Y2015_A<-subset(DaT, DaT$Year=="2015" & DaT$Area=="A")
> Y2015_A_boot <- boot(Y2015_A, bootprop, R)
> boot_Y2015_A<-data.frame(Year="2015", Area= "A", bootMean= Y2015_A_boot$t0,
> SE=sd(Y2015_A_boot$t))
> #----Year2015_AreaB
> Y2015_B<-subset(DaT, DaT$Year=="2015" & DaT$Area=="B")
> Y2015_B_boot <- boot(Y2015_B, bootprop, R)
> boot_Y2015_B<-data.frame(Year="2015", Area= "B", bootMean= Y2015_B_boot$t0,
> SE=sd(Y2015_B_boot$t))
> ###################
> #2016
> ###################
> #-----Year2016_pooled
> Y2016_pooled<-subset(DaT, DaT$Year=="2016")
> Y2016_pooled_boot <- boot(Y2016_pooled, bootprop, R)
> boot_Y2016_pooled<-data.frame(Year="2016", Area= "Pooled", bootMean=
> Y2016_pooled_boot$t0, SE=sd(Y2016_pooled_boot$t))
> 
> #-----Year2016_AreaA
> Y2016_A<-subset(DaT, DaT$Year=="2016" & DaT$Area=="A")
> Y2016_A_boot <- boot(Y2016_A, bootprop, R)
> 
> boot_Y2016_A<-data.frame(Year="2016", Area= "A", bootMean= Y2016_A_boot$t0,
> SE=sd(Y2016_A_boot$t))
> #----Year2016_AreaB
> Y2016_B<-subset(DaT, DaT$Year=="2016" & DaT$Area=="B")
> Y2016_B_boot <- boot(Y2016_B, bootprop, R)
> boot_Y2016_B<-data.frame(Year="2016", Area= "B", bootMean= Y2016_B_boot$t0,
> SE=sd(Y2016_B_boot$t))
> 
> ## output data.matrix
> BootMean_All<-rbind(boot_Y2015_pooled,boot_Y2015_A,boot_Y2015_B,boot_Y2016_pooled,boot_Y2016_A,boot_Y2016_B)
> BootMean_All
> 
> 	[[alternative HTML version deleted]]
> 
> ______________________________________________
> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>



More information about the R-help mailing list