# [R] extracting bootstrap statistics by group with loop

Rui Barradas ru|pb@rr@d@@ @end|ng |rom @@po@pt
Sun Oct 31 17:48:08 CET 2021

```Hello,

Try to aggregate with ?by.

# Bootstrap statistic for boot::boot(): ratio of the summed "bothTimes"
# column to the summed "total" column over the resampled rows.
#   data   data frame with columns "bothTimes" and "total".
#   index  row indices selecting the bootstrap resample.
# Missing values are ignored in both sums.
bootprop <- function(data, index) {
  resampled <- data[index, ]
  numerator <- sum(resampled[["bothTimes"]], na.rm = TRUE)
  denominator <- sum(resampled[["total"]], na.rm = TRUE)
  numerator / denominator
}
# Bootstrap one data subset and summarise the replicates.
#   data       data frame to resample (e.g. the rows of one group from by()).
#   statistic  function(data, index) passed on to boot::boot().
#   R          number of bootstrap replicates.
# Returns a named numeric vector: bootMean (mean of the replicates) and
# bootSE (standard deviation of the replicates).
boot_mean_se <- function(data, statistic, R) {
  # Resample the subset received in `data` with the supplied `statistic`.
  # Using the full global data set here would give every group the same
  # result, which defeats the purpose of calling this through by().
  b <- boot::boot(data, statistic, R = R)
  replicates <- b[["t"]]
  c(bootMean = mean(replicates), bootSE = sd(replicates))
}

# Bootstrap mean and SE for every Year, and for every Year x Area combination.
# `R` (the number of replicates) must already be defined, e.g. R <- 100.
boot_year <- by(DaT, DaT[["Year"]], boot_mean_se, statistic = bootprop, R = R)
boot_year_area <- by(DaT,
                     INDICES = list(Year = DaT[["Year"]], Area = DaT[["Area"]]),
                     FUN = boot_mean_se,
                     statistic = bootprop, R = R)
boot_year
boot_year_area

# One row per Year, columns bootMean and bootSE.
boot_year <- do.call(rbind, boot_year)

# The two-factor "by" result is a Year x Area list array whose cells are
# stored with the first factor (Year) varying fastest. expand.grid(d) varies
# its first factor fastest as well, so the labels line up with the rows
# produced by rbind-ing the cells in storage order.
d <- dimnames(boot_year_area)
grid_labels <- expand.grid(d)
cells <- unclass(boot_year_area)
# Combinations with no data come back as NULL and would be silently dropped
# by rbind(); drop their labels too so rows and labels stay aligned.
has_result <- !vapply(cells, is.null, logical(1))
boot_year_area <- cbind(grid_labels[has_result, , drop = FALSE],
                        do.call(rbind, cells[has_result]))
row.names(boot_year_area) <- NULL
boot_year_area

Hope this helps,

Às 11:47 de 31/10/21, Marna Wagley escreveu:
> Hi R users,
> I was trying to extract the bootstrap mean and its SE by group but I have
> been doing  it by separating the group manually. The data set is big so
> doing it manually is a kind of tedious task. I am wondering whether there
> is a possibility to do it by creating a loop. I am weak in writing loop
> functions. I am attaching an example data and how I performed the
> analysis, see below.
> Sincerely,
> MW
> ####
> library(boot)
> DaT<-structure(list(bothTimes = c(0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L,
> 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L),
> total = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Area = c("A",
> "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A",
> "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"), Year = c(2015L, 2015L,
> 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
> 2015L, 2015L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
> 2016L, 2016L, 2016L, 2016L, 2016L)), class = "data.frame", row.names =
> c(NA, -24L))
>
> R=100
> bootprop <- function(data, index){
>    d <- data[index, ]
>    sum(d[["bothTimes"]], na.rm = TRUE)/sum(d[["total"]], na.rm = TRUE)#
> }
>
> ###################
> #2015
> ###################
> #-----Year2015_pooled
> Y2015_pooled<-subset(DaT, DaT\$Year=="2015")
> Y2015_pooled_boot <- boot(Y2015_pooled, bootprop, R)
> boot_Y2015_pooled<-data.frame(Year="2015", Area= "Pooled", bootMean=
> Y2015_pooled_boot\$t0, SE=sd(Y2015_pooled_boot\$t))
> #-----Year2015_AreaA
> Y2015_A<-subset(DaT, DaT\$Year=="2015" & DaT\$Area=="A")
> Y2015_A_boot <- boot(Y2015_A, bootprop, R)
> boot_Y2015_A<-data.frame(Year="2015", Area= "A", bootMean= Y2015_A_boot\$t0,
> SE=sd(Y2015_A_boot\$t))
> #----Year2015_AreaB
> Y2015_B<-subset(DaT, DaT\$Year=="2015" & DaT\$Area=="B")
> Y2015_B_boot <- boot(Y2015_B, bootprop, R)
> boot_Y2015_B<-data.frame(Year="2015", Area= "B", bootMean= Y2015_B_boot\$t0,
> SE=sd(Y2015_B_boot\$t))
> ###################
> #2016
> ###################
> #-----Year2016_pooled
> Y2016_pooled<-subset(DaT, DaT\$Year=="2016")
> Y2016_pooled_boot <- boot(Y2016_pooled, bootprop, R)
> boot_Y2016_pooled<-data.frame(Year="2016", Area= "Pooled", bootMean=
> Y2016_pooled_boot\$t0, SE=sd(Y2016_pooled_boot\$t))
>
> #-----Year2016_AreaA
> Y2016_A<-subset(DaT, DaT\$Year=="2016" & DaT\$Area=="A")
> Y2016_A_boot <- boot(Y2016_A, bootprop, R)
>
> boot_Y2016_A<-data.frame(Year="2016", Area= "A", bootMean= Y2016_A_boot\$t0,
> SE=sd(Y2016_A_boot\$t))
> #----Year2016_AreaB
> Y2016_B<-subset(DaT, DaT\$Year=="2016" & DaT\$Area=="B")
> Y2016_B_boot <- boot(Y2016_B, bootprop, R)
> boot_Y2016_B<-data.frame(Year="2016", Area= "B", bootMean= Y2016_B_boot\$t0,
> SE=sd(Y2016_B_boot\$t))
>
> ## output data.matrix
> BootMean_All<-rbind(boot_Y2015_pooled,boot_Y2015_A,boot_Y2015_B,boot_Y2016_pooled,boot_Y2016_A,boot_Y2016_B)
> BootMean_All
>
> 	[[alternative HTML version deleted]]
>
> ______________________________________________
> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help