[R] help with stepAIC and boot

Luca Braglia lbraglia at gmail.com
Fri Mar 7 09:31:42 CET 2014


Hello everybody.

I've a problem with stepAIC inside a boot function (i'm trying to do
bootstrap backward elimination, with a "statistic" function that allow one
to specify the formula of a coxph model)

For the moment the function would be aimed storing selected vars

However I've a problem in the stepAIC code (coxph seems to me to be
estimated)...the error message is

Error in terms.formula(formula, special, data = data) :
  object 'db' not found

aka stepAIC doesn't find bootstrap sample data

How can i fix it?

Many thanks in advance

-------------------------------------------------------------------------------------

library(boot)
library(MASS)
library(survival)

hn <-
structure(list(pfs.time = c(309, 519, 318, 1114, 671, 1539, 1110,
1798, 2213, 1802, 1170, 1259, 189, 1939, 361, 1311, 1537, 706,
1934, 595, 173, 1147, 389, 854, 744, 172, 1530, 1993, 341, 1468,
540, 562, 1079, 1747, 1920, 1018, 899, 591, 1722, 400, 1099,
755, 1679, 1049, 1261, 408, 1695, 788, 324, 505, 505, 1159, 691,
924, 443, 781, 504, 1031, 205, 115, 506, 642, 637, 1426, 404,
944, 651, 863, 1758, 859, 502, 1899, 1746, 1517, 1087, 160, 1803,
706, 122, 996, 869, 611, 2484, 2438, 1095, 114, 574, 830, 2236,
616, 180, 398, 879, 281, 761, 764, 594, 874, 886, 1959, 1424,
239, 1650, 1689, 310, 1865, 2159, 1459, 980, 379, 1668, 394,
787, 1326, 1931, 2435, 356, 708, 1019, 100, 289), pfs.dummy = c(1,
0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1), varA = c(12.8,
19, 15.4, 21.3, 18.6, 11.4, 0, 14.6, 7.9, 5.5, 23.5, 17.1, 13.2,
13.2, 30, 11, 12.8, 30.5, 13.6, 10.1, 13.5, 23.6, 11.1, 20.8,
6.1, 7, 13.3, 11.3, 13.9, 12, 8.3, 11.9, 5.2, 6.7, 28.4, 5.8,
22.5, 12.1, 12.4, 8.8, 20.2, 34.2, 5.1, 16, 24.7, 16.9, 0, 8.1,
0, 17.6, 6.5, 16.5, 22.6, 18.3, 12.1, 16.2, 14.9, 21.3, 15.9,
2.9, 9.9, 13.3, 0, 12.8, 19.9, 17.6, 21.8, 15.9, 20.5, 13.4,
14.1, 13, 5.4, 16.3, 18.7, 13, 21, 3.7, 23.6, 17.1, 6.1, 7.4,
0, 14.9, 11.5, 29.8, 8.8, 14, 13.1, 5.5, 16.4, 11.4, 6.3, 18.1,
11.9, 14.2, 25.8, 9.6, 5.3, 22.1, 12.5, 25.6, 17.5, 7.2, 5.7,
12.8, 11.6, 22.7, 17.4, 23.7, 6.7, 10.3, 13.6, 5, 16.4, 16.1,
20.5, 15.6, 6.7, 20, 14.7), varB = c(3.3, 4.13, 14.25, 5.2,
13.25, 5, 0, 19.1, 7.2, 24.2, 12.6, 11.1, 21.32, 13.6, 36.9,
9.98, 8.51, 9.4, 8.12, 3, 31.25, 10.3, 13.1, 9.4, 6.94, 10.9,
8.6, 6.2, 27, 2.5, 12.6, 17.2, 13.2, 4.6, 17.6, 3.3, 6.7, 4.9,
12.5, 5.4, 3.9, 11.7, 0.6, 26.3, 23.9, 15.9, 0, 13.1, 0, 4.45,
5.2, 18.8, 7.73, 12.4, 4.2, 2.71, 3.4, 8.5, 15.6, 18.4, 6.72,
31.15, 0, 8.1, 25.7, 5.8, 48.7, 7.43, 6.1, 9.4, 3.9, 15.1, 5.2,
7.2, 6.8, 7.4, 2.5, 6.16, 32.9, 6.5, 15.5, 3.96, 0, 4.2, 4.94,
14.7, 7.4, 8.6, 7.2, 20.5, 5.6, 8.75, 5.6, 10.2, 2.7, 7.4, 9.9,
6.14, 2.2, 36, 2.4, 4.6, 5.9, 4.9, 21.9, 9.1, 2.5, 33.2, 11.4,
5, 2.2, 2.3, 14.9, 6, 13.9, 4.7, 30.44, 3.4, 10.7, 7, 7.9), varC = c(24.6,
48.5, 135.1, 67.9, 156, 34.3, 0, 14.6, 33.2, 75.6, 192.8, 118.4,
174.1, 113.5, 699.4, 63.1, 68, 175.1, 66.3, 17.1, 265.6, 162.1,
87.1, 122.5, 23.3, 40.8, 65.1, 46, 241.6, 17.1, 64.5, 123.9,
37.9, 17.7, 284.1, 10.3, 95.3, 42.6, 92.2, 26.8, 45.2, 238.3,
2.3, 261, 380.4, 147.4, 0, 57, 0, 45.6, 19.1, 186.8, 107.9, 143.3,
27.9, 27.8, 30.1, 110.1, 161.3, 32.2, 42, 249, 0, 54.7, 313.1,
63.8, 656, 80, 81.5, 78.4, 31.4, 110.3, 14.9, 76.1, 78.5, 59.8,
31.7, 12.1, 457.9, 62.5, 49.5, 16, 0, 37, 345, 255, 35.9, 72,
54.5, 61.8, 55.9, 57.2, 18.4, 104.2, 18.2, 66.1, 156.1, 35.4,
7.2, 523.8, 17.7, 68.4, 52.8, 19.7, 74.6, 72.5, 19.2, 498.7,
121.7, 78.4, 10.8, 14, 133.2, 17.3, 133.9, 43.8, 375.2, 32.9,
44.3, 86.5, 63.8), varD = c(4.3, 10.3, 4.7, 5.4, 6.7, 2.7,
7, 9.2, 17.7, 0, 4.6, 0, 12.1, 5.5, 0, 3.4, 7, 7.2, 10.8, 8.3,
13.7, 19.6, 0, 11.8, 0, 7.5, 0, 0, 3.2, 0, 6.2, 9.6, 0, 4, 5.1,
0, 18.8, 16.1, 6.4, 0, 12.2, 15.8, 10.1, 13.9, 0, 11.7, 15.5,
0, 13.1, 11, 0, 0, 32.7, 6.3, 5.9, 0, 0, 12.8, 0, 0, 3.8, 7.3,
3.7, 18.1, 15.6, 14.9, 21.4, 13, 0, 8.1, 9.6, 8.1, 0, 13.1, 12.1,
10.7, 8.5, 2.9, 0, 15.4, 0, 3.3, 4.8, 0, 7.5, 38.9, 3.6, 6.7,
5.7, 4.4, 18.9, 11.5, 0, 15.2, 0, 8.5, 0, 6.4, 0, 17.6, 0, 26.7,
4.6, 4.4, 0, 5.9, 9.2, 20.2, 0, 0, 10.1, 7, 11.3, 0, 4.4, 10.2,
11.2, 0, 0, 0, 4), varE = c(1.9, 3.55, 1.96, 3.1, 1.5, 0, 8.3,
0.7, 4.3, 0, 2.1, 0, 11.69, 2, 0, 2.5, 4.4, 0.8, 0.8, 9.4, 19.19,
14.2, 0, 2.3, 0, 6.7, 0, 0, 1.7, 0, 1.6, 8.3, 0, 20.6, 1.8, 0,
12.5, 15.1, 1.6, 0, 1.6, 2, 25.3, 4.2, 0, 6.55, 16.7, 0, 8.7,
4.84, 0, 0, 16.72, 8.4, 1.5, 0, 0, 3.8, 0, 0, 3.89, 1.5, 4, 15.65,
4.3, 2.5, 10.9, 3.23, 0, 2.9, 7.7, 8.3, 0, 15.6, 5.2, 3.9, 1.6,
3.52, 0, 5.4, 0, 4.3, 2.7, 0, 8, 5.1, 0.9, 2.35, 0.6, 14.2, 10.7,
8.7, 0, 10.9, 0, 2.3, 0, 1.8, 0, 8.7, 0, 16.2, 0.1, 2.8, 0, 0.2,
8.3, 7.3, 0, 0, 10.3, 0.6, 7.1, 0, 5.3, 8.2, 0.83, 0, 0, 0, 1.8
), varF = c(4.5, 21.4, 5.1, 9.4, 6.1, 0, 33.6, 5.2, 47.4, 0,
7.1, 0, 83, 6.6, 0, 4.7, 14.2, 3.2, 4.7, 44.3, 147.5, 157.1,
0, 14.9, 0, 27.4, 0, 0, 3, 0, 5.6, 42.1, 0, 43.7, 5.3, 0, 150.2,
140.9, 5.8, 0, 11.7, 15.8, 145, 33.7, 0, 44.7, 172.2, 0, 55.1,
28.4, 0, 0, 338.2, 41.1, 5.1, 0, 0, 30.9, 0, 0, 7.7, 6.1, 7.7,
180.1, 10.1, 23.6, 141, 26.6, 0, 13.2, 44.8, 44.3, 0, 118.7,
36.9, 26.2, 8.2, 6, 0, 45.8, 0, 7.6, 6.7, 0, 35, 119.1, 2.1,
8.6, 3.3, 32.8, 129.4, 55.4, 0, 100.2, 0, 12.1, 0, 7.2, 0, 98.2,
0, 267.3, 0.4, 8.1, 0, 1, 48, 83.9, 0, 0, 66.5, 2.5, 46.1, 0,
12.5, 54.8, 5.6, 0, 0, 0, 3.7)), .Names = c("pfs.time", "pfs.dummy",
"varA", "varB", "varC", "varD", "varE", "varF"), class = "data.frame",
row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L, 69L, 70L, 71L,
72L, 74L, 75L, 76L, 77L, 79L, 80L, 82L, 83L, 84L, 85L, 86L, 87L,
88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L,
112L, 113L, 114L, 115L, 116L, 117L, 119L, 120L, 121L, 122L, 123L,
124L, 125L, 126L, 127L, 128L, 129L))

boot.selection <- function(db, indices, model) {

    db <- db[indices, ]

    step.mod <- stepAIC(coxph(model, data=db),
                     direction = "backward",
                     trace=FALSE)

    return(c(attributes(terms(model))$term.labels) %in%
           names(coef(step.mod)))

}

modello <-  Surv(pfs.time, pfs.dummy) ~ varA + varB + varC +
            varD + varE + varF


# debug(boot)
# debug(boot.selection)
boot.test <- boot(data = hn,
                  statistic = boot.selection,
                  R = 10,
                  # parallel="multicore",
                  # ncpus = 8,
                  model = modello
                  )




More information about the R-help mailing list