[R] censor=FALSE and id options in survfit.coxph

Andrews, Chris chrisaa at med.umich.edu
Tue Jun 25 22:07:45 CEST 2013


I recently noticed the censor argument of survfit.  For some analyses it greatly reduces the size of the resulting object, which is a nice feature.  

However, when it is combined with the id argument, only one prediction is returned instead of one per id.  The predictions can be made individually, but I would prefer to make them all at once, if that change can be made.


# create data

n <- 100 # sample size
x <- rbinom(n, 1, 0.5) # covariate
z <- rep(0, n) # start time
y <- rexp(n, exp(x)) # event time
e <- y < 2 # censor at 2
y <- pmin(y, 2) # observation time
dat <- data.frame(x,z,y,e)

# fit cox model with start/stop format
mod <- coxph(Surv(z, y, e)~x, data=dat)

# create prediction dataset with 3 individuals with
# x = 0 on (0,2)
# x = 1 on (0,2)
# x = 0 on (0,1) and x = 1 on (1,2)
datnew <- data.frame(x=c(0,1,0,1), z=c(0,0,0,1), y=c(2,2,1,2), e=rep(0,4), id=c(1,2,3,3))

# as expected
modsf1 <- survfit(mod, newdata=datnew, id=id)

# not as expected 
modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)

# for comparison
modsf3 <- survfit(mod, newdata=datnew[1:2,])

# appears to work when individual=FALSE (id not specified)
modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)

# visually
plot(modsf1, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf2, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf3, col=1:2, lty=1:2, conf.int=FALSE)
plot(modsf4, col=1:2, lty=1:2, conf.int=FALSE)

# Can be done individually
modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)

# one at a time
plot(modsf2a, col=1, lty=1, conf.int=FALSE)
lines(modsf2b, col=2, lty=2, conf.int=FALSE)
lines(modsf2c, col=3, lty=3, conf.int=FALSE)


> # create data
> set.seed(20130625)

> n <- 100 # sample size

> x <- rbinom(n, 1, 0.5) # covariate

> z <- rep(0, n) # start time

> y <- rexp(n, exp(x)) # event time

> e <- y < 2 # censor at 2

> y <- pmin(y, 2) # observation time

> dat <- data.frame(x,z,y,e)

> # fit cox model with start/stop format
> library(survival)

> mod <- coxph(Surv(z, y, e)~x, data=dat)

> summary(mod)
coxph(formula = Surv(z, y, e) ~ x, data = dat)

  n= 100, number of events= 98 

    coef exp(coef) se(coef)     z Pr(>|z|)    
x 0.7162    2.0466   0.2091 3.425 0.000614 ***
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

  exp(coef) exp(-coef) lower .95 upper .95
x     2.047     0.4886     1.359     3.083

Concordance= 0.601  (se = 0.029 )
Rsquare= 0.109   (max possible= 0.999 )
Likelihood ratio test= 11.58  on 1 df,   p=0.0006666
Wald test            = 11.73  on 1 df,   p=0.0006137
Score (logrank) test = 12.18  on 1 df,   p=0.0004831

> # create prediction dataset with 3 individuals with
> # x = 0 on (0,2)
> # x = 1 on (0,2)
> # x = 0 on (0,1) and x = 1 on (1,2)
> datnew <- data.frame(x=c(0,1,0,1), z=c(0,0,0,1), y=c(2,2,1,2), e=rep(0,4), id=c(1,2,3,3))

> datnew
  x z y e id
1 0 0 2 0  1
2 1 0 2 0  2
3 0 0 1 0  3
4 1 1 2 0  3

> # as expected
> modsf1 <- survfit(mod, newdata=datnew, id=id)

> modsf1
Call: survfit(formula = mod, newdata = datnew, id = id)

     records n.max n.start events median 0.95LCL 0.95UCL
0        100   100     100     98  0.663   0.457   0.948
<NA>     100   100     100     98  0.333   0.288   0.457
<NA>     100   100     100     98  0.663   0.457   0.948

> # not as expected 
> modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)

> modsf2
Call: survfit(formula = mod, newdata = datnew, censor = FALSE, id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000 294.000   0.663   0.457   0.948 

> # for comparison
> modsf3 <- survfit(mod, newdata=datnew[1:2,])

> modsf3
Call: survfit(formula = mod, newdata = datnew[1:2, ])

     records n.max n.start events median 0.95LCL 0.95UCL
[1,]     100   100     100     98  0.663   0.457   0.948
[2,]     100   100     100     98  0.333   0.288   0.457

> # appears to work when individual=FALSE (id not specified)
> modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)

> modsf4
Call: survfit(formula = mod, newdata = datnew[1:2, ], censor = FALSE)

     records n.max n.start events median 0.95LCL 0.95UCL
[1,]     100   100     100     98  0.663   0.457   0.948
[2,]     100   100     100     98  0.333   0.288   0.457

> modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
> modsf2a
Call: survfit(formula = mod, newdata = datnew[1, ], censor = FALSE, 
    id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.663   0.457   0.948 
> modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
> modsf2b
Call: survfit(formula = mod, newdata = datnew[2, ], censor = FALSE, 
    id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.333   0.288   0.457 
> modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)
> modsf2c
Call: survfit(formula = mod, newdata = datnew[3:4, ], censor = FALSE, 
    id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.663   0.457   0.948 

Electronic Mail is not secure, may not be read every day, and should not be used for urgent or sensitive issues 

More information about the R-help mailing list