[R] censor=FALSE and id options in survfit.coxph
    Andrews, Chris 
    chrisaa at med.umich.edu
       
    Tue Jun 25 22:07:45 CEST 2013
    
    
  
Terry,
I recently noticed the censor argument of survfit.  For some analyses it greatly reduces the size of the resulting object, which is a nice feature.  
However, when combined with the id argument, only one prediction is returned instead of one per id (in the example below, a single curve reporting 294 = 3 x 98 events).  Predictions can be made individually, but I'd prefer to do them all at once if that change can be made.
Chris
#####################################
# CODE
# Reproducible example: survfit() on a coxph fit, comparing results with and
# without censor=FALSE when the id argument is also supplied.
#
# create data
set.seed(20130625)
n <- 100 # sample size
x <- rbinom(n, 1, 0.5) # binary covariate, P(x = 1) = 0.5
z <- rep(0, n) # start time (all subjects start at 0)
y <- rexp(n, exp(x)) # event time, exponential with rate exp(x)
e <- y < 2 # event indicator: TRUE if the event occurred before time 2
y <- pmin(y, 2) # observation time, capped at the censoring time 2
dat <- data.frame(x,z,y,e)
# fit cox model with start/stop format: Surv(start, stop, event)
library(survival)
mod <- coxph(Surv(z, y, e)~x, data=dat)
summary(mod)
# create prediction dataset with 3 individuals (4 rows; rows 3 and 4 share
# id = 3 and together describe one individual whose covariate changes at 1):
# id 1: x = 0 on (0,2)
# id 2: x = 1 on (0,2)
# id 3: x = 0 on (0,1) and x = 1 on (1,2)
datnew <- data.frame(x=c(0,1,0,1), z=c(0,0,0,1), y=c(2,2,1,2), e=rep(0,4), id=c(1,2,3,3))
datnew
# as expected: one curve per id (three rows in the printed summary)
modsf1 <- survfit(mod, newdata=datnew, id=id)
modsf1
# not as expected: same call plus censor=FALSE prints a single summary row
# (events = 294 rather than 98 per curve)
modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)
modsf2
# for comparison: no id, one curve per row of newdata (fixed covariates only)
modsf3 <- survfit(mod, newdata=datnew[1:2,])
modsf3
# appears to work when individual=FALSE (id not specified):
# censor=FALSE still yields one curve per row of newdata
modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)
modsf4
# visually: 2 x 2 grid, one panel per fit above (modsf1..modsf4)
par(mfrow=c(2,2))
plot(modsf1, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf2, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf3, col=1:2, lty=1:2, conf.int=FALSE)
plot(modsf4, col=1:2, lty=1:2, conf.int=FALSE)
# Workaround: with id and censor=FALSE, the predictions can be done
# individually by passing one id's rows of newdata at a time
modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
modsf2a
modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
modsf2b
modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)
modsf2c
# one at a time: overlay the three individually computed curves in one panel
par(mfrow=c(1,1))
plot(modsf2a, col=1, lty=1, conf.int=FALSE)
lines(modsf2b, col=2, lty=2, conf.int=FALSE)
lines(modsf2c, col=3, lty=3, conf.int=FALSE)
#####################################
# OUTPUT
> # create data
> 
> set.seed(20130625)
> n <- 100 # sample size
> x <- rbinom(n, 1, 0.5) # covariate
> z <- rep(0, n) # start time
> y <- rexp(n, exp(x)) # event time
> e <- y < 2 # censor at 2
> y <- pmin(y, 2) # observation time
> dat <- data.frame(x,z,y,e)
> # fit cox model with start/stop format
> library(survival)
> mod <- coxph(Surv(z, y, e)~x, data=dat)
> summary(mod)
Call:
coxph(formula = Surv(z, y, e) ~ x, data = dat)
  n= 100, number of events= 98 
    coef exp(coef) se(coef)     z Pr(>|z|)    
x 0.7162    2.0466   0.2091 3.425 0.000614 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  exp(coef) exp(-coef) lower .95 upper .95
x     2.047     0.4886     1.359     3.083
Concordance= 0.601  (se = 0.029 )
Rsquare= 0.109   (max possible= 0.999 )
Likelihood ratio test= 11.58  on 1 df,   p=0.0006666
Wald test            = 11.73  on 1 df,   p=0.0006137
Score (logrank) test = 12.18  on 1 df,   p=0.0004831
> # create prediction dataset with 3 individuals with
> # x = 0 on (0,2)
> # x = 1 on (0,2)
> # x = 0 on (0,1) and x = 1 on (1,2)
> datnew <- data.frame(x=c(0,1,0,1), z=c(0,0,0,1), y=c(2,2,1,2), e=rep(0,4), id=c(1,2,3,3))
> datnew
  x z y e id
1 0 0 2 0  1
2 1 0 2 0  2
3 0 0 1 0  3
4 1 1 2 0  3
> # as expected
> modsf1 <- survfit(mod, newdata=datnew, id=id)
> modsf1
Call: survfit(formula = mod, newdata = datnew, id = id)
     records n.max n.start events median 0.95LCL 0.95UCL
0        100   100     100     98  0.663   0.457   0.948
<NA>     100   100     100     98  0.333   0.288   0.457
<NA>     100   100     100     98  0.663   0.457   0.948
> # not as expected 
> modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)
> modsf2
Call: survfit(formula = mod, newdata = datnew, censor = FALSE, id = id)
records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000 294.000   0.663   0.457   0.948 
> # for comparison
> modsf3 <- survfit(mod, newdata=datnew[1:2,])
> modsf3
Call: survfit(formula = mod, newdata = datnew[1:2, ])
     records n.max n.start events median 0.95LCL 0.95UCL
[1,]     100   100     100     98  0.663   0.457   0.948
[2,]     100   100     100     98  0.333   0.288   0.457
> # appears to work when individual=FALSE (id not specified)
> modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)
> modsf4
Call: survfit(formula = mod, newdata = datnew[1:2, ], censor = FALSE)
     records n.max n.start events median 0.95LCL 0.95UCL
[1,]     100   100     100     98  0.663   0.457   0.948
[2,]     100   100     100     98  0.333   0.288   0.457
> modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
> modsf2a
Call: survfit(formula = mod, newdata = datnew[1, ], censor = FALSE, 
    id = id)
records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.663   0.457   0.948 
> modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
> modsf2b
Call: survfit(formula = mod, newdata = datnew[2, ], censor = FALSE, 
    id = id)
records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.333   0.288   0.457 
> modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)
> modsf2c
Call: survfit(formula = mod, newdata = datnew[3:4, ], censor = FALSE, 
    id = id)
records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.663   0.457   0.948 
**********************************************************
Electronic Mail is not secure, may not be read every day, and should not be used for urgent or sensitive issues 
    
    
More information about the R-help
mailing list