[R] censor=FALSE and id options in survfit.coxph

Andrews, Chris chrisaa at med.umich.edu
Tue Jun 25 22:07:45 CEST 2013


Terry,

I recently noticed the censor argument of survfit.  For some analyses it greatly reduces the size of the resulting object, which is a nice feature.  

However, when combined with the id argument, only 1 prediction is made.  Predictions can be made individually but I'd prefer to do them all at once if that change can be made.

Chris

#####################################
# CODE
# create data

set.seed(20130625)
n <- 100 # sample size
x <- rbinom(n, 1, 0.5) # covariate
z <- rep(0, n) # start time
y <- rexp(n, exp(x)) # event time
e <- y < 2 # censor at 2
y <- pmin(y, 2) # observation time
dat <- data.frame(x,z,y,e)


# fit cox model with start/stop format
library(survival)
mod <- coxph(Surv(z, y, e)~x, data=dat)
summary(mod)

# create prediction dataset with 3 individuals with
# x = 0 on (0,2)
# x = 1 on (0,2)
# x = 0 on (0,1) and x = 1 on (1,2)
datnew <- data.frame(x=c(0,1,0,1), z=c(0,0,0,1), y=c(2,2,1,2), e=rep(0,4), id=c(1,2,3,3))
datnew

# as expected
modsf1 <- survfit(mod, newdata=datnew, id=id)
modsf1

# not as expected 
modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)
modsf2

# for comparison
modsf3 <- survfit(mod, newdata=datnew[1:2,])
modsf3

# appears to work when individual=FALSE (id not specified)
modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)
modsf4

# visually
par(mfrow=c(2,2))
plot(modsf1, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf2, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf3, col=1:2, lty=1:2, conf.int=FALSE)
plot(modsf4, col=1:2, lty=1:2, conf.int=FALSE)


# Can be done individually
modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
modsf2a
modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
modsf2b
modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)
modsf2c

# one at a time
par(mfrow=c(1,1))
plot(modsf2a, col=1, lty=1, conf.int=FALSE)
lines(modsf2b, col=2, lty=2, conf.int=FALSE)
lines(modsf2c, col=3, lty=3, conf.int=FALSE)

#####################################
# OUTPUT


> # create data
> 
> set.seed(20130625)

> n <- 100 # sample size

> x <- rbinom(n, 1, 0.5) # covariate

> z <- rep(0, n) # start time

> y <- rexp(n, exp(x)) # event time

> e <- y < 2 # censor at 2

> y <- pmin(y, 2) # observation time

> dat <- data.frame(x,z,y,e)

> # fit cox model with start/stop format
> library(survival)

> mod <- coxph(Surv(z, y, e)~x, data=dat)

> summary(mod)
Call:
coxph(formula = Surv(z, y, e) ~ x, data = dat)

  n= 100, number of events= 98 

    coef exp(coef) se(coef)     z Pr(>|z|)    
x 0.7162    2.0466   0.2091 3.425 0.000614 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

  exp(coef) exp(-coef) lower .95 upper .95
x     2.047     0.4886     1.359     3.083

Concordance= 0.601  (se = 0.029 )
Rsquare= 0.109   (max possible= 0.999 )
Likelihood ratio test= 11.58  on 1 df,   p=0.0006666
Wald test            = 11.73  on 1 df,   p=0.0006137
Score (logrank) test = 12.18  on 1 df,   p=0.0004831


> # create prediction dataset with 3 individuals with
> # x = 0 on (0,2)
> # x = 1 on (0,2)
> # x = 0 on (0,1) and x = 1 on (1,2)
> datnew <- data.fra .... [TRUNCATED] 

> datnew
  x z y e id
1 0 0 2 0  1
2 1 0 2 0  2
3 0 0 1 0  3
4 1 1 2 0  3

> # as expected
> modsf1 <- survfit(mod, newdata=datnew, id=id)

> modsf1
Call: survfit(formula = mod, newdata = datnew, id = id)

     records n.max n.start events median 0.95LCL 0.95UCL
0        100   100     100     98  0.663   0.457   0.948
<NA>     100   100     100     98  0.333   0.288   0.457
<NA>     100   100     100     98  0.663   0.457   0.948

> # not as expected 
> modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)

> modsf2
Call: survfit(formula = mod, newdata = datnew, censor = FALSE, id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000 294.000   0.663   0.457   0.948 

> # for comparison
> modsf3 <- survfit(mod, newdata=datnew[1:2,])

> modsf3
Call: survfit(formula = mod, newdata = datnew[1:2, ])

     records n.max n.start events median 0.95LCL 0.95UCL
[1,]     100   100     100     98  0.663   0.457   0.948
[2,]     100   100     100     98  0.333   0.288   0.457

> # appears to work when individual=FALSE (id not specified)
> modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)

> modsf4
Call: survfit(formula = mod, newdata = datnew[1:2, ], censor = FALSE)

     records n.max n.start events median 0.95LCL 0.95UCL
[1,]     100   100     100     98  0.663   0.457   0.948
[2,]     100   100     100     98  0.333   0.288   0.457

> modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
> modsf2a
Call: survfit(formula = mod, newdata = datnew[1, ], censor = FALSE, 
    id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.663   0.457   0.948 
> modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
> modsf2b
Call: survfit(formula = mod, newdata = datnew[2, ], censor = FALSE, 
    id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.333   0.288   0.457 
> modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)
> modsf2c
Call: survfit(formula = mod, newdata = datnew[3:4, ], censor = FALSE, 
    id = id)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
100.000 100.000 100.000  98.000   0.663   0.457   0.948 

**********************************************************
Electronic Mail is not secure, may not be read every day, and should not be used for urgent or sensitive issues 



More information about the R-help mailing list