[R] censor=FALSE and id options in survfit.coxph
Andrews, Chris
chrisaa at med.umich.edu
Tue Jun 25 22:07:45 CEST 2013
Terry,
I recently noticed the censor argument of survfit. For some analyses it greatly reduces the size of the resulting object, which is a nice feature.
However, when it is combined with the id argument, only one prediction is returned instead of one per id. The predictions can be made one id at a time, but I'd prefer to do them all in a single call, if that change can be made.
Chris
#####################################
# CODE
# Reproducible example: survfit() on a coxph fit, comparing calls with and
# without `id` and `censor=FALSE`. The case of interest is `id` together with
# `censor=FALSE` (modsf2 below).

# create data
# Fixed seed so the simulated data set is the same on every run.
set.seed(20130625)
n <- 100 # sample size
x <- rbinom(n, 1, 0.5) # covariate (binary, drawn with probability 0.5)
z <- rep(0, n) # start time (everyone starts at 0)
y <- rexp(n, exp(x)) # event time (exponential with rate exp(x))
e <- y < 2 # event indicator: TRUE if the event time is before 2, else censored at 2
y <- pmin(y, 2) # observation time (event time capped at 2)
dat <- data.frame(x,z,y,e)
# fit cox model with start/stop format, i.e. Surv(start, stop, event)
library(survival)
mod <- coxph(Surv(z, y, e)~x, data=dat)
summary(mod)
# create prediction dataset with 3 individuals with
# x = 0 on (0,2)
# x = 1 on (0,2)
# x = 0 on (0,1) and x = 1 on (1,2)
# Four rows but three ids: id 3 contributes two rows, one per (z, y] interval.
datnew <- data.frame(x=c(0,1,0,1), z=c(0,0,0,1), y=c(2,2,1,2), e=rep(0,4), id=c(1,2,3,3))
datnew
# as expected: one predicted curve per id
modsf1 <- survfit(mod, newdata=datnew, id=id)
modsf1
# not as expected: same call as modsf1 with censor=FALSE added
modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)
modsf2
# for comparison: no id, one curve per row of newdata (rows 1 and 2 only)
modsf3 <- survfit(mod, newdata=datnew[1:2,])
modsf3
# appears to work when individual=FALSE (id not specified)
modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)
modsf4
# visually: the four fits above side by side in a 2 x 2 grid of panels
par(mfrow=c(2,2))
plot(modsf1, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf2, col=1:3, lty=1:3, conf.int=FALSE)
plot(modsf3, col=1:2, lty=1:2, conf.int=FALSE)
plot(modsf4, col=1:2, lty=1:2, conf.int=FALSE)
# Can be done individually: one survfit() call per id, each with censor=FALSE
modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
modsf2a
modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
modsf2b
# id 3 needs both of its rows (3 and 4)
modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)
modsf2c
# one at a time: back to a single panel, then overlay the three per-id fits
par(mfrow=c(1,1))
plot(modsf2a, col=1, lty=1, conf.int=FALSE)
lines(modsf2b, col=2, lty=2, conf.int=FALSE)
lines(modsf2c, col=3, lty=3, conf.int=FALSE)
#####################################
# OUTPUT
> # create data
>
> set.seed(20130625)
> n <- 100 # sample size
> x <- rbinom(n, 1, 0.5) # covariate
> z <- rep(0, n) # start time
> y <- rexp(n, exp(x)) # event time
> e <- y < 2 # censor at 2
> y <- pmin(y, 2) # observation time
> dat <- data.frame(x,z,y,e)
> # fit cox model with start/stop format
> library(survival)
> mod <- coxph(Surv(z, y, e)~x, data=dat)
> summary(mod)
Call:
coxph(formula = Surv(z, y, e) ~ x, data = dat)
n= 100, number of events= 98
coef exp(coef) se(coef) z Pr(>|z|)
x 0.7162 2.0466 0.2091 3.425 0.000614 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
x 2.047 0.4886 1.359 3.083
Concordance= 0.601 (se = 0.029 )
Rsquare= 0.109 (max possible= 0.999 )
Likelihood ratio test= 11.58 on 1 df, p=0.0006666
Wald test = 11.73 on 1 df, p=0.0006137
Score (logrank) test = 12.18 on 1 df, p=0.0004831
> # create prediction dataset with 3 individuals with
> # x = 0 on (0,2)
> # x = 1 on (0,2)
> # x = 0 on (0,1) and x = 1 on (1,2)
> datnew <- data.fra .... [TRUNCATED]
> datnew
x z y e id
1 0 0 2 0 1
2 1 0 2 0 2
3 0 0 1 0 3
4 1 1 2 0 3
> # as expected
> modsf1 <- survfit(mod, newdata=datnew, id=id)
> modsf1
Call: survfit(formula = mod, newdata = datnew, id = id)
records n.max n.start events median 0.95LCL 0.95UCL
0 100 100 100 98 0.663 0.457 0.948
<NA> 100 100 100 98 0.333 0.288 0.457
<NA> 100 100 100 98 0.663 0.457 0.948
> # not as expected
> modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE)
> modsf2
Call: survfit(formula = mod, newdata = datnew, censor = FALSE, id = id)
records n.max n.start events median 0.95LCL 0.95UCL
100.000 100.000 100.000 294.000 0.663 0.457 0.948
> # for comparison
> modsf3 <- survfit(mod, newdata=datnew[1:2,])
> modsf3
Call: survfit(formula = mod, newdata = datnew[1:2, ])
records n.max n.start events median 0.95LCL 0.95UCL
[1,] 100 100 100 98 0.663 0.457 0.948
[2,] 100 100 100 98 0.333 0.288 0.457
> # appears to work when individual=FALSE (id not specified)
> modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE)
> modsf4
Call: survfit(formula = mod, newdata = datnew[1:2, ], censor = FALSE)
records n.max n.start events median 0.95LCL 0.95UCL
[1,] 100 100 100 98 0.663 0.457 0.948
[2,] 100 100 100 98 0.333 0.288 0.457
> modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE)
> modsf2a
Call: survfit(formula = mod, newdata = datnew[1, ], censor = FALSE,
id = id)
records n.max n.start events median 0.95LCL 0.95UCL
100.000 100.000 100.000 98.000 0.663 0.457 0.948
> modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE)
> modsf2b
Call: survfit(formula = mod, newdata = datnew[2, ], censor = FALSE,
id = id)
records n.max n.start events median 0.95LCL 0.95UCL
100.000 100.000 100.000 98.000 0.333 0.288 0.457
> modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE)
> modsf2c
Call: survfit(formula = mod, newdata = datnew[3:4, ], censor = FALSE,
id = id)
records n.max n.start events median 0.95LCL 0.95UCL
100.000 100.000 100.000 98.000 0.663 0.457 0.948
**********************************************************
Electronic Mail is not secure, may not be read every day, and should not be used for urgent or sensitive issues
More information about the R-help
mailing list