[R] need help with avg.surv (Direct Adjusted Survival Curve), Message-ID:
Terry Therneau
therneau at mayo.edu
Mon Apr 30 15:33:10 CEST 2012
Well, I would suggest using the code already in place in the survival
package. Here is my code for your problem.
I'm using a copy of the larynx data as found from the web resources for
the Klein and Moeschberger book.
larynx <- read.table("larynx.dat", skip=12,
col.names=c("stage", "time", "age", "year", "death"))
larynx$stage <- factor(larynx$stage)
larynx$age.grp <- cut(larynx$age, c(0, 55, 70, 90),
labels=c("40-55", "55-70", "70-90"))
fit1 <- coxph(Surv(time, death) ~ age.grp + stage + I(year >=74),
larynx) #fit a Cox model
km <- survfit(Surv(time, death) ~ stage, data=larynx) # KM
plot(km)
direct <- survexp( ~stage, data=larynx, ratetable=fit1) # "direct
adjusted" survival
lines(direct, col=2)
---
A few further comments
1. It would help those of us who advise if you would simplify your
code. I didn't need to see 20+ lines of data set manipulation and curve
drawing. Give the essence of the problem.
2. The "direct" estimate above was first created for expected
survival curves based on population rate tables and publised by Edererer
in 1961. The same idea was rediscovered in the context of the Cox model
and renamed the "direct adjusted" estimate; I like to give credit to the
oridingal.
3. I did not try to debug your function.
Terry Therneau
--- begin included message ---
Hello R users,
I am trying to obtain a direct adjusted survival curve. I am sending my
whole code (see below). It's basically the larynx cancer data with Stage
1-4.I am using the cox model using coxph option, see the fit3 coxph.
When I use the avg.surv option on fit3, I get the following error:
"fits<-avg.surv(fit3, var.name="stage.fac", var.values=c(1,2,3,4),
data=larynx)
Error in `contrasts<-`(`*tmp*`, value = contrasts.arg[[nn]]) :
contrasts apply only to factors"
If try using var.values = c ("1","2","3","4") option, I get the
following error:
"Error in xmat %*% cfit$coef : non-conformable arguments
In addition: Warning message:
In model.matrix.default(Terms, mf, contrasts = contrast.arg) :
variable 'stage.fac' converted to a factor"
Please advise me on what I am doing wrong!
Regards,
Manish Dalwani
University of Colorado
library(survival)
library(ggplot2)
setwd("/Users/manishdalwani/surv_6646/")
#file.show("Larynx.txt")
#Stage of disease (1=stage 1, 2=stage2, 3=stage 3, 4=stage 4)
#Time to death or on-study time, months
#Age at diagnosis of larynx cancer
#Year of diagnosis of larynx cancer
#Death indicator (0=alive, 1=dead)
larynx <-read.table(file ="larynx.dat",col.names
=c("stage","time","age","year","death"),colClasses
=c("factor","numeric","numeric","numeric","numeric"))
#the death indicator needs to be numeric for the survfit function to work
# splitting age into three intervals: <55, 50-70, >70
age.larynx <- rep(0, length(larynx[,1]))
age.larynx [larynx$age < 55] <- 1
age.larynx [larynx$age >= 55 & larynx$age <= 70] <- 2
age.larynx [larynx$age >70] <- 3
larynx <- cbind(larynx, age.larynx)
larynx$age.larynx <- factor(larynx$age.larynx, labels=c("1", "2", "3"))
larynx$stage.fac<-as.factor(larynx$stage)
year.larynx <- rep(0, length(larynx[,1]))
year.larynx [larynx$year >= 74] <- 1
year.larynx [larynx$year < 74] <- 2
larynx <- cbind(larynx, year.larynx)
head(larynx)
summary(larynx)
kapmeier.fit <- survfit( Surv(time, death) ~ stage, data = larynx, type
= "kaplan-meier")
sumkap <- summary(kapmeier.fit)
attributes(kapmeier.fit)
attributes(sumkap)
plot(kapmeier.fit, lty=2:3, fun="cumhaz",
xlab="Months",ylab="Cumulative Hazard of death")
legend(4,.4,c("stage1","stage2","stage3","stage4"),lty=1:2)
plot(kapmeier.fit, lty=2:3,
xlab="Months",ylab="Survival Function")
#construct a data frame for the plots
plotdata <- data.frame(time = sumkap$time, surv = sumkap$surv, strata =
sumkap$strata)
fact.stage<-factor(larynx$stage)
fit1<-coxph(Surv(time, death) ~ fact.stage + age.larynx +
factor(year>=74), data=larynx, method=c("efron"))
summary(fit1)
avg.surv <- function(cfit, var.name, var.values, data, weights)
{
if(missing(data)){
if(!is.null(cfit$model))
mframe <- cfit$model
elsemframe <-model.frame(cfit,sys.parent())
} else mframe <- model.frame(cfit, data)
var.num <- match(var.name, names(mframe))
data.patterns <- apply(data.matrix(mframe[, - c(1, var.num)]), 1,
paste, collapse = ",")
data.patterns <- factor(data.patterns,levels=unique(data.patterns))
if(missing(weights))
weights <- table(data.patterns)
else weights <- tapply(weights, data.patterns, sum)
kp <- !duplicated(data.patterns)
mframe <- mframe[kp,]
obs.var <- mframe[,var.num]
lps <- (cfit$linear.predictor)[kp]
tframe <- mframe[rep(1,length(var.values)),]
tframe[,var.num] <- var.values
xmat <- model.matrix(cfit,data=tframe)[,-1]
tlps <- as.vector(xmat%*%cfit$coef)
names(tlps) <- var.values
obs.off <- tlps[as.character(obs.var)]
explp.off <- exp(outer(lps - obs.off ,tlps,"+"))
bfit <- survfit.coxph(cfit, se.fit = F)
fits <- outer(bfit$surv,explp.off,function(x,y) x^y)
avg.fits <-
apply(sweep(fits,2,weights,"*"),c(1,3),sum)/sum(weights)
dimnames(avg.fits) <- list(NULL,var.values)
list(time=bfit$time,fits=avg.fits)
}
fits<-avg.surv(fit1, var.name="fact.stage", data=larynx,
var.values=c("0","1","2","3"))
matlines(fits$time,fits$fits)
fit2<-coxph(Surv(time, death) ~ stage + age.larynx + factor(year>=74),
data=larynx, method=c("efron"))
summary(fit2)
fit3<-coxph(Surv(time, death) ~ stage.fac + age.larynx + year.larynx),
data=larynx, method=c("efron"))
summary(fit3)
fit4<-coxph(Surv(time,death)~factor(stage)+factor(age>=74)+factor(year>=74),data=larynx,method=c("efron"))
summary(fit4)
lines(survexp(~stage+ratetable(stage,age.larynx,year>=74),data=larynx,ratetable=fit2,cohort=TRUE),col="purple")
fits<-avg.surv(fit3, var.name="stage.fac", var.values=c(1,2,3,4),
data=larynx)
matlines(fits$time,fits$fits)
More information about the R-help
mailing list