[R] need help with avg.surv (Direct Adjusted Survival Curve), Message-ID:

Terry Therneau therneau at mayo.edu
Mon Apr 30 15:33:10 CEST 2012


  Well, I would suggest using the code already in place in the survival 
package.  Here is my code for your problem.
I'm using a copy of the larynx data as found from the web resources for 
the Klein and Moeschberger book.

larynx <- read.table("larynx.dat", skip=12,
                      col.names=c("stage", "time", "age", "year", "death"))
larynx$stage <- factor(larynx$stage)
larynx$age.grp <- cut(larynx$age, c(0, 55, 70, 90),
                       labels=c("40-55", "55-70", "70-90"))

fit1 <- coxph(Surv(time, death) ~  age.grp + stage + I(year >=74), 
larynx) #fit a Cox model
km <- survfit(Surv(time, death) ~ stage, data=larynx)    # KM
plot(km)

direct <- survexp( ~stage, data=larynx, ratetable=fit1)  # "direct 
adjusted" survival
lines(direct, col=2)

---
  A few further comments
   1. It would help those of us who advise if you would simplify your 
code.  I didn't need to see 20+ lines of data set manipulation and curve 
drawing.  Give the essence of the problem.
   2. The "direct" estimate above was first created for expected 
survival curves based on population rate tables and publised by Edererer 
in 1961.  The same idea was rediscovered in the context of the Cox model 
and renamed the "direct adjusted" estimate; I like to give credit to the 
oridingal.
   3. I did not try to debug your function.


Terry Therneau


---  begin included message ---
Hello R users,

I am trying to obtain a direct adjusted survival curve. I am sending my 
whole code (see below). It's basically the larynx cancer data with Stage 
1-4.I am using the cox model using coxph option, see the fit3 coxph. 
When I use the avg.surv option on fit3, I get the following error: 
"fits<-avg.surv(fit3, var.name="stage.fac", var.values=c(1,2,3,4), 
data=larynx)
Error in `contrasts<-`(`*tmp*`, value = contrasts.arg[[nn]]) :
   contrasts apply only to factors"

If try using var.values = c ("1","2","3","4") option, I get the 
following error:
"Error in xmat %*% cfit$coef : non-conformable arguments
In addition: Warning message:
In model.matrix.default(Terms, mf, contrasts = contrast.arg) :
   variable 'stage.fac' converted to a factor"

Please advise me on what I am doing wrong!

Regards,
Manish Dalwani
University of Colorado

library(survival)
library(ggplot2)

setwd("/Users/manishdalwani/surv_6646/")
#file.show("Larynx.txt")

#Stage of disease (1=stage 1, 2=stage2, 3=stage 3, 4=stage 4)
#Time to death or on-study time, months
#Age at diagnosis of larynx cancer
#Year of diagnosis of larynx cancer
#Death indicator (0=alive, 1=dead)

larynx <-read.table(file ="larynx.dat",col.names 
=c("stage","time","age","year","death"),colClasses 
=c("factor","numeric","numeric","numeric","numeric"))
#the death indicator needs to be numeric for the survfit function to work

# splitting age into three intervals: <55, 50-70, >70
age.larynx <- rep(0, length(larynx[,1]))
age.larynx [larynx$age < 55] <- 1
age.larynx [larynx$age >= 55 & larynx$age <= 70] <- 2
age.larynx [larynx$age >70] <- 3
larynx <- cbind(larynx, age.larynx)
larynx$age.larynx <- factor(larynx$age.larynx, labels=c("1", "2", "3"))
larynx$stage.fac<-as.factor(larynx$stage)

year.larynx <- rep(0, length(larynx[,1]))
year.larynx [larynx$year >= 74] <- 1
year.larynx [larynx$year < 74] <- 2
larynx <- cbind(larynx, year.larynx)

head(larynx)
summary(larynx)

kapmeier.fit <- survfit( Surv(time, death) ~ stage, data = larynx, type 
= "kaplan-meier")
sumkap <- summary(kapmeier.fit)
attributes(kapmeier.fit)
attributes(sumkap)
plot(kapmeier.fit, lty=2:3, fun="cumhaz",
xlab="Months",ylab="Cumulative Hazard of death")
legend(4,.4,c("stage1","stage2","stage3","stage4"),lty=1:2)

plot(kapmeier.fit, lty=2:3,
xlab="Months",ylab="Survival Function")
#construct a data frame for the plots
plotdata <- data.frame(time = sumkap$time, surv = sumkap$surv, strata = 
sumkap$strata)
fact.stage<-factor(larynx$stage)
fit1<-coxph(Surv(time, death) ~ fact.stage + age.larynx + 
factor(year>=74), data=larynx, method=c("efron"))
summary(fit1)
avg.surv <- function(cfit, var.name, var.values, data, weights)
{
if(missing(data)){
if(!is.null(cfit$model))
mframe <- cfit$model
elsemframe <-model.frame(cfit,sys.parent())
}   else mframe <- model.frame(cfit, data)
var.num <- match(var.name, names(mframe))
data.patterns <- apply(data.matrix(mframe[,  - c(1, var.num)]), 1,
paste, collapse = ",")
         data.patterns <- factor(data.patterns,levels=unique(data.patterns))
if(missing(weights))
weights <- table(data.patterns)
else weights <- tapply(weights, data.patterns, sum)
         kp <- !duplicated(data.patterns)
mframe <- mframe[kp,]
         obs.var <- mframe[,var.num]
         lps <- (cfit$linear.predictor)[kp]
         tframe <- mframe[rep(1,length(var.values)),]
         tframe[,var.num] <- var.values
         xmat <- model.matrix(cfit,data=tframe)[,-1]
         tlps <- as.vector(xmat%*%cfit$coef)
         names(tlps) <- var.values
         obs.off <- tlps[as.character(obs.var)]
         explp.off <- exp(outer(lps - obs.off ,tlps,"+"))
bfit <- survfit.coxph(cfit, se.fit = F)
         fits <- outer(bfit$surv,explp.off,function(x,y) x^y)
         avg.fits <-
            apply(sweep(fits,2,weights,"*"),c(1,3),sum)/sum(weights)
         dimnames(avg.fits) <- list(NULL,var.values)
         list(time=bfit$time,fits=avg.fits)
}

fits<-avg.surv(fit1, var.name="fact.stage", data=larynx, 
var.values=c("0","1","2","3"))
matlines(fits$time,fits$fits)
fit2<-coxph(Surv(time, death) ~ stage + age.larynx + factor(year>=74), 
data=larynx, method=c("efron"))
summary(fit2)
fit3<-coxph(Surv(time, death) ~ stage.fac + age.larynx + year.larynx), 
data=larynx, method=c("efron"))
summary(fit3)
fit4<-coxph(Surv(time,death)~factor(stage)+factor(age>=74)+factor(year>=74),data=larynx,method=c("efron"))
summary(fit4)
  lines(survexp(~stage+ratetable(stage,age.larynx,year>=74),data=larynx,ratetable=fit2,cohort=TRUE),col="purple")
  fits<-avg.surv(fit3, var.name="stage.fac", var.values=c(1,2,3,4), 
data=larynx)
matlines(fits$time,fits$fits)



More information about the R-help mailing list