require(foreign)
require(survival)
require(cmprsk)

################################################################
# Green & Byar data set
gb <- read.dta("h:/competing risks/gb.dta")  # N = 483
attach(gb)

#################################
# Cox model fits for cause-specific hazards
fit1 <- coxph( Surv(time, status=="Cancer") ~ treatment + ag + hx + sz + sg, method="breslow", data=gb) 
fit2 <- coxph( Surv(time, status=="CVD") ~ treatment + ag + hx + sz + sg, method="breslow", data=gb) 
fit3 <- coxph( Surv(time, status=="Other") ~ treatment + ag + hx + sz + sg, method="breslow", data=gb) 
fit <- coxph( Surv(time, status=="Cancer" | status=="CVD" | status=="Other") ~ treatment + ag + hx + sz + sg, method="breslow", data=gb) 

fits <- list(fit1, fit2, fit3, fit)

##########################################################
cif <- function(fits, etype=1, z) {
# Prediction of cumulative incidence curves (Cheng, Fine, Wei; Biometrics 1998)
# Indirect regression method
nfits <- length(fits)
cif <- NULL
# if z is missing mean covariate value is used
if (missing(z)) {
	zmiss <- TRUE 
	z <- matrix(NA, 1, 1)
	} else {
	zmiss <- FALSE
	if (is.null(dim(z))) z <- matrix(z, nrow=1, ncol=length(z))	
	}
for (i in 1:nrow(z)) {
sfit.e <- if(!zmiss) survfit(fits[[etype]], type="aalen", newdata=z[i, ])
	else survfit(fits[[etype]], type="aalen")
surv <- sfit.e$surv
time <- sfit.e$time
dH <- c(-log(surv)[1], diff(-log(surv) ))
sfit.o <- if(!zmiss) survfit(fits[[nfits]], type="aalen", newdata=z[i, ])
	else survfit(fits[[nfits]], type="aalen")
Osurv <- stepfun(x=sfit.o$time, y=c(1, sfit.o$surv))

cif <- cbind(cif, cumsum(Osurv(time) * dH))
}
if(!zmiss) colnames(cif) <- paste("cif.z", 1:nrow(z), sep="")
else 	colnames(cif) <- "cif.zmean"
data.frame(time = time, cif)
}
######################

# two sets of covariates for which CIFs are to be predicted
z1 <- data.frame(treatment=0, ag=0, hx=1, sz=0, sg=0)
z2 <- data.frame(treatment=1, ag=0, hx=1, sz=0, sg=0)

model.mat <- model.matrix( ~ time + as.numeric(status) + treatment + as.numeric(ag) + hx + sz + sg, data=gb)  

# Comparison with Fine & Gray's direct regression

# prostate cancer
cr1 <- cif(fits, z=rbind(z1, z2), etype=1)  # Indirect regression
plot(cr1[,1], cr1[,2], type="o", xlab="Time", ylab="Cumulative incidence of Prostate cancer deaths", ylim=c(0, 0.6), main="Comparison of indirect and direct CIF prediction")
lines(cr1[,1], cr1[,3],col=2, type="o") 

cif.cancer <- crr(ftime=model.mat[, 2], fstatus=model.mat[, 3], cov1=model.mat[, 4:8], failcode=2, maxiter=50)  # Fine-Gray
z.cancer <- predict(cif.cancer, cov1=rbind(unlist(z1), unlist(z2))) 
lines(z.cancer[,1], z.cancer[,2], col=3)
lines(z.cancer[,1], z.cancer[,3], col=4)
legend(locator(1), legend=c("z1 (indirect)", "z2 (indirect)", "z1 (FG)", "z2 (FG)"), lty=rep(1, 4), col=c(1,2, 3,4))

# CVD
# The two methods give quite different estimates of CIF here
cr2 <- cif(fits, z=rbind(z1, z2), etype=2)  
plot(cr2[,1], cr2[,2], type="o", xlab="Time", ylab="Cumulative incidence of CVD deaths", ylim=c(0, 0.6), main="Comparison of indirect and direct CIF prediction")
lines(cr2[,1], cr2[,3],col=2, type="o") 

cif.cvd <- crr(ftime=model.mat[, 2], fstatus=model.mat[, 3], cov1=model.mat[, 4:8], failcode=3, maxiter=50)
z.cvd <- predict(cif.cvd, cov1=rbind(unlist(z1), unlist(z2))) 
lines(z.cvd[,1], z.cvd[,2], col=3)
lines(z.cvd[,1], z.cvd[,3], col=4)
legend(locator(1), legend=c("z1 (indirect)", "z2 (indirect)", "z1 (FG)", "z2 (FG)"), lty=rep(1, 4), col=c(1,2, 3,4))


# Other
cr3 <- cif(fits, z=rbind(z1, z2), etype=3)  
plot(cr3[,1], cr3[,2], type="o", xlab="Time", ylab="Cumulative incidence of Other deaths", ylim=c(0, 0.6), main="Comparison of indirect and direct CIF prediction")
lines(cr3[,1], cr3[,3],col=2, type="o") 

cif.oth <- crr(ftime=model.mat[, 2], fstatus=model.mat[, 3], cov1=model.mat[, 4:8], failcode=4, maxiter=50)
z.oth <- predict(cif.oth, cov1=rbind(unlist(z1), unlist(z2))) 
lines(z.oth[,1], z.oth[,2], col=3)
lines(z.oth[,1], z.oth[,3], col=4)
legend(locator(1), legend=c("z1 (indirect)", "z2 (indirect)", "z1 (FG)", "z2 (FG)"), lty=rep(1, 4), col=c(1,2, 3,4))