# R-code reproduced from John Fox (2002), # "Cox proportional-Hazards Regression for Survival Data", # Appendix to An R and Splus Companion to Applied Regression library(survival) data <- read.table("http://socserv.mcmaster.ca/jfox/Books/Companion/Rossi.txt", header=T) # week: week of first arrest after release, or censoring time # arrest: indicator for the event: # 1 for those arrested during the study, 0 otherwise # fin: dummy variable for financial aid: # 1 for those receiving financial aid after being # released from prison, 0 otherwise # age: age in years at time of release # race: dummy variable for race: # 1 for blacks, 0 for others # wexp: dummy variable for work experience: # 1 for people who had full time work experience before # going in prison, 0 otherwise # mar: dummy variabe for marriage: # 1 for people who were married at the time of release, 0 otherwise # paro: dummy variable for parole: # 1 for people who were released on parole, 0 otherwise # prio: number of prior convictions # educ: education variable with codes 2 (grade 6 or less), # 3 (grades 6 through 9), 4 (grades 10 and 11), # 5 (grade 12), 6 (post-secondary) # take a look at the data data[1:5, 1:10] # fit Cox regression surv.object <- Surv(data$week,data$arrest) surv.object m <- coxph(surv.object ~ fin+age+race+wexp+mar+paro+prio, data=data) summary(m) # age and prio seem highly significant # fin (was focus of the study) seems marginally significant # note Rsquared is quite low # Interpretation easiest for exp(coef): # Holding all other factors constant, each extra year of age # is associated, on average, with a reduction of the hazard # by a factor .944, that is, a 5.6% reduction. # Holding all other factors constant, each extra prior conviction # is associated, on average, with an increase of the hazard # by a factor of 1.096, that is, a 9.6% increase. # plot 1-F, at average value of covariates: plot(survfit(m), ylim=c(.7,1), xlab="Weeks", ylab="Proportion not arrested") # create new data frame to look at effect of fin: attach(data) data.new <- data.frame(fin=c(0,1), age=rep(mean(age),2), race=rep(mean(race),2), wexp = rep(mean(wexp),2), mar=rep(mean(mar),2), paro=rep(mean(paro),2), prio=rep(mean(prio),2)) data.new plot(survfit(m, newdata=data.new), conf.int=T, lty=c(1,2), col=c(1,2), ylim=c(.6,1), xlab="Weeks", ylab="Proportion not arrested") legend(locator(1), legend=c("fin=0","fin=1"), lty=c(1,2), col=c(1,2))