# [R] survSplit: further exploration and related topics

Danardono daodao99 at student.umu.se
Thu Nov 11 12:04:29 CET 2004

```While waiting for R.2.0.1 or 2.1, for you  who need function for this
survival-splitting business, as I do,   this 'survcut' function below
It is not an elegant nor efficient function but it works, at least for
some examples below.

data(aml)
m1<-coxph(Surv(time,status)~x,data=aml)

#unfortunately, start time must be created first
#but not a big deal I believe
aml\$t0<-0

#then try :

d1<-survcut(cut=c(5,10,50),c("t0","time","status"),data=aml)
coxph(Surv(t0,time,status)~x,data=d1)
d2<-survcut(cut=c(9,12,40),c("t0","time","status"),data=d1)
coxph(Surv(t0,time,status)~x,data=d2)
d3<-survcut(cut=c(9,12,40),c("t0","time","status"),data=d2)
coxph(Surv(t0,time,status)~x,data=d3)

# splitting at the risk times
# useful for coxph with time-dependent covariate
d4<-survcut(cut=unique(aml\$time[aml\$status==1]),c("t0","time","status"),data=d2)
coxph(Surv(t0,time,status)~x,data=d4)

# "random" splitting
dr<-survcut(cut=runif(rpois(1,4),0,100),c("t0","time","status"),data=d1)
dim(dr)
coxph(Surv(t0,time,status)~x,data=dr)

# "per unit time" splitting
d5<-survcut(cut=0:161,c("t0","time","status"),data=d4)
coxph(Surv(t0,time,status)~x,data=d5)

#### the code --------------------------------------------------
survcut<-function (cuts, surv = c("t0", "t1", "event"), data,
id = NULL, addv = TRUE, sq = FALSE)
# cuts: vector of timepoints to cut at
# surv: a Surv like input
# id :  (optional) variable name, if the data has an id variable
#advv :  (optional) include other variables in the output?
#sq: (optional) sequence of splitting , may be similar with episode in
survSplit
{
#cutting one counting process survival line
cutting <- function(x, a, s) {
x <- as.numeric(x)
tmp <- sort(c(x[2:3], a[(x[2] < a) & (x[3] > a)]))
n <- length(tmp)
idx <- rep(x[1], n - 1)
t0 <- tmp[1:(n - 1)]
t1 <- tmp[2:n]
event <- rep(0, n - 1)
event[n - 1] <- x[4]
if (s)
data.frame(idx, t0, t1, event, s = 1:(n - 1))
else data.frame(idx, t0, t1, event)
}

#Note that this database-joint-like function is similar to 'merge'
#I made this before I knew 'merge' is available in R
#merge can substitute this function, I believe
#x: the main data (all rows will be selected)
#variable: variable name (character vector) from refdat
#key: key variable has to be exist in x and refdat
#refdat: reference data, must have unique 'key' id
addvars<-function (x, variable, key = "id", refdat)
{
nama <- names(refdat)
if (is.numeric(variable)) {
if (!prod(variable %in% 1:length(nama)))
stop("variable does not exist")
newname <- names(refdat)[variable]
}
else {
if (!prod(variable %in% nama))
stop("variable does not exist")
newname <- variable
}
if (!(length(unique(refdat[, key])) == length(refdat[, key])))
stop("key must be unique")
newvar <- refdat[, variable, drop = FALSE]
newvar <- newvar[match(x[, key], refdat[, key]), , drop = FALSE]
x <- cbind(x, newvar)
return(x)
}

vars <- names(data)
if (sum(surv %in% vars) != 3)
stop("one or more surv variables do not exist")
idx <- 1:NROW(data)
data<- cbind(idx, data)
t0 <- data[, surv[1]]
t1 <- data[, surv[2]]
event <- data[, surv[3]]
x <- data.frame(idx, t0, t1, event)
out <- by(x, list(1:NROW(x)), cutting, a = cuts, s = sq)
out <- do.call("rbind", out)
rownames(out) <- 1:NROW(out)
names(out)[2:4] <- surv
out <- addvars(out, setdiff(names(data), c(surv, "idx")),
key = "idx", refdat = data)
out <- out[, -1]
if (!is.null(id)) {
slct <- c(id, setdiff(names(out), id))
out <- subset(out, select = slct)
}
out
}

-----------------------------
/Danar

```