[R] Biobass, SAGx, and Jonckheere-Terpstra test

Fri Jun 30 14:21:47 CEST 2006

Horace,
Last year I found another Jonckheere-Terpstra function at 
http://www.biostat.wustl.edu/archives/html/s-news/2000-10/msg00126.html
Using that and the version in SAGx and wanting a few other options, I 
created the version below.

Hope it is useful,
Chris
-- 
Christopher Andrews, PhD
SUNY Buffalo, Department of Biostatistics

#####
#####
#####
j.test <- function(x, y,
   alternative = c("two.sided", "decreasing", "increasing"),
   asymp=TRUE, correct=FALSE, perm=0, na.action=c("omit", "fail"),
   permgraph=FALSE, permreps=FALSE, ...) {

#Jonckheere-Terpstra test
#x = response
#y = group membership
#alternative hypothesis
#asymptotic = asymptotic formula for variance or don't bother
#correct = continuity correction
#perm = number of repetitions for a permutation test
#na.action = what to do with missing data
#permgraph = draw a histogram of the permutations?
#permreps = return the permutations?
#... for graph

   alternative <- match.arg(alternative)
   complete <- complete.cases(x,y)
   nn <- sum(complete)
   cat(nn, "complete cases.\n")
   if (!all(complete) && (na.action=="fail")) {
     cat("option na.action is 'fail' and",
       sum(!complete), "cases are not complete.\n")
     return(NULL)
   }
   response <- x[complete]
   groupvec <- y[complete, drop=TRUE]

   if(!is.ordered(groupvec)) {
     groupvec <- as.ordered(groupvec)
     cat("y was not an ordered factor.  Redefined to be one.\n")
   }

   lev <- levels(groupvec)
   nlev <- length(lev)
   if(nlev <= 2) {
     if (nlev < 2) {
       cat("Not enough groups (k =", nlev, ") for Jonckheere.\n")
       return(NULL)
     } else {
       cat("Two groups.  Could use wilcox.test.\n")
     }
   }

   computestat <- function(response, groupvec, n.groups) {
     pairwise <- function(x, y) {
       length(x)*(length(y) + (1+length(x))/2) - 
sum(rank(c(x,y))[seq(along=x)])
     }
     H<-0
     for (i in seq(n.groups-1))
       for (j in seq(i+1, n.groups))
         H <- H+pairwise(response[groupvec == lev[i]], response[groupvec 
== lev[j]])
     return(H)
   }

   retval <- list(statistic=computestat(response, groupvec, nlev),
     alternative = paste(alternative,
       paste(levels(groupvec),collapse=switch(alternative,
         two.sided = ", ", decreasing= " > ", increasing = " < ")), 
sep=": "),
     method = "Jonckheere",
     data.name = paste(deparse(substitute(x)), "by", 
deparse(substitute(y))))

   if (asymp) {
     ns <- tabulate(as.numeric(groupvec))
     retval$EH <- (nn * nn - sum(ns * ns))/4
     retval$VH <- if (!any(duplicated(response))) {
       (nn * nn * (2 * nn + 3) - sum(ns * ns * (2 * ns + 3)))/72
     } else {
       ds <- as.vector(table(response))
       ((nn*(nn-1)*(2*nn+5) - sum(ns*(ns-1)*(2*ns+5)) - 
sum(ds*(ds-1)*(2*ds+5)))/72 +
       sum(ns*(ns-1)*(ns-2))*sum(ds*(ds-1)*(ds-2))/(36*nn*(nn-1)*(nn-2)) +
       sum(ns*(ns-1))*sum(ds*(ds-1))/(8*nn*(nn - 1)))
     }
     pp <- if (!correct) {
       pnorm(retval$statistic, retval$EH, sqrt(retval$VH))
     } else if (retval$statistic >= retval$EH + 1) {
       pnorm(retval$statistic - 1, retval$EH, sqrt(retval$VH))
     } else if (retval$statistic <= retval$EH - 1) {
       pnorm(retval$statistic + 1, retval$EH, sqrt(retval$VH))
     } else {
       .5
     }
     retval$p.value <- switch(alternative,
       two.sided = 2*min(pp,1-pp), decreasing = pp, increasing = 1-pp)
   }
   if (perm>0) {
     reps <- numeric(perm)
     for (i in seq(perm)) {
       reps[i] <- computestat(response, sample(groupvec), nlev)
     }
     retval$EH.perm <- mean(reps)
     retval$VH.perm <- var(reps)
     ppp <- if (!correct) {
       pnorm(retval$statistic, retval$EH.perm, sqrt(retval$VH.perm))
     } else if (retval$statistic >= retval$EH.perm + 1) {
       pnorm(retval$statistic - 1, retval$EH.perm, sqrt(retval$VH.perm))
     } else if (retval$statistic <= retval$EH.perm - 1) {
       pnorm(retval$statistic + 1, retval$EH.perm, sqrt(retval$VH.perm))
     } else {
       .5
     }
     retval$p.value.perm <- switch(alternative,
       two.sided = 2*min(ppp,1-ppp), decreasing = ppp, increasing = 1-ppp)
     rr <- rank(c(retval$statistic, reps))[1]/(perm+2)
     retval$p.value.rank <- switch(alternative,
       two.sided = 2*min(rr,1-rr), decreasing = rr, increasing = 1-rr)
     if (permreps)
       retval$reps <- reps
     if (permgraph) {
       hist(reps, xlab="Jonckheere Statistic", prob=TRUE,
         main="Histogram of Permutations", sub=paste(perm, "permutations"))
       abline(v=retval$statistic, col="red")
       points((-3:3)*sqrt(retval$VH.perm)+retval$EH.perm, rep(0,7), 
col="blue",
         pch=as.character(c(3:1,0:3)))
     }
   }
   class(retval) <- "htest"
   names(retval$statistic) <- "J"
   return(retval)

#returns list (of class htest) with the following components
#Always:
#statistic = value of J
#alternative = same as input
#method = "Jonckheere"
#data.name = source of data based on call
#Most of the time (i.e., when asymp=TRUE):
#EH = expected value based on sample size
#VH = variance (adjusted for ties if necessary)
#p.value = asymptotic p value
#Sometimes (i.e., when perm>0):
#EH.perm = expected value based on permutations
#VH.perm = variance based on permutations
#p.value.perm = p value based on normal approximation to permutations
#p.value.rank = p value based on rank within permutation
#perms = vector of permutation.
}

j.test(rnorm(54), ordered(rep(letters[1:27],2)), na.action="fail")
j.test(rnorm(54), ordered(rep(letters[1:27],2)), na.action="omit", 
asymp=FALSE, perm=1000)
j.test(ttt <- rnorm(54), ordered(rep(letters[1:2],27)), alt="d")
wilcox.test(ttt ~ordered(rep(letters[1:2],27)), correct=FALSE, 
exact=FALSE, alt="g")

#Higgins p 178 Example
totals <- c(10,12,17,30,9,9,11,35,7,8,12,43)
tlevels <- factor(c("A", "B", "C"),
   levels=c("A", "B", "C"),
   labels=c("A", "B", "C"),
   ordered=TRUE)
Treatment <- rep(
   rep(tlevels, each=4)
   , times = totals)
rlevels <- factor(c("Severe", "Moderate", "Slight", "None"),
   levels=rev(c("Severe", "Moderate", "Slight", "None")),
   labels=rev(c("Severe", "Moderate", "Slight", "None")),
   ordered=TRUE)
Response <- rep(
   rep(rlevels, times=3)
   , times=totals)

result <- j.test(Response, Treatment, alt="t", perm=1000, 
permgraph=TRUE, correct=TRUE)
result
# note that the standard print does not report the permutation pvalues.
# But they are there
result$p.value.perm
result$p.value.rank