[R] Exact test for count data

soeren.vogel at eawag.ch soeren.vogel at eawag.ch
Sat Nov 28 18:37:49 CET 2009


Hello!

Bortz, Lienert, & Boehnke (2008, pp. 140--142) suggest an exact  
polynomial test for low frequency tables. I used it recently, and  
thus, created the code attached. Maybe someone would use (and likely  
modify) it or incorporate it into their package.

Sören

References:

Bortz, J., Lienert, G. A., & Boehnke, K. (2008). Verteilungsfreie  
Methoden in der Biostatistik. Berlin: Springer

Code:

library(forensim)
myfun <- function(cur, exp, p.obs, force=F){
   if(length(cur) != length(exp)){
     stop("wrong length of dimensions, try transpose the matrix\n")
   }
   # keep users from hot laptops
   if(!force){
     if(Cmn(sum(obs), length(obs)) > (2 * (10^6))){
       stop("Use option \"force=T\" to compute more than 2 million  
combinations.\n")
     }
   }
   p.cur <- factorial(sum(cur)) / prod(sapply(cur, function(x)  
factorial(x))) * prod(exp ^ cur)
   if(p.cur <= p.obs){
     return(p.cur)
   }
}

# Trial 1, Book example
d.sta <- Sys.time()
exp <- c(.28, .08, .04, .42, .12, .06)
exp <- exp/sum(exp)
obs <- c(0, 3, 0, 0, 0, 0)
Cmn(sum(obs), length(obs)) # 56
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)  
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 0.1103680 secs

# Trial 2
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 0, 2, 0, 1, 1, 0, 5, 3) # now with a longer vector
Cmn(sum(obs), length(obs)) # 490314
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)  
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 1.050846 mins

# Trial 3
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 2, 2, 0, 1, 1, 0, 5, 3) # changed 0 to 2 on position 2
Cmn(sum(obs), length(obs)) # 1081575
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)  
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 2.425888 mins

# Trial 4
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 2, 2, 0, 2, 1, 0, 5, 3) # changed 1 to 2 on position 5
Cmn(sum(obs), length(obs)) # 1562275
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)  
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 3.658092 mins

# Trial 5
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 3, 2, 0, 2, 1, 0, 5, 3) # changed 1 to 2 on position 5
Cmn(sum(obs), length(obs)) # 2220075
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)  
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 3.658092 mins

-- 
Sören Vogel, Dipl.-Psych. (Univ.), PhD-Student, Eawag, Dept. SIAM
http://www.eawag.ch, http://sozmod.eawag.ch




More information about the R-help mailing list