[R] Exact test for count data
soeren.vogel at eawag.ch
soeren.vogel at eawag.ch
Sat Nov 28 18:37:49 CET 2009
Hello!
Bortz, Lienert, & Boehnke (2008, pp. 140--142) suggest an exact
polynomial test for low frequency tables. I used it recently, and
thus, created the code attached. Maybe someone would use (and likely
modify) it or incorporate it into their package.
Sören
References:
Bortz, J., Lienert, G. A., & Boehnke, K. (2008). Verteilungsfreie
Methoden in der Biostatistik. Berlin: Springer
Code:
library(forensim)
myfun <- function(cur, exp, p.obs, force=F){
if(length(cur) != length(exp)){
stop("wrong length of dimensions, try transpose the matrix\n")
}
# keep users from hot laptops
if(!force){
if(Cmn(sum(obs), length(obs)) > (2 * (10^6))){
stop("Use option \"force=T\" to compute more than 2 million
combinations.\n")
}
}
p.cur <- factorial(sum(cur)) / prod(sapply(cur, function(x)
factorial(x))) * prod(exp ^ cur)
if(p.cur <= p.obs){
return(p.cur)
}
}
# Trial 1, Book example
d.sta <- Sys.time()
exp <- c(.28, .08, .04, .42, .12, .06)
exp <- exp/sum(exp)
obs <- c(0, 3, 0, 0, 0, 0)
Cmn(sum(obs), length(obs)) # 56
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 0.1103680 secs
# Trial 2
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 0, 2, 0, 1, 1, 0, 5, 3) # now with a longer vector
Cmn(sum(obs), length(obs)) # 490314
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 1.050846 mins
# Trial 3
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 2, 2, 0, 1, 1, 0, 5, 3) # changed 0 to 2 on position 2
Cmn(sum(obs), length(obs)) # 1081575
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 2.425888 mins
# Trial 4
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 2, 2, 0, 2, 1, 0, 5, 3) # changed 1 to 2 on position 5
Cmn(sum(obs), length(obs)) # 1562275
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 3.658092 mins
# Trial 5
d.sta <- Sys.time()
exp <- c(3, 5, 7, 4, 5, 4, 2, 9, 1)
exp <- exp/sum(exp)
obs <- c(3, 3, 2, 0, 2, 1, 0, 5, 3) # changed 1 to 2 on position 5
Cmn(sum(obs), length(obs)) # 2220075
p0 <- factorial(sum(obs)) / prod(sapply(obs, function(x)
factorial(x))) * prod(exp ^ obs)
mat <- comb(sum(obs), length(obs))
sum(unlist(apply(mat, 1, function(x) myfun(x, exp, p0))))
d.sto <- Sys.time()
difftime(d.sto, d.sta) # Time difference of 3.658092 mins
--
Sören Vogel, Dipl.-Psych. (Univ.), PhD-Student, Eawag, Dept. SIAM
http://www.eawag.ch, http://sozmod.eawag.ch
More information about the R-help
mailing list