[R] G-test : log-likelihood ratio test
Peter L. Hurd
phurd at ualberta.ca
Thu Sep 20 18:48:34 CEST 2001
I've written a g.test() aka log-likelihood ratio test function for my
opwn use. It's something I've seen requested (and looked to find
myself) on this list a few times.
It has the same basic syntax as chisq.test().
It does both goodness of fit tests and tests of independence.
Yates' and Williams' corrections are implemented.
I've put some examples from Sokal & Rohlf up at
http://www.psych.ualberta.ca/~phurd/cruft
Feedback welcomed,
-P.
# Kludgy log-likelihood test of independence & goodness of fit
# Does Williams' and Yates' correction
#
# G (TOI) calculation from Zar (2000) Biostatistical Analysis 4th ed.
# G (GOF) calculation from Sokal & Rohlf (1995) Biometry 3rd ed.
# q calculation from Sokal & Rohlf (1995) Biometry 3rd ed.
# TOI Yates' correction taken from Mike Camann's 2x2 G-test fn.
# GOF Yates' correction as described in Zar (2000)
# more stuff taken from ctest's chisq.test()
#
# ToDo:
# 1) Beautify
# 2) Add warnings for violations
# 3) Make appropriate corrections happen by default
#
# V2.1 Pete Hurd Sept 20 2001.
g.test <- function(x, y = NULL, correct="none", p = rep(1/length(x), length(x)))
{
DNAME <- deparse(substitute(x))
if (is.data.frame(x)) x <- as.matrix(x)
if (is.matrix(x)) {
if (min(dim(x)) == 1)
x <- as.vector(x)
}
if (!is.matrix(x) && !is.null(y)) {
if (length(x) != length(y))
stop("x and y must have the same length")
DNAME <- paste(DNAME, "and", deparse(substitute(y)))
OK <- complete.cases(x, y)
x <- as.factor(x[OK])
y <- as.factor(y[OK])
if ((nlevels(x) < 2) || (nlevels(y) < 2))
stop("x and y must have at least 2 levels")
x <- table(x, y)
}
if (any(x < 0) || any(is.na(x)))
stop("all entries of x must be nonnegative and finite")
if ((n <- sum(x)) == 0)
stop("at least one entry of x must be positive")
#If x is matrix, do test of independence
if (is.matrix(x)) {
#this block was the separate g.stat function
cell.tot <- row.tot <- col.tot <- grand.tot <- 0
nrows<-nrow(x)
ncols<-ncol(x)
if (correct=="yates"){ # Do Yates' correction
if(dim(x)[1]!=2 || dim(x)[2]!=2) # check for 2x2 matrix
stop("Yates' correction requires a 2 x 2 matrix")
if((x[1,1]*x[2,2])-(x[1,2]*x[2,1]) > 0)
{
x[1,1] <- x[1,1] - 0.5
x[2,2] <- x[2,2] - 0.5
x[1,2] <- x[1,2] + 0.5
x[2,1] <- x[2,1] + 0.5
}
else
{
x[1,1] <- x[1,1] + 0.5
x[2,2] <- x[2,2] + 0.5
x[1,2] <- x[1,2] - 0.5
x[2,1] <- x[2,1] - 0.5
}
}
# calculate G (Zar, 2000)
for (i in 1:nrows){
for (j in 1:ncols){
if (x[i,j] != 0) cell.tot <- cell.tot + x[i,j] * log10(x[i,j])
}
}
for (i in 1:nrows){ row.tot <- row.tot + (sum(x[i,])) * log10(sum(x[i,])) }
for (j in 1:ncols){ col.tot <- col.tot + (sum(x[,j])) * log10(sum(x[,j])) }
grand.tot <- sum(x)*log10(sum(x))
total <- cell.tot - row.tot - col.tot + grand.tot
G <- 4.60517 * total
q <- 1
if (correct=="williams"){ # Do Williams' correction
row.tot <- col.tot <- 0
for (i in 1:nrows){ row.tot <- row.tot + 1/(sum(x[i,])) }
for (j in 1:ncols){ col.tot <- col.tot + 1/(sum(x[,j])) }
q <- 1+ ((n*row.tot-1)*(n*col.tot-1))/(6*n*(ncols-1)*(nrows-1))
}
G <- (G/q)
# end of old g.stat function
STATISTIC <- G
PARAMETER <- (nrow(x)-1)*(ncol(x)-1)
PVAL <- 1-pchisq(STATISTIC,df=PARAMETER)
if(correct=="none")
METHOD <- "Log likelihood ratio (G-test) test of independence without correction"
if(correct=="williams")
METHOD <- "Log likelihood ratio (G-test) test of independence with Williams' correction"
if(correct=="yates")
METHOD <- "Log likelihood ratio (G-test) test of independence with Yates' correction"
}
else {
# x is not a matrix, so we do Goodness of Fit
METHOD <- "Log likelihood ratio (G-test) goodness of fit test"
if (length(x) == 1)
stop("x must at least have 2 elements")
if (length(x) != length(p))
stop("x and p must have the same number of elements")
E <- n * p
if (correct=="yates"){ # Do Yates' correction
if(length(x)!=2)
stop("Yates' correction requires 2 data values")
if ( (x[1]-E[1]) > 0.25) {
x[1] <- x[1]-0.5
x[2] <- x[2]+0.5
}
else if ( (E[1]-x[1]) > 0.25){
x[1] <- x[1]+0.5
x[2] <- x[2]-0.5
}
}
names(E) <- names(x)
tot <- 0
for (i in 1:length(x)){
if (x[i] != 0) tot <- tot + x[i] * log(x[i]/E[i])
}
G <- (2*tot)
if (correct=="williams"){ # Do Williams' correction
q <- 1+(length(x)+1)/(6*n)
G <- (G/q)
}
STATISTIC <- (G)
PARAMETER <- length(x) - 1
PVAL <- pchisq(STATISTIC, PARAMETER, lower = FALSE)
}
names(STATISTIC) <- "Log likelihood ratio statistic (G)"
names(PARAMETER) <- "X-squared df"
names(PVAL) <- "p.vlaue"
structure(list(statistic=STATISTIC,parameter=PARAMETER,p.value=PVAL,
method=METHOD,data.name=DNAME),class="htest")
}
--
Peter L. Hurd Department of Psychology
Assistant Professor University of Alberta
phurd at ualberta.ca Edmonton, Alberta
http://www.psych.ualberta.ca/~phurd T6G 2E9 Canada
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
More information about the R-help
mailing list