[R-sig-eco] Is there a package that does a G test?
Gianandrea La Porta
glaporta at freeweb.org
Thu Jul 17 08:20:56 CEST 2008
James Milks <james.milks at ...> writes:
>
> I have data with the following structure:
>
> Site Species #Vines DBH
> 1 1 0 12.3
> 1 1 4 17
> 1 2 12 48
> 1 3 1 24
> 1 3 0 19
> 2 1 2 15.8
> 2 2 4 20
> 2 3 3 30
> 2 2 9 60
> 3 3 13 23
> 3 2 2 44
> 3 1 0 13
>
> I'm interested in doing a G test to test for differences between
> sites, species, and species within sites using (#Vines / DBH) as the
> dependent variable. Are there any packages that perform a G test on
> that sort of data?
>
> Thanks in advance.
>
> Jim Milks
>
> Degree Candidate
> Environmental Sciences Ph.D. Program
> Wright State University
> Dayton, OH 45435
>
Dear James,
I had the same necessity and I found a solution using this code.
I hope this helps. Cheers,
Gianandrea La Porta
Dr. Gianandrea La Porta
Dip. Biologia Cellulare e Ambientale
Università degli Studi di Perugia
via Pascoli, 06123 Perugia
tel. 075.5855711
web: www.bio.unipg.it/staff/gianandrea.htm
Server gpg hkp://subkeys.pgp.net
------
# CODE g.test
# Log-likelihood tests of independence & goodness of fit
# Does Williams' and Yates' correction
# does Monte Carlo simulation of p-values, via gtestsim.c
#
# G & q calculation from Sokal & Rohlf (1995) Biometry 3rd ed.
# TOI Yates' correction taken from Mike Camann's 2x2 G-test fn.
# GOF Yates' correction as described in Zar (2000)
# more stuff taken from ctest's chisq.test()
#
# ToDo:
# 1) Beautify
# 2) Add warnings for violations
# 3) Make appropriate corrections happen by default
#
# V3.3 Pete Hurd Sept 29 2001. phurd at ualberta.ca
g.test <- function(x, y = NULL, correct="none",
p = rep(1/length(x), length(x)), simulate.p.value = FALSE, B = 2000)
{
DNAME <- deparse(substitute(x))
if (is.data.frame(x)) x <- as.matrix(x)
if (is.matrix(x)) {
if (min(dim(x)) == 1)
x <- as.vector(x)
}
if (!is.matrix(x) && !is.null(y)) {
if (length(x) != length(y))
stop("x and y must have the same length")
DNAME <- paste(DNAME, "and", deparse(substitute(y)))
OK <- complete.cases(x, y)
x <- as.factor(x[OK])
y <- as.factor(y[OK])
if ((nlevels(x) < 2) || (nlevels(y) < 2))
stop("x and y must have at least 2 levels")
x <- table(x, y)
}
if (any(x < 0) || any(is.na(x)))
stop("all entries of x must be nonnegative and finite")
if ((n <- sum(x)) == 0)
stop("at least one entry of x must be positive")
#If x is matrix, do test of independence
if (is.matrix(x)) {
#Test of Independence
nrows<-nrow(x)
ncols<-ncol(x)
if (correct=="yates"){ # Do Yates' correction?
if(dim(x)[1]!=2 || dim(x)[2]!=2) # check for 2x2 matrix
stop("Yates' correction requires a 2 x 2 matrix")
if((x[1,1]*x[2,2])-(x[1,2]*x[2,1]) > 0)
{
x[1,1] <- x[1,1] - 0.5
x[2,2] <- x[2,2] - 0.5
x[1,2] <- x[1,2] + 0.5
x[2,1] <- x[2,1] + 0.5
}
else
{
x[1,1] <- x[1,1] + 0.5
x[2,2] <- x[2,2] + 0.5
x[1,2] <- x[1,2] - 0.5
x[2,1] <- x[2,1] - 0.5
}
}
sr <- apply(x,1,sum)
sc <- apply(x,2,sum)
E <- outer(sr,sc, "*")/n
# are we doing a monte-carlo?
# no monte carlo GOF?
if (simulate.p.value){
METHOD <- paste("Log likelihood ratio (G-test) test of independence\n\t
with simulated p-value based on", B, "replicates")
tmp <- .C("gtestsim", as.integer(nrows), as.integer(ncols),
as.integer(sr), as.integer(sc), as.integer(n), as.integer(B),
as.double(E), integer(nrows * ncols), double(n+1),
integer(ncols), results=double(B), PACKAGE= "ctest")
g <- 0
for (i in 1:nrows){
for (j in 1:ncols){
if (x[i,j] != 0) g <- g + x[i,j] * log(x[i,j]/E[i,j])
}
}
STATISTIC <- G <- 2 * g
PARAMETER <- NA
PVAL <- sum(tmp$results >= STATISTIC)/B
}
else {
# no monte-carlo
# calculate G
g <- 0
for (i in 1:nrows){
for (j in 1:ncols){
if (x[i,j] != 0) g <- g + x[i,j] * log(x[i,j]/E[i,j])
}
}
q <- 1
if (correct=="williams"){ # Do Williams' correction
row.tot <- col.tot <- 0
for (i in 1:nrows){ row.tot <- row.tot + 1/(sum(x[i,])) }
for (j in 1:ncols){ col.tot <- col.tot + 1/(sum(x[,j])) }
q <- 1+ ((n*row.tot-1)*(n*col.tot-1))/(6*n*(ncols-1)*(nrows-1))
}
STATISTIC <- G <- 2 * g / q
PARAMETER <- (nrow(x)-1)*(ncol(x)-1)
PVAL <- 1-pchisq(STATISTIC,df=PARAMETER)
if(correct=="none")
METHOD <- "Log likelihood ratio (G-test) test of independence without
correction"
if(correct=="williams")
METHOD <- "Log likelihood ratio (G-test) test of independence with
Williams' correction"
if(correct=="yates")
METHOD <- "Log likelihood ratio (G-test) test of independence with
Yates' correction"
}
}
else {
# x is not a matrix, so we do Goodness of Fit
METHOD <- "Log likelihood ratio (G-test) goodness of fit test"
if (length(x) == 1)
stop("x must at least have 2 elements")
if (length(x) != length(p))
stop("x and p must have the same number of elements")
E <- n * p
if (correct=="yates"){ # Do Yates' correction
if(length(x)!=2)
stop("Yates' correction requires 2 data values")
if ( (x[1]-E[1]) > 0.25) {
x[1] <- x[1]-0.5
x[2] <- x[2]+0.5
}
else if ( (E[1]-x[1]) > 0.25){
x[1] <- x[1]+0.5
x[2] <- x[2]-0.5
}
}
names(E) <- names(x)
g <- 0
for (i in 1:length(x)){
if (x[i] != 0) g <- g + x[i] * log(x[i]/E[i])
}
q <- 1
if (correct=="williams"){ # Do Williams' correction
q <- 1+(length(x)+1)/(6*n)
}
STATISTIC <- G <- 2*g/q
PARAMETER <- length(x) - 1
PVAL <- pchisq(STATISTIC, PARAMETER, lower = FALSE)
}
names(STATISTIC) <- "Log likelihood ratio statistic (G)"
names(PARAMETER) <- "X-squared df"
names(PVAL) <- "p.value"
structure(list(statistic=STATISTIC,parameter=PARAMETER,p.value=PVAL,
method=METHOD,data.name=DNAME, observed=x, expected=E),
class="htest")
}
More information about the R-sig-ecology
mailing list