[R-sig-ME] lme/anova vs paired t test query

Mon May 23 20:55:32 CEST 2011

  It was my understanding that testing a two-level fixed effect in a
one-way randomized block ANOVA using REML should be identical to a
paired t test ... but, at least with the attempt below, using
lme(...,method="REML") and t.test(...,paired=TRUE,var.equal=TRUE) seems
to give slightly different results.  They're not different in any
important way, but the difference bothers me.  Can anyone see what I'm
missing?

  Ben Bolker

dat <- structure(list(sex = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("f",
"m"), class = "factor"), prevalence = c(0, 0.375, 0.133333333333333,
0.176470588235294, 0.1875, 0, 0, 1, 1, 0.5, 0.6, 0.333333333333333,
0.5, 0, 0.333333333333333, 0, 0.5, 0, 0.625, 0.333333333333333,
0.5, 0, 0.333333333333333, 0.153846153846154, 0.222222222222222,
0.5, 1, 0.5, 0, 0.277777777777778, 0.125, 0, 0, 0.428571428571429,
0.451612903225806, 0.362068965517241), tripsite = structure(c(1L,
1L, 4L, 4L, 14L, 14L, 5L, 5L, 8L, 8L, 15L, 15L, 6L, 6L, 9L, 9L,
11L, 11L, 16L, 16L, 2L, 2L, 7L, 7L, 10L, 10L, 13L, 13L, 17L,
17L, 3L, 3L, 12L, 12L, 18L, 18L), .Label = c("1.2", "4.2", "5.2",
"1.3", "2.3", "3.3", "4.3", "2.4", "3.4", "4.4", "3.5", "5.5",
"4.6", "1.9", "2.9", "3.9", "4.9", "5.9"), class = "factor")), .Names =
c("sex",
"prevalence", "tripsite"), row.names = c(1L, 2L, 3L, 4L, 9L,
10L, 11L, 12L, 13L, 14L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 38L, 39L, 40L,
41L, 42L, 43L, 45L, 46L), class = "data.frame")

t0 <-
with(dat,t.test(prevalence[sex=="f"],prevalence[sex=="m"],paired=TRUE,var.equal=TRUE))

library(nlme)

(a1 <- anova(lme(prevalence~sex,random=~1|tripsite,data=dat,method="REML")))

(fstat0 <- t0$statistic^2)     ## 0.789627
(fstat1 <- a1[["F-value"]][2]) ## 0.8056624

tmpf <- function(dat) {
  t0 <-
with(dat,t.test(prevalence[sex=="f"],prevalence[sex=="m"],paired=TRUE,var.equal=TRUE))
  (a1 <-
anova(lme(prevalence~sex,random=~1|tripsite,data=dat,method="REML")))
  (fstat0 <- t0$statistic^2)     ## 0.789627
  (fstat1 <- a1[["F-value"]][2]) ## 0.8056624
  fstat0-fstat1
}

  If I then redo this with random data, the results are within 1e-5
set.seed(1001)
rr <- replicate(500,tmpf(transform(dat,prevalence=rnorm(nrow(dat)))))
##
##  summary(rr)
##      Min.    1st Qu.     Median       Mean    3rd Qu.       Max.
## -1.7770000 -0.0568400 -0.0000015 -0.0722900  0.0000000  0.0000139

mean(abs(rr)<1e-5)
## 0.502

 hist(log10(abs(rr)),breaks=50,col="gray")

## this is sort of interesting, suggesting a bimodal distribution with
one component (the 'good ones' centered at 10^{-8} and another centered
at 0.1 ...