[R] Strange Problem with "proj" and "aov" for split-plot analysis output

Sun Aug 1 18:14:39 CEST 2004

I'm using R 1.8.1 on Red Hat Linux 9. I've worked out the linear model 
decomposition by hand. Using "aov" with "Error" to fit a split-plot 
model, I get very different results depending on whether I use the 
"data" argument for "aov" or not. When I use the "data" argument, the 
ANOVA table from "summary" is correct but the results "proj" are 
comletely wrong (in particular, the residuals for both strata). The same 
with the results from "model.tables" for effects and even means. 
However, if I attach the data frame and I don't use the "data" argument 
with "aov", the results for everything appear to be correct and agrees 
completely with the manual decomposition. (The sums of squares based on 
the components in the decomposition table agree completely with the 
"summary" of the "aov" object.) Is this a known problem? I also tried 
this on the Montana Rweb site and obtained the same incorrect results 
when usign the "data" argument. I've checked to make sure that the 
vector object names are not used outside of the data frame for the 
experiment data on my computer. There is no way this could happen when 
using Rweb.

Rick B.

Here are the details:

# here is the experiment data
 > choco.split.04
     subject brand   ctime type
1  christian     h 104.000   mc
3  christina     h  33.500   mc
5       ERIN     h  46.000   mc
7     gautam     h  25.295   mc
11       joe     h  75.000   mc
15       Lei     h  44.500   mc
9      helen     n  42.500   mc
13  jonathon     n  55.500   mc
17     pablo     n  47.000   mc
19     purin     n  97.000   mc
21     scott     n  85.000   mc
23     vince     n  65.850   mc
2  christian     h 168.000   wc
4  christina     h  37.500   wc
6       ERIN     h  45.500   wc
8     gautam     h  28.860   wc
12       joe     h  70.000   wc
16       Lei     h  52.500   wc
10     helen     n  57.500   wc
14  jonathon     n  46.000   wc
18     pablo     n  58.000   wc
20     purin     n 113.000   wc
22     scott     n  97.500   wc
24     vince     n  89.650   wc
# data as an R object
choco.split.04 <-
structure(list(subject = structure(c(1, 2, 3, 4, 6, 8, 5, 7,
9, 10, 11, 12, 1, 2, 3, 4, 6, 8, 5, 7, 9, 10, 11, 12), class = "factor", 
.Label = c("christian",
"christina", "ERIN", "gautam", "helen", "joe", "jonathon", "Lei",
"pablo", "purin", "scott", "vince")), brand = structure(c(1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
2, 2), class = "factor", .Label = c("h", "n")), ctime = c(104,
33.5, 46, 25.295, 75, 44.5, 42.5, 55.5, 47, 97, 85, 65.85, 168,
37.5, 45.5, 28.86, 70, 52.5, 57.5, 46, 58, 113, 97.5, 89.65),
    type = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), class = "factor", .Label = c("mc",
    "wc"))), .Names = c("subject", "brand", "ctime", "type"), row.names 
= c("1",
"3", "5", "7", "11", "15", "9", "13", "17", "19", "21", "23",
"2", "4", "6", "8", "12", "16", "10", "14", "18", "20", "22",
"24"), class = "data.frame")
# split-plot analysis - using "aov" with argument "data"
 > summary(aov.split.04 <- 
aov(ctime~brand*type+Error(subject),data=choco.split.04))

Error: subject
          Df  Sum Sq Mean Sq F value Pr(>F)
brand      1   639.1   639.1  0.2972 0.5976
Residuals 10 21503.3  2150.3

Error: Within
           Df  Sum Sq Mean Sq F value  Pr(>F)
type        1  850.43  850.43  4.3326 0.06404 .
brand:type  1    1.16    1.16  0.0059 0.94037
Residuals  10 1962.86  196.29
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

# here is the "proj" output - and it's all completely wrong

 > proj(aov.split.04)
(Intercept) :
   (Intercept)
1     66.04813
3     66.04813
5     66.04813
7     66.04813
11    66.04813
15    66.04813
9     66.04813
13    66.04813
17    66.04813
19    66.04813
21    66.04813
23    66.04813
2     66.04813
4     66.04813
6     66.04813
8     66.04813
12    66.04813
16    66.04813
10    66.04813
14    66.04813
18    66.04813
20    66.04813
22    66.04813
24    66.04813
attr(,"df")
attr(,"df")$df
(Intercept)
          1

attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE

attr(,"factors")
attr(,"factors")$"(Intercept)"
[1] "(Intercept)"

subject :
         brand  Residuals
1  -1.15279697 -50.326456
3  -1.49365647 -53.795729
5   7.94608934  -2.679088
7  -9.57184562   9.110149
11  8.31691964  31.336764
15  0.08387137  -8.782543
9  11.06119883  45.752063
13 -7.01978726  25.900087
17 -0.10587652   3.956638
19 -0.09918658   3.706633
21 -0.09353078   3.495274
23 -0.08867137   3.313676
2  -2.17537547  88.387613
4  -1.83451597  -0.519157
6  11.33663826 -52.337849
8  -4.60767598   7.731579
12 -6.06482008 -35.264686
16 -0.08387137   8.782543
10  1.75243781 -10.962735
14 -6.49281005  -2.332554
18  0.10587652  -3.956638
20  0.09918658  -3.706633
22  0.09353078  -3.495274
24  0.08867137  -3.313676
attr(,"df")
attr(,"df")$df
    brand Residuals
        1        10

attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE

attr(,"factors")
attr(,"factors")$brand
[1] "brand"

attr(,"factors")$Residuals
[1] "subject"

Within :
            type  brand:type  Residuals
1    1.396984343  0.04496907   3.011473
3    2.938588912  0.05656711   2.953101
5    3.145124762  0.05812095   2.945280
7    3.247625778  0.05889210   2.941399
11  -3.328231709 -0.22912276   5.730411
15  -2.538082170 -0.31105255 -25.391330
9    4.266448221  0.08884555   3.630619
13  -3.992792267 -0.27283438   3.666296
17 -10.936223358 -0.46063495   6.604985
19  -9.099857196  0.13554870  10.373726
21 -12.073986774  0.35505329  -3.922691
23 -12.879482604  0.38872464  -8.547527
2    6.021798052  0.07976318   2.836355
4    4.480193482  0.06816515   2.894728
6    4.273657632  0.06661130   2.902548
8    4.171156617  0.06584015   2.906430
12  -1.324148016 -0.18585237   6.575126
16   5.418913496  0.02243897 -28.523763
10   3.152334173  0.03588670   2.217210
14   9.399714308  0.30761541   3.257817
18   5.342284161 -0.47550361  -1.871322
20  -1.793920282  0.15803004  -5.774710
22   0.713084037 -0.03900621   2.788084
24  -0.001183597 -0.01706548   5.795756
attr(,"df")
attr(,"df")$df
      type brand:type  Residuals
         1          1         10

attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE

attr(,"factors")
attr(,"factors")$type
[1] "type"

attr(,"factors")$"brand:type"
[1] "brand" "type"

attr(,"factors")$Residuals
[1] "subject" "Within"

# here is the "model.tables" output - and it's all completely wrong

 > model.tables(aov.split.04)
Tables of effects

 brand
           h        n
     0.05825 -0.05825
rep 12.00000 12.00000

 type
        mc     wc
    -3.321  3.321
rep 12.000 12.000

 brand:type
     type
brand mc     wc
  h   -0.054  0.019
  rep  6.000  6.000
  n    0.039 -0.005
  rep  6.000  6.000

# here is "aov" without "data" argument - looks the same as before
 > attach(choco.split.04)
 > summary(aov.split.04 <- aov(ctime~brand*type+Error(subject)))

Error: subject
          Df  Sum Sq Mean Sq F value Pr(>F)
brand      1   639.1   639.1  0.2972 0.5976
Residuals 10 21503.3  2150.3

Error: Within
           Df  Sum Sq Mean Sq F value  Pr(>F)
type        1  850.43  850.43  4.3326 0.06404 .
brand:type  1    1.16    1.16  0.0059 0.94037
Residuals  10 1962.86  196.29
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

# here is the "proj" output and it's correct
# as shown by manual decomposition
 > proj(aov.split.04)
(Intercept) :
   (Intercept)
1     66.04813
2     66.04813
3     66.04813
4     66.04813
5     66.04813
6     66.04813
7     66.04813
8     66.04813
9     66.04813
10    66.04813
11    66.04813
12    66.04813
13    66.04813
14    66.04813
15    66.04813
16    66.04813
17    66.04813
18    66.04813
19    66.04813
20    66.04813
21    66.04813
22    66.04813
23    66.04813
24    66.04813
attr(,"df")
attr(,"df")$df
(Intercept)
          1

attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE

attr(,"factors")
attr(,"factors")$"(Intercept)"
[1] "(Intercept)"

subject :
       brand  Residuals
1  -5.160208  75.112083
2  -5.160208 -25.387917
3  -5.160208 -15.137917
4  -5.160208 -33.810417
5  -5.160208  11.612083
6  -5.160208 -12.387917
7   5.160208 -21.208333
8   5.160208 -20.458333
9   5.160208 -18.708333
10  5.160208  33.791667
11  5.160208  20.041667
12  5.160208   6.541667
13 -5.160208  75.112083
14 -5.160208 -25.387917
15 -5.160208 -15.137917
16 -5.160208 -33.810417
17 -5.160208  11.612083
18 -5.160208 -12.387917
19  5.160208 -21.208333
20  5.160208 -20.458333
21  5.160208 -18.708333
22  5.160208  33.791667
23  5.160208  20.041667
24  5.160208   6.541667
attr(,"df")
attr(,"df")$df
    brand Residuals
        1        10

attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE

attr(,"factors")
attr(,"factors")$brand
[1] "brand"

attr(,"factors")$Residuals
[1] "subject"

Within :
        type brand:type   Residuals
1  -5.952708  -0.219375 -25.8279167
2  -5.952708  -0.219375   4.1720833
3  -5.952708  -0.219375   6.4220833
4  -5.952708  -0.219375   4.3895833
5  -5.952708  -0.219375   8.6720833
6  -5.952708  -0.219375   2.1720833
7  -5.952708   0.219375  -1.7666667
8  -5.952708   0.219375  10.4833333
9  -5.952708   0.219375   0.2333333
10 -5.952708   0.219375  -2.2666667
11 -5.952708   0.219375  -0.5166667
12 -5.952708   0.219375  -6.1666667
13  5.952708   0.219375  25.8279167
14  5.952708   0.219375  -4.1720833
15  5.952708   0.219375  -6.4220833
16  5.952708   0.219375  -4.3895833
17  5.952708   0.219375  -8.6720833
18  5.952708   0.219375  -2.1720833
19  5.952708  -0.219375   1.7666667
20  5.952708  -0.219375 -10.4833333
21  5.952708  -0.219375  -0.2333333
22  5.952708  -0.219375   2.2666667
23  5.952708  -0.219375   0.5166667
24  5.952708  -0.219375   6.1666667
attr(,"df")
attr(,"df")$df
      type brand:type  Residuals
         1          1         10

attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE

attr(,"factors")
attr(,"factors")$type
[1] "type"

attr(,"factors")$"brand:type"
[1] "brand" "type"

attr(,"factors")$Residuals
[1] "subject" "Within"

# here is "model.tables" output and it's correct
 > model.tables(aov.split.04)
Tables of effects

 brand
        h     n
    -5.16  5.16
rep 12.00 12.00

 type
        mc     wc
    -5.953  5.953
rep 12.000 12.000

 brand:type
     type
brand mc     wc
  h   -0.219  0.219
  rep  6.000  6.000
  n    0.219 -0.219
  rep  6.000  6.000

# version information
 > R.Version()
$platform
[1] "i686-pc-linux-gnu"

$arch
[1] "i686"

$os
[1] "linux-gnu"

$system
[1] "i686, linux-gnu"

$status
[1] ""

$major
[1] "1"

$minor
[1] "8.1"

$year
[1] "2003"

$month
[1] "11"

$day
[1] "21"

$language
[1] "R"