[R] Strange Problem with "proj" and "aov" for split-plot analysis output
rab
rab at nauticom.net
Sun Aug 1 18:14:39 CEST 2004
I'm using R 1.8.1 on Red Hat Linux 9. I've worked out the linear model
decomposition by hand. Using "aov" with "Error" to fit a split-plot
model, I get very different results depending on whether I use the
"data" argument for "aov" or not. When I use the "data" argument, the
ANOVA table from "summary" is correct but the results "proj" are
comletely wrong (in particular, the residuals for both strata). The same
with the results from "model.tables" for effects and even means.
However, if I attach the data frame and I don't use the "data" argument
with "aov", the results for everything appear to be correct and agrees
completely with the manual decomposition. (The sums of squares based on
the components in the decomposition table agree completely with the
"summary" of the "aov" object.) Is this a known problem? I also tried
this on the Montana Rweb site and obtained the same incorrect results
when usign the "data" argument. I've checked to make sure that the
vector object names are not used outside of the data frame for the
experiment data on my computer. There is no way this could happen when
using Rweb.
Rick B.
Here are the details:
# here is the experiment data
> choco.split.04
subject brand ctime type
1 christian h 104.000 mc
3 christina h 33.500 mc
5 ERIN h 46.000 mc
7 gautam h 25.295 mc
11 joe h 75.000 mc
15 Lei h 44.500 mc
9 helen n 42.500 mc
13 jonathon n 55.500 mc
17 pablo n 47.000 mc
19 purin n 97.000 mc
21 scott n 85.000 mc
23 vince n 65.850 mc
2 christian h 168.000 wc
4 christina h 37.500 wc
6 ERIN h 45.500 wc
8 gautam h 28.860 wc
12 joe h 70.000 wc
16 Lei h 52.500 wc
10 helen n 57.500 wc
14 jonathon n 46.000 wc
18 pablo n 58.000 wc
20 purin n 113.000 wc
22 scott n 97.500 wc
24 vince n 89.650 wc
# data as an R object
choco.split.04 <-
structure(list(subject = structure(c(1, 2, 3, 4, 6, 8, 5, 7,
9, 10, 11, 12, 1, 2, 3, 4, 6, 8, 5, 7, 9, 10, 11, 12), class = "factor",
.Label = c("christian",
"christina", "ERIN", "gautam", "helen", "joe", "jonathon", "Lei",
"pablo", "purin", "scott", "vince")), brand = structure(c(1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
2, 2), class = "factor", .Label = c("h", "n")), ctime = c(104,
33.5, 46, 25.295, 75, 44.5, 42.5, 55.5, 47, 97, 85, 65.85, 168,
37.5, 45.5, 28.86, 70, 52.5, 57.5, 46, 58, 113, 97.5, 89.65),
type = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), class = "factor", .Label = c("mc",
"wc"))), .Names = c("subject", "brand", "ctime", "type"), row.names
= c("1",
"3", "5", "7", "11", "15", "9", "13", "17", "19", "21", "23",
"2", "4", "6", "8", "12", "16", "10", "14", "18", "20", "22",
"24"), class = "data.frame")
# split-plot analysis - using "aov" with argument "data"
> summary(aov.split.04 <-
aov(ctime~brand*type+Error(subject),data=choco.split.04))
Error: subject
Df Sum Sq Mean Sq F value Pr(>F)
brand 1 639.1 639.1 0.2972 0.5976
Residuals 10 21503.3 2150.3
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
type 1 850.43 850.43 4.3326 0.06404 .
brand:type 1 1.16 1.16 0.0059 0.94037
Residuals 10 1962.86 196.29
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
# here is the "proj" output - and it's all completely wrong
> proj(aov.split.04)
(Intercept) :
(Intercept)
1 66.04813
3 66.04813
5 66.04813
7 66.04813
11 66.04813
15 66.04813
9 66.04813
13 66.04813
17 66.04813
19 66.04813
21 66.04813
23 66.04813
2 66.04813
4 66.04813
6 66.04813
8 66.04813
12 66.04813
16 66.04813
10 66.04813
14 66.04813
18 66.04813
20 66.04813
22 66.04813
24 66.04813
attr(,"df")
attr(,"df")$df
(Intercept)
1
attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE
attr(,"factors")
attr(,"factors")$"(Intercept)"
[1] "(Intercept)"
subject :
brand Residuals
1 -1.15279697 -50.326456
3 -1.49365647 -53.795729
5 7.94608934 -2.679088
7 -9.57184562 9.110149
11 8.31691964 31.336764
15 0.08387137 -8.782543
9 11.06119883 45.752063
13 -7.01978726 25.900087
17 -0.10587652 3.956638
19 -0.09918658 3.706633
21 -0.09353078 3.495274
23 -0.08867137 3.313676
2 -2.17537547 88.387613
4 -1.83451597 -0.519157
6 11.33663826 -52.337849
8 -4.60767598 7.731579
12 -6.06482008 -35.264686
16 -0.08387137 8.782543
10 1.75243781 -10.962735
14 -6.49281005 -2.332554
18 0.10587652 -3.956638
20 0.09918658 -3.706633
22 0.09353078 -3.495274
24 0.08867137 -3.313676
attr(,"df")
attr(,"df")$df
brand Residuals
1 10
attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE
attr(,"factors")
attr(,"factors")$brand
[1] "brand"
attr(,"factors")$Residuals
[1] "subject"
Within :
type brand:type Residuals
1 1.396984343 0.04496907 3.011473
3 2.938588912 0.05656711 2.953101
5 3.145124762 0.05812095 2.945280
7 3.247625778 0.05889210 2.941399
11 -3.328231709 -0.22912276 5.730411
15 -2.538082170 -0.31105255 -25.391330
9 4.266448221 0.08884555 3.630619
13 -3.992792267 -0.27283438 3.666296
17 -10.936223358 -0.46063495 6.604985
19 -9.099857196 0.13554870 10.373726
21 -12.073986774 0.35505329 -3.922691
23 -12.879482604 0.38872464 -8.547527
2 6.021798052 0.07976318 2.836355
4 4.480193482 0.06816515 2.894728
6 4.273657632 0.06661130 2.902548
8 4.171156617 0.06584015 2.906430
12 -1.324148016 -0.18585237 6.575126
16 5.418913496 0.02243897 -28.523763
10 3.152334173 0.03588670 2.217210
14 9.399714308 0.30761541 3.257817
18 5.342284161 -0.47550361 -1.871322
20 -1.793920282 0.15803004 -5.774710
22 0.713084037 -0.03900621 2.788084
24 -0.001183597 -0.01706548 5.795756
attr(,"df")
attr(,"df")$df
type brand:type Residuals
1 1 10
attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE
attr(,"factors")
attr(,"factors")$type
[1] "type"
attr(,"factors")$"brand:type"
[1] "brand" "type"
attr(,"factors")$Residuals
[1] "subject" "Within"
# here is the "model.tables" output - and it's all completely wrong
> model.tables(aov.split.04)
Tables of effects
brand
h n
0.05825 -0.05825
rep 12.00000 12.00000
type
mc wc
-3.321 3.321
rep 12.000 12.000
brand:type
type
brand mc wc
h -0.054 0.019
rep 6.000 6.000
n 0.039 -0.005
rep 6.000 6.000
# here is "aov" without "data" argument - looks the same as before
> attach(choco.split.04)
> summary(aov.split.04 <- aov(ctime~brand*type+Error(subject)))
Error: subject
Df Sum Sq Mean Sq F value Pr(>F)
brand 1 639.1 639.1 0.2972 0.5976
Residuals 10 21503.3 2150.3
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
type 1 850.43 850.43 4.3326 0.06404 .
brand:type 1 1.16 1.16 0.0059 0.94037
Residuals 10 1962.86 196.29
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
# here is the "proj" output and it's correct
# as shown by manual decomposition
> proj(aov.split.04)
(Intercept) :
(Intercept)
1 66.04813
2 66.04813
3 66.04813
4 66.04813
5 66.04813
6 66.04813
7 66.04813
8 66.04813
9 66.04813
10 66.04813
11 66.04813
12 66.04813
13 66.04813
14 66.04813
15 66.04813
16 66.04813
17 66.04813
18 66.04813
19 66.04813
20 66.04813
21 66.04813
22 66.04813
23 66.04813
24 66.04813
attr(,"df")
attr(,"df")$df
(Intercept)
1
attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE
attr(,"factors")
attr(,"factors")$"(Intercept)"
[1] "(Intercept)"
subject :
brand Residuals
1 -5.160208 75.112083
2 -5.160208 -25.387917
3 -5.160208 -15.137917
4 -5.160208 -33.810417
5 -5.160208 11.612083
6 -5.160208 -12.387917
7 5.160208 -21.208333
8 5.160208 -20.458333
9 5.160208 -18.708333
10 5.160208 33.791667
11 5.160208 20.041667
12 5.160208 6.541667
13 -5.160208 75.112083
14 -5.160208 -25.387917
15 -5.160208 -15.137917
16 -5.160208 -33.810417
17 -5.160208 11.612083
18 -5.160208 -12.387917
19 5.160208 -21.208333
20 5.160208 -20.458333
21 5.160208 -18.708333
22 5.160208 33.791667
23 5.160208 20.041667
24 5.160208 6.541667
attr(,"df")
attr(,"df")$df
brand Residuals
1 10
attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE
attr(,"factors")
attr(,"factors")$brand
[1] "brand"
attr(,"factors")$Residuals
[1] "subject"
Within :
type brand:type Residuals
1 -5.952708 -0.219375 -25.8279167
2 -5.952708 -0.219375 4.1720833
3 -5.952708 -0.219375 6.4220833
4 -5.952708 -0.219375 4.3895833
5 -5.952708 -0.219375 8.6720833
6 -5.952708 -0.219375 2.1720833
7 -5.952708 0.219375 -1.7666667
8 -5.952708 0.219375 10.4833333
9 -5.952708 0.219375 0.2333333
10 -5.952708 0.219375 -2.2666667
11 -5.952708 0.219375 -0.5166667
12 -5.952708 0.219375 -6.1666667
13 5.952708 0.219375 25.8279167
14 5.952708 0.219375 -4.1720833
15 5.952708 0.219375 -6.4220833
16 5.952708 0.219375 -4.3895833
17 5.952708 0.219375 -8.6720833
18 5.952708 0.219375 -2.1720833
19 5.952708 -0.219375 1.7666667
20 5.952708 -0.219375 -10.4833333
21 5.952708 -0.219375 -0.2333333
22 5.952708 -0.219375 2.2666667
23 5.952708 -0.219375 0.5166667
24 5.952708 -0.219375 6.1666667
attr(,"df")
attr(,"df")$df
type brand:type Residuals
1 1 10
attr(,"onedf")
attr(,"onedf")$onedf
[1] FALSE
attr(,"factors")
attr(,"factors")$type
[1] "type"
attr(,"factors")$"brand:type"
[1] "brand" "type"
attr(,"factors")$Residuals
[1] "subject" "Within"
# here is "model.tables" output and it's correct
> model.tables(aov.split.04)
Tables of effects
brand
h n
-5.16 5.16
rep 12.00 12.00
type
mc wc
-5.953 5.953
rep 12.000 12.000
brand:type
type
brand mc wc
h -0.219 0.219
rep 6.000 6.000
n 0.219 -0.219
rep 6.000 6.000
# version information
> R.Version()
$platform
[1] "i686-pc-linux-gnu"
$arch
[1] "i686"
$os
[1] "linux-gnu"
$system
[1] "i686, linux-gnu"
$status
[1] ""
$major
[1] "1"
$minor
[1] "8.1"
$year
[1] "2003"
$month
[1] "11"
$day
[1] "21"
$language
[1] "R"
More information about the R-help
mailing list