[BioC] Help expanding limmaUsersGuide section 8.7 to 3x2 factorial
Jenny Drnevich
drnevich at illinois.edu
Tue Jan 11 21:02:27 CET 2011
Hi everyone,
This is probably more of a general stats questions, but I'm using the
limmaUsersGuide as my example, so I hope that qualifies for this
mailing list. Section 8.7 goes through 3 different parametrizations
of a 2x2 factorial design, although they are all mathematically
equivalent. I have no trouble showing this with my own data, however
I run into trouble when try to expand to a 3x2 factorial and compare
the cell-means model (example 1) and the sum-to-zero parametrization
(example 3). I'm trying to re-create the same coefficients as the
sum-to-zero parametrization using contrasts from the cell-means
model, but I'm not getting the right coefficient values and p-values
for the coefficients involving the factor with 3 levels. However, I
am getting the same values if I calculate the overall F-test or the
F-test for the main effect of the factor with 3 levels. I actually
need to expand this to a 4x2 factorial, and I'm running both model
parametrizations in my analyses depending on the contrasts I need to
get (some are easier to get from one model versus the other). I
started this little comparison as a sanity check, but now it's
driving me insane because I can't figure out what I'm doing wrong! I
tried to search the BioC list, the R list and Google, but the
question is a little complicated to search and I couldn't find
anything useful. Below is my full example code using data from just 2
genes; I attached the FakeData.csv file - hopefully it will come
through, but everything is shown in the code below. Would someone
please show me what I'm doing wrong?
Thank you!
Jenny
>
> library(limma)
>
> fakeData <- read.csv("FakeData.csv",stringsAsFactors=F)
> fakeData
Sample Group Pop Trt RNA1 RNA2
1 1 BMAc BMA c 7.023854 6.153886
2 2 BMAc BMA c 7.739991 5.866103
3 3 BMAc BMA c 7.564132 6.638336
4 4 BMAc BMA c 7.270778 6.594744
5 5 BMAe BMA e 6.810056 6.533012
6 6 BMAe BMA e 6.205453 5.442167
7 7 BMAe BMA e 5.676583 6.026147
8 8 BMAe BMA e 6.810335 5.748206
9 9 BVOc BVO c 6.810056 5.816311
10 10 BVOc BVO c 7.516394 6.753504
11 11 BVOc BVO c 8.569809 6.216047
12 12 BVOc BVO c 7.866806 5.781871
13 13 BVOe BVO e 6.617231 5.788481
14 14 BVOe BVO e 7.699425 6.106843
15 15 BVOe BVO e 7.114598 7.175774
16 16 BVOe BVO e 6.491565 5.883128
17 17 BWFc BWF c 7.625755 5.903757
18 18 BWFc BWF c 7.846667 5.939844
19 19 BWFc BWF c 6.629416 5.928577
20 20 BWFc BWF c 7.801253 5.894263
21 21 BWFe BWF e 7.128194 5.842164
22 22 BWFe BWF e 7.217061 7.053346
23 23 BWFe BWF e 7.861897 7.055525
24 24 BWFe BWF e 6.889417 9.774177
25 25 CARc CAR c 8.118041 5.918385
26 26 CARc CAR c 6.178935 5.856860
27 27 CARc CAR c 7.456442 6.035643
28 28 CARc CAR c 7.538999 5.827788
29 29 CARe CAR e 6.980979 6.556491
30 30 CARe CAR e 7.614278 5.875536
31 31 CARe CAR e 5.898541 7.787749
32 32 CARe CAR e 5.947753 6.914975
>
> #do sum-to-zero parametrization for half of data (2X2) (3rd
example in limmaUsersGuide, section 8.7)
>
> Pop1 <- factor(fakeData$Pop[1:16])
> Trt1 <- factor(fakeData$Trt[1:16])
> contrasts(Pop1) <- contr.sum(2)
> contrasts(Trt1) <- contr.sum(2)
>
> design1.stz <- model.matrix(~Pop1*Trt1)
> design1.stz
(Intercept) Pop11 Trt11 Pop11:Trt11
1 1 1 1 1
2 1 1 1 1
3 1 1 1 1
4 1 1 1 1
5 1 1 -1 -1
6 1 1 -1 -1
7 1 1 -1 -1
8 1 1 -1 -1
9 1 -1 1 -1
10 1 -1 1 -1
11 1 -1 1 -1
12 1 -1 1 -1
13 1 -1 -1 1
14 1 -1 -1 1
15 1 -1 -1 1
16 1 -1 -1 1
attr(,"assign")
[1] 0 1 2 3
attr(,"contrasts")
attr(,"contrasts")$Pop1
[,1]
BMA 1
BVO -1
attr(,"contrasts")$Trt1
[,1]
c 1
e -1
>
> fit1.stz <-
eBayes(lmFit(as.matrix(t(fakeData[1:16,c("RNA1","RNA2")])),design1.stz))
>
>
> #Now do cell-mean model, again on half the data (1st example in
limmaUsersGuide, section 8.7)
>
> TS1 <- factor(fakeData$Group[1:16])
> design1.cm <- model.matrix(~0+TS1)
> colnames(design1.cm) <- levels(TS1)
> design1.cm
BMAc BMAe BVOc BVOe
1 1 0 0 0
2 1 0 0 0
3 1 0 0 0
4 1 0 0 0
5 0 1 0 0
6 0 1 0 0
7 0 1 0 0
8 0 1 0 0
9 0 0 1 0
10 0 0 1 0
11 0 0 1 0
12 0 0 1 0
13 0 0 0 1
14 0 0 0 1
15 0 0 0 1
16 0 0 0 1
attr(,"assign")
[1] 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$TS1
[1] "contr.treatment"
>
> #Make contrasts the same as the coefficients of design1.stz
> cont.matrix1 <- makeContrasts(Intercept = (BMAc + BMAe + BVOc + BVOe)/4,
+ Pop11 = (BMAc + BMAe - BVOc - BVOe)/4,
+ Trt11 = (BMAc - BMAe + BVOc - BVOe)/4,
+ Pop11.Trt11 = (BMAc - BMAe - BVOc + BVOe)/4,
+ levels=design1.cm)
>
> cont.matrix1
Contrasts
Levels Intercept Pop11 Trt11 Pop11.Trt11
BMAc 0.25 0.25 0.25 0.25
BMAe 0.25 0.25 -0.25 -0.25
BVOc 0.25 -0.25 0.25 -0.25
BVOe 0.25 -0.25 -0.25 0.25
>
>
> fit1.cm <- lmFit(as.matrix(t(fakeData[1:16,c("RNA1","RNA2")])),design1.cm)
> fit1.cm2 <- eBayes(contrasts.fit(fit1.cm,cont.matrix1))
>
> #Compare results
> fit1.stz$coef
(Intercept) Pop11 Trt11 Pop11:Trt11
RNA1 7.111692 -0.22404395 0.43353590 0.07850503
RNA2 6.157785 -0.03245994 0.06981525 0.11812693
>
> fit1.cm2$coef
Contrasts
Intercept Pop11 Trt11 Pop11.Trt11
RNA1 7.111692 -0.22404395 0.43353590 0.07850503
RNA2 6.157785 -0.03245994 0.06981525 0.11812693
> #all the same
>
> fit1.stz$p.value
(Intercept) Pop11 Trt11 Pop11:Trt11
RNA1 0 0.1003890 0.001476679 0.5648199
RNA2 0 0.8118525 0.608670022 0.3863506
>
> fit1.cm2$p.value
Contrasts
Intercept Pop11 Trt11 Pop11.Trt11
RNA1 0 0.1003890 0.001476679 0.5648199
RNA2 0 0.8118525 0.608670022 0.3863506
> #all the same
>
> #Do overall F-test
> topTable(fit1.stz,coef=2:4,sort.by="none")
ID Pop11 Trt11
Pop11.Trt11 AveExpr F P.Value adj.P.Val
1 RNA1 -0.22404395 0.43353590 0.07850503 7.111692 4.3794233
0.004346933 0.008693866
2 RNA2 -0.03245994 0.06981525 0.11812693 6.157785 0.3563916
0.784520156 0.784520156
>
> topTable(fit1.cm2,coef=2:4,sort.by="none")
ID Pop11 Trt11
Pop11.Trt11 AveExpr F P.Value adj.P.Val
1 RNA1 -0.22404395 0.43353590 0.07850503 7.111692 4.3794233
0.004346933 0.008693866
2 RNA2 -0.03245994 0.06981525 0.11812693 6.157785 0.3563916
0.784520156 0.784520156
>
>
all.equal(topTable(fit1.stz,coef=2:4,sort.by="none"),topTable(fit1.cm2,coef=2:4,sort.by="none"))
[1] TRUE
>
> #Ok, so these two model parametrizations give me the exact same results
>
>
> #Now expand to 3x2 factorial and compare models
>
> Pop2 <- factor(fakeData$Pop[1:24])
> Trt2 <- factor(fakeData$Trt[1:24])
> contrasts(Pop2) <- contr.sum(3)
> contrasts(Trt2) <- contr.sum(2)
>
> design2.stz <- model.matrix(~Pop2*Trt2)
> design2.stz
(Intercept) Pop21 Pop22 Trt21 Pop21:Trt21 Pop22:Trt21
1 1 1 0 1 1 0
2 1 1 0 1 1 0
3 1 1 0 1 1 0
4 1 1 0 1 1 0
5 1 1 0 -1 -1 0
6 1 1 0 -1 -1 0
7 1 1 0 -1 -1 0
8 1 1 0 -1 -1 0
9 1 0 1 1 0 1
10 1 0 1 1 0 1
11 1 0 1 1 0 1
12 1 0 1 1 0 1
13 1 0 1 -1 0 -1
14 1 0 1 -1 0 -1
15 1 0 1 -1 0 -1
16 1 0 1 -1 0 -1
17 1 -1 -1 1 -1 -1
18 1 -1 -1 1 -1 -1
19 1 -1 -1 1 -1 -1
20 1 -1 -1 1 -1 -1
21 1 -1 -1 -1 1 1
22 1 -1 -1 -1 1 1
23 1 -1 -1 -1 1 1
24 1 -1 -1 -1 1 1
attr(,"assign")
[1] 0 1 1 2 3 3
attr(,"contrasts")
attr(,"contrasts")$Pop2
[,1] [,2]
BMA 1 0
BVO 0 1
BWF -1 -1
attr(,"contrasts")$Trt2
[,1]
c 1
e -1
>
> fit2.stz <-
eBayes(lmFit(as.matrix(t(fakeData[1:24,c("RNA1","RNA2")])),design2.stz))
>
>
> TS2 <- factor(fakeData$Group[1:24])
> design2.cm <- model.matrix(~0+TS2)
> colnames(design2.cm) <- levels(TS2)
> design2.cm
BMAc BMAe BVOc BVOe BWFc BWFe
1 1 0 0 0 0 0
2 1 0 0 0 0 0
3 1 0 0 0 0 0
4 1 0 0 0 0 0
5 0 1 0 0 0 0
6 0 1 0 0 0 0
7 0 1 0 0 0 0
8 0 1 0 0 0 0
9 0 0 1 0 0 0
10 0 0 1 0 0 0
11 0 0 1 0 0 0
12 0 0 1 0 0 0
13 0 0 0 1 0 0
14 0 0 0 1 0 0
15 0 0 0 1 0 0
16 0 0 0 1 0 0
17 0 0 0 0 1 0
18 0 0 0 0 1 0
19 0 0 0 0 1 0
20 0 0 0 0 1 0
21 0 0 0 0 0 1
22 0 0 0 0 0 1
23 0 0 0 0 0 1
24 0 0 0 0 0 1
attr(,"assign")
[1] 1 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$TS2
[1] "contr.treatment"
>
> #Make contrasts the same as the coefficients of design1.stz
> #But I'm probably not doing this correctly!
>
> cont.matrix2 <- makeContrasts(Intercept = (BMAc + BMAe + BVOc +
BVOe + BWFc + BWFe)/6,
+ Pop21 = (BMAc + BMAe - BWFc - BWFe)/4,
+ Pop22 = (BVOc + BVOe - BWFc - BWFe)/4,
+ Trt21 = (BMAc - BMAe + BVOc - BVOe +
BWFc - BWFe)/6,
+ Pop21.Trt21 = (BMAc - BMAe - BWFc + BWFe)/4,
+ Pop22.Trt21 = (BVOc - BVOe - BWFc + BWFe)/4,
+ levels=design2.cm)
>
> cont.matrix2
Contrasts
Levels Intercept Pop21 Pop22 Trt21 Pop21.Trt21 Pop22.Trt21
BMAc 0.1666667 0.25 0.00 0.1666667 0.25 0.00
BMAe 0.1666667 0.25 0.00 -0.1666667 -0.25 0.00
BVOc 0.1666667 0.00 0.25 0.1666667 0.00 0.25
BVOe 0.1666667 0.00 0.25 -0.1666667 0.00 -0.25
BWFc 0.1666667 -0.25 -0.25 0.1666667 -0.25 -0.25
BWFe 0.1666667 -0.25 -0.25 -0.1666667 0.25 0.25
>
>
> fit2.cm <- lmFit(as.matrix(t(fakeData[1:24,c("RNA1","RNA2")])),design2.cm)
> fit2.cm2 <- eBayes(contrasts.fit(fit2.cm,cont.matrix2))
>
> #Compare results
> fit2.stz$coef
(Intercept) Pop21 Pop22 Trt21 Pop21:Trt21 Pop22:Trt21
RNA1 7.199447 -0.3117993 0.1362886 0.3226290 0.1894119 0.03240183
RNA2 6.329842 -0.2045172 -0.1395973 -0.2059053 0.3938475 0.15759367
>
> fit2.cm2$coef
Contrasts
Intercept Pop21 Pop22 Trt21 Pop21.Trt21 Pop22.Trt21
RNA1 7.199447 -0.2436550 -0.01961105 0.3226290 0.2056128 0.1271078
RNA2 6.329842 -0.2743158 -0.24185590 -0.2059053 0.4726444 0.3545174
>
> #The intercepts and Trt21 main effect the same, but not others
>
> fit2.stz$p.value
(Intercept) Pop21 Pop22 Trt21 Pop21:Trt21 Pop22:Trt21
RNA1 2.907470e-33 0.07375434 0.4245090 0.01094819 0.26936850 0.8486640
RNA2 1.328404e-28 0.34115782 0.5141817 0.17860405 0.07225298 0.4618975
>
> fit2.cm2$p.value
Contrasts
Intercept Pop21 Pop22 Trt21 Pop21.Trt21 Pop22.Trt21
RNA1 2.907470e-33 0.1049556 0.8938907 0.01094819 0.1686224 0.39016022
RNA2 1.328404e-28 0.1445138 0.1965220 0.17860405 0.0149244 0.06225909
>
> #same as the coefficients
>
> #Do overall F-test
> topTable(fit2.stz,coef=2:6,sort.by="none")
ID Pop21 Pop22 Trt21 Pop21.Trt21
Pop22.Trt21 AveExpr F P.Value adj.P.Val
1 RNA1 -0.3117993 0.1362886 0.3226290 0.1894119 0.03240183
7.199447 2.563622 0.04757618 0.06386501
2 RNA2 -0.2045172 -0.1395973 -0.2059053 0.3938475 0.15759367
6.329842 2.357795 0.06386501 0.06386501
>
> topTable(fit2.cm2,coef=2:6,sort.by="none")
ID Pop21 Pop22 Trt21 Pop21.Trt21
Pop22.Trt21 AveExpr F P.Value adj.P.Val
1 RNA1 -0.2436550 -0.01961105 0.3226290 0.2056128 0.1271078
7.199447 2.563622 0.04757618 0.06386501
2 RNA2 -0.2743158 -0.24185590 -0.2059053 0.4726444 0.3545174
6.329842 2.357795 0.06386501 0.06386501
>
> #despite differences in coefficients, the overall F-stats and
p-values are the same
>
> #Do F-test for main effect of Population
> topTable(fit2.stz,coef=2:3,sort.by="none")
ID Pop21 Pop22 AveExpr F P.Value adj.P.Val
1 RNA1 -0.3117993 0.1362886 7.199447 1.723965 0.1953264 0.2770150
2 RNA2 -0.2045172 -0.1395973 6.329842 1.339402 0.2770150 0.2770150
>
> topTable(fit2.cm2,coef=2:3,sort.by="none")
ID Pop21 Pop22 AveExpr F P.Value adj.P.Val
1 RNA1 -0.2436550 -0.01961105 7.199447 1.723965 0.1953264 0.2770150
2 RNA2 -0.2743158 -0.24185590 6.329842 1.339402 0.2770150 0.2770150
>
> #Again, F and p-values the same but not coefficients
>
> #Do F-test for interaction term
>
> topTable(fit2.stz,coef=5:6,sort.by="none")
ID Pop21.Trt21 Pop22.Trt21 AveExpr F P.Value adj.P.Val
1 RNA1 0.1894119 0.03240183 7.199447 1.012854 0.37510055 0.37510055
2 RNA2 0.3938475 0.15759367 6.329842 3.607216 0.03928066 0.07856131
>
> topTable(fit2.cm2,coef=5:6,sort.by="none")
ID Pop21.Trt21 Pop22.Trt21 AveExpr F P.Value adj.P.Val
1 RNA1 0.2056128 0.1271078 7.199447 1.012854 0.37510055 0.37510055
2 RNA2 0.4726444 0.3545174 6.329842 3.607216 0.03928066 0.07856131
>
> #Again, F and p-values the same but not coefficients
>
> sessionInfo()
R version 2.12.0 (2010-10-15)
Platform: i386-pc-mingw32/i386 (32-bit)
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United
States.1252
[3] LC_MONETARY=English_United States.1252
LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] limma_3.6.9
-------------- next part --------------
Sample,Group,Pop,Trt,RNA1,RNA2
1,BMAc,BMA,c,7.023854419,6.153885799
2,BMAc,BMA,c,7.73999053,5.866102667
3,BMAc,BMA,c,7.564131598,6.63833632
4,BMAc,BMA,c,7.27077757,6.594744469
5,BMAe,BMA,e,6.810056104,6.533012004
6,BMAe,BMA,e,6.205452707,5.442167224
7,BMAe,BMA,e,5.676582602,6.026146898
8,BMAe,BMA,e,6.810335264,5.74820562
9,BVOc,BVO,c,6.810056104,5.816310567
10,BVOc,BVO,c,7.516394012,6.75350421
11,BVOc,BVO,c,8.569809392,6.216047464
12,BVOc,BVO,c,7.866805967,5.78187105
13,BVOe,BVO,e,6.61723119,5.788481301
14,BVOe,BVO,e,7.699424633,6.10684267
15,BVOe,BVO,e,7.114597775,7.175774448
16,BVOe,BVO,e,6.4915649,5.883128316
17,BWFc,BWF,c,7.625755429,5.903757463
18,BWFc,BWF,c,7.84666745,5.939844249
19,BWFc,BWF,c,6.629415683,5.928576696
20,BWFc,BWF,c,7.801253152,5.894262583
21,BWFe,BWF,e,7.12819398,5.84216429
22,BWFe,BWF,e,7.217060964,7.053346408
23,BWFe,BWF,e,7.861896997,7.055525414
24,BWFe,BWF,e,6.889417139,9.774177283
25,CARc,CAR,c,8.118040937,5.918385378
26,CARc,CAR,c,6.178935127,5.856860101
27,CARc,CAR,c,7.456442481,6.035642691
28,CARc,CAR,c,7.538999007,5.827788393
29,CARe,CAR,e,6.980979291,6.556491077
30,CARe,CAR,e,7.614278284,5.875536372
31,CARe,CAR,e,5.898541472,7.787748677
32,CARe,CAR,e,5.947753112,6.914974545
More information about the Bioconductor
mailing list