[R] Mann-Whitney by group

David L Carlson dcarlson at tamu.edu
Sat Jul 7 23:28:22 CEST 2012


This works. First, we assign the results of dput() to a variable
so we can use it. Second, we eliminate the groups we don't need.
Third, we remake the factors to drop the group and gene levels
that do not appear in the data subset. Finally, we compute the tests.


Dta <- structure(list(Gene = structure(c(1L, 12L, 19L, 20L, 21L,
     ....................lines omitted..........................
"Group", "A", "B", "C", "D", "E", "F", "G", "H"), row.names = c(NA, 
25L), class = "data.frame")

# Keep only the rows that belong to groups 5 and 6
Dtb <- subset(Dta, Group %in% c(5, 6))

# Inspect the subset: expect 8 observations, four per group,
# with every measurement in column B equal to 1
Dtb

# Rebuild both factor columns so that levels which no longer occur
# in the reduced data set are dropped
Dtb[c("Gene", "Group")] <- lapply(Dtb[c("Gene", "Group")], factor)

# The Mann-Whitney test is the Wilcoxon rank sum test; see its help page
help("wilcox.test")

# Run the test on measurement A, comparing the two groups
wilcox.test(A ~ Group, data = Dtb)

# Compute the test for every measurement column (columns 3-10, A through H).
# lapply() walks the data frame column by column, so the columns are not
# coerced to a matrix first, as apply() would do. Each call is wrapped in
# tryCatch() so that a column whose test fails is returned as its error
# condition instead of aborting the remaining tests.
lapply(Dtb[, 3:10], function(x) {
  tryCatch(wilcox.test(x ~ Dtb$Group), error = function(e) e)
})

# Column B is constant (every value is 1), so no meaningful test exists
# for it; if it raises an error, that error appears only in element B

----------------------------------------------
David L Carlson
Associate Professor of Anthropology
Texas A&M University
College Station, TX 77843-4352


> -----Original Message-----
> From: r-help-bounces at r-project.org [mailto:r-help-bounces at r-
> project.org] On Behalf Of Oxenstierna
> Sent: Friday, July 06, 2012 3:34 PM
> To: r-help at r-project.org
> Subject: Re: [R] Mann-Whitney by group
> 
> Hi David,
> 
> Thank you for the insight:  I could have sworn I added a picture of the
> data, but providing the actual data is worlds easier to deal with, I'm
> sure.
> I've never used dput(), so I entered it using the dataframe in question
> as
> the object, and I've pasted the results below.
> 
> Essentially, I would like to run the two-sample independent test,
> comparing
> Group 5 (CD8.14 through CD8.17) to Group 6 (CD8.18 through CD8.21). So,
> for
> A, test Group 5 against Group 6, for B, test Group 5 against Group 6,
> and so
> on.  I'm not going to muddy the waters by telling you what I've tried;
> suffice it to say that I'm looking for insights into how to structure R
> commands to compare groups of data of this format.
> 
> Many thanks in advance,
> 
> David
> 
> 
> structure(list(Gene = structure(c(1L, 12L, 19L, 20L, 21L, 22L,
> 23L, 24L, 25L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L,
> 14L, 15L, 16L, 17L, 18L), .Label = c("CD8.1", "CD8.10", "CD8.11",
> "CD8.12", "CD8.13", "CD8.14", "CD8.15", "CD8.16", "CD8.17", "CD8.18",
> "CD8.19", "CD8.2", "CD8.20", "CD8.21", "CD8.22", "CD8.23", "CD8.24",
> "CD8.25", "CD8.3", "CD8.4", "CD8.5", "CD8.6", "CD8.7", "CD8.8",
> "CD8.9"), class = "factor"), Group = structure(c(8L, 8L, 8L,
> 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L,
> 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L), .Label = c("Fabbf Ova
> CD40",
> "Fabbf Ova MHC2", "Fabbf Ova WT", "Fabbf WT", "Naïve CD40", "Naïve MHC
> II",
> "Naïve WT", "1", "2", "3", "4", "5", "6", "7"), class = "factor"),
>     A = c(19.4701946749544, 0.679440926463348, 0.69035683372563,
>     0.347105466158261, 0.435480792190284, 0.338699910286907,
>     0.651378057031152, 0.707065053752258, 0.685244609506316,
>     0.816673858871597, 0.597009097584509, 0.592331304482431,
>     0.709359033358704, 0.628406759227531, 0.78158729467231,
> 1.0422377526669,
>     0.61560003251142, 0.463755016733183, 0.419700860701392,
> 0.380946898502731,
>     0.41156961153081, 0.471790683365624, 0.552773224145722,
> 0.466787799928649,
>     0.767624372542755), B = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
>     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), C = c(0.914649979331863,
>     1.443086801592, 0.928280641141244, 0.467498974059775,
> 0.668742025741347,
>     0.568417298005388, 0.778907650835673, 0.778765181169635,
>     1.01103488277517, 0.674133483128923, 0.830400022230133,
> 0.652687178870627,
>     0.746893950266518, 0.765498308522646, 1.01273201749333,
> 1.00309616129672,
>     0.63015975419947, 0.615905247119739, 0.615687625199691,
> 0.503136087800137,
>     0.566164026974035, 0.631519467967541, 0.822126705285366,
>     0.543873075815645, 1.00596108625425), D = c(13.3272657341526,
>     3.08914950309865, 1.71836820240434, 0.723301573710509,
> 1.21014411624732,
>     1.92899377364865, 1.80280408189187, 2.25057819266424,
> 2.23876060313374,
>     1.30849425313072, 1.58782967140617, 1.19199809794126,
> 1.64151140806787,
>     0.241017500596534, 0.364896032519483, 0.322953808735804,
>     0.2052110581509, 0.927601295331376, 0.808910781520832,
> 0.538033121081646,
>     0.655348783504307, 0.564449549672088, 0.521729926793001,
>     0.414305517285192, 0.507084483980948), E = c(56.2830291897158,
>     9.76091939190267, 4.80922410182105, 17.0056576949022,
> 20.851046177766,
>     17.9057247086369, 5.93332779160845, 4.73058157592946,
> 5.59155211460608,
>     9.67484467290805, 5.92374864612388, 7.12393623733123,
> 5.33576126730867,
>     10.3943422629275, 10.8732527705049, 12.4861085370674,
> 12.0918705721064,
>     13.3210661695018, 10.9410344557684, 15.1298307761675,
> 13.0708078246191,
>     9.4445293976312, 6.94340249514349, 5.07888688780375,
> 8.33846787814466
>     ), F = c(15.0459568981729, 21.6362955612539, 9.66673955488981,
>     27.2276698483913, 18.1090094072926, 20.0952712980862,
> 24.9249499974856,
>     23.5540183530194, 29.6638363657906, 28.9779309040733,
> 42.0402820641407,
>     33.8068160394092, 51.7299064374737, 37.8306751403421,
> 43.1955470199259,
>     45.5125262939585, 40.3109474523637, 23.6341894633273,
> 23.9721353180788,
>     20.4920649252818, 24.8898447627354, 34.2686409607416,
> 31.3815198841165,
>     31.4947528368753, 43.2686436885025), G = c(15.9703031418086,
>     5.30495997585743, 3.07594974529074, 8.28703732907722,
> 10.9437825143868,
>     6.91196232523896, 2.97808148581742, 2.6386825521864,
> 2.2415006913088,
>     5.00747306438661, 2.65238188782831, 3.18277515130905,
> 3.14638620532385,
>     2.5149505923191, 2.48862112414046, 2.97170069886913,
> 1.91643165326171,
>     7.50682774199005, 5.39102206185423, 5.2498453524987,
> 7.17519969844757,
>     2.66448841457179, 2.78444235996995, 1.62286520735228,
> 2.48760726398266
>     ), H = c(-1, -1, -1, 0.0036561124481055, 0.010723007432761,
>     0.0196616746380801, 0.0371046164124276, -1, 3.27378673314144e-05,
>     -1, -1, -1, -1, 0.00816215783906802, 21.5424904701651,
> 9.57900616157724,
>     0.00735942043489242, 1.50346040901698, 0.0909450037365435,
>     1.49237001404701e-05, 0.000775741472561218, -1, 17.9023582944659,
>     0.0176891314806093, 33.4326253626981)), .Names = c("Gene",
> "Group", "A", "B", "C", "D", "E", "F", "G", "H"), row.names = c(NA,
> 25L), class = "data.frame")
> 
> --
> View this message in context: http://r.789695.n4.nabble.com/Mann-
> Whitney-by-group-tp4635618p4635667.html
> Sent from the R help mailing list archive at Nabble.com.
> 
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-
> guide.html
> and provide commented, minimal, self-contained, reproducible code.



More information about the R-help mailing list