[R] aggregate syntax for grouped column means

Juliet Hannah juliet.hannah at gmail.com
Tue Nov 29 21:21:49 CET 2011


I am calculating the mean of each column grouped by the variable 'id'.
I do this using aggregate, data.table, and plyr. My aggregate results
do not match the other two, and I am trying to figure out what is
incorrect with my syntax. Any suggestions? Thanks.

Here is the data.

myData <- structure(list(var1 = c(31.59, 32.21, 31.78, 31.34, 31.61, 31.61,
30.59, 30.84, 30.98, 30.79, 30.79, 30.94, 31.08, 31.27, 31.11,
30.42, 30.37, 30.29, 30.06, 30.3, 30.43, 30.61, 30.64, 30.75,
30.39, 30.1, 30.25, 31.55, 31.96, 31.87, 30.29, 30.15, 30.37,
29.59, 29.52, 28.96, 29.69, 29.58, 29.52, 30.21, 30.3, 30.25,
30.23, 30.29, 30.39), var2 = c(33.78, 33.25, NA, 32.05, 32.59,
NA, 32.24, NA, NA, 32.15, 32.39, NA, 32.4, 31.6, NA, 30.5, 30.66,
NA, 30.6, 29.95, NA, 31.24, 30.73, NA, 30.51, 30.43, 31.17, 31.44,
31.17, 31.18, 31.01, 30.98, 31.25, 30.44, 30.47, NA, 30.47, 30.56,
NA, 30.6, 30.57, NA, 31, 30.8, NA), id = c("0m4", "0m4", "0m4",
"0m5", "0m5", "0m5", "0m6", "0m6", "0m6", "0m11", "0m11", "0m11",
"0m12", "0m12", "0m12", "205m1", "205m1", "205m1", "205m4", "205m4",
"205m4", "205m5", "205m5", "205m5", "205m6", "205m6", "205m6",
"205m7", "205m7", "205m7", "600m1", "600m1", "600m1", "600m3",
"600m3", "600m3", "600m4", "600m4", "600m4", "600m5", "600m5",
"600m5", "600m7", "600m7", "600m7")), .Names = c("var1", "var2",
"id"), row.names = c(NA, -45L), class = "data.frame")

> head(myData)
   var1  var2  id
1 31.59 33.78 0m4
2 32.21 33.25 0m4
3 31.78    NA 0m4
4 31.34 32.05 0m5
5 31.61 32.59 0m5
6 31.61    NA 0m5



results1 <- aggregate(. ~  id ,data=myData,FUN=mean,na.rm=T)
 head(results1,1)
#    id  var1  var2
# 1 0m11 30.79 32.27

library(data.table)
mydt <- data.table(myData)
setkey(mydt,id)
results2 <- mydt[,lapply(.SD,mean,na.rm=TRUE),by=id]
 head(results2,1)
#       id  var1  var2
# [1,] 0m11 30.84 32.27

library(plyr)
results3 <- ddply(myData,.(id),colwise(mean),na.rm=TRUE)
 head(results3,1)
#    id  var1  var2
# 1 0m11 30.84 32.27

> sessionInfo()
R version 2.14.0 (2011-10-31)
Platform: i386-pc-mingw32/i386 (32-bit)

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United
States.1252    LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] plyr_1.6         data.table_1.7.3



More information about the R-help mailing list