[R] average and median values for each of the class

arun smartpink111 at yahoo.com
Mon Apr 28 12:54:27 CEST 2014



Hi,
I noticed that if ?mean or ?median in fun1 is changed to another function, e.g. ?sum, it will show an error message.
##Using shortened version that runs
library(plyr)
# Shortened fun1: for every group, append the mean and the sum of each column
# below that group's rows. The helper is applied to every column of the group
# piece, the grouping column included.
fun1 <- function(data, .group) {
    append_stats <- function(v) {
        c(v, mean(v, na.rm = TRUE), sum(v, na.rm = TRUE))
    }
    ddply(data, .group, sapply, FUN = append_stats)
}
## Check the values in the class column of the result: f1 is applied to that
## column as well. This is what creates the error in the full version.
fun1(dat,"class")

To be a bit more general, you can try this:
# Append per-group summary rows to a data frame.
#
# Arguments:
#   data     data frame; every column is passed to the summary functions.
#   .group   name of the grouping column (a single string).
#   funcVec  character vector of summary function names, e.g. "mean" or
#            "pkg::fun". Each function is called as f(x, na.rm = TRUE).
#
# Value: `data` sorted by `.group`, with one extra row per function appended
# after each group's rows. The grouping column is placed first. Summary rows
# are named <ABBREVIATED FUNCTION NAME><group>, e.g. "MEAN1"; the other rows
# keep the row names of `data`.
#
# Requires plyr::ddply to be available (library(plyr)).
fun2 <- function(data, .group, funcVec) {
    # Look the functions up by name once, instead of pasting a call together
    # as text and running it through eval(parse()).
    resolve_fun <- function(nm) {
        if (grepl("::", nm, fixed = TRUE)) {
            parts <- strsplit(nm, ":::?")[[1]]
            get(parts[2], envir = asNamespace(parts[1]), mode = "function")
        } else {
            match.fun(nm)
        }
    }
    stat_funs <- lapply(funcVec, resolve_fun)
    n_stats <- length(funcVec)

    data <- data[order(data[, .group]), ]
    f1 <- function(x) {
        c(x, do.call(c, lapply(stat_funs, function(f) f(x, na.rm = TRUE))))
    }

    # f1 is also applied to the grouping column, so its summary rows hold
    # summaries of the labels. Drop that column by name (not by position) and
    # rebuild it below.
    res <- ddply(data, .group, sapply, FUN = f1)
    res <- res[, names(res) != .group, drop = FALSE]

    # Rows per group in the result: the group's own rows plus one per function.
    indx <- table(factor(data[, .group], levels = unique(data[, .group]))) + n_stats

    # NOTE: labels go through as.numeric(), as before; non-numeric group
    # labels would become NA here.
    group_values <- as.numeric(rep(names(indx), indx))
    res <- cbind(group_values, res)
    names(res)[1] <- .group

    # Summary rows are the last n_stats rows of every group block.
    indxN <- as.numeric(sort(rep(cumsum(indx), n_stats) -
        rep(seq_along(funcVec) - 1, each = length(indx))))
    UniqGroup <- unique(data[, .group])
    stat_labels <- gsub("[.]", "", toupper(abbreviate(funcVec, minlength = 4)))
    rownames(res)[indxN] <- paste0(rep(stat_labels, length(UniqGroup)),
        rep(UniqGroup, each = n_stats))
    rownames(res)[-indxN] <- rownames(data)
    res
}

# Sets of summary-function names to try with fun2().
vec1 <- c("mean", "median", "sd", "sum")
vec2 <- "mean"
vec3 <- c("mean", "median", "min", "max", "sd")
vec4 <- c(vec3, "sum", "var")
library(plotrix)  # provides std.error()
vec5 <- c("mean", "median", "min", "max", "var", "sd", "std.error", "prod")
library(psych)  # provides skew() and kurtosi()
vec6 <- c(vec5, "skew", "kurtosi")

# One call per set.
fun2(dat, "class", vec1)
fun2(dat, "class", vec2)
fun2(dat, "class", vec3)
fun2(dat, "class", vec4)
fun2(dat, "class", vec5)
fun2(dat, "class", vec6)

# or running all the above in a loop
lapply(paste0("vec", 1:6), function(nm) fun2(dat, "class", get(nm)))
A.K.


On Sunday, April 27, 2014 7:11 AM, arun <smartpink111 at yahoo.com> wrote:

Hi,
You could also try:
library(plyr)


# For every group in `data` (split on the column named by `.group`), append a
# mean row and a median row below that group's rows.
#
# Summary rows are named "Avg<group>" and "Med<group>"; all other rows keep
# the row names of `data`. Requires plyr::ddply (library(plyr)).
fun1 <- function(data, .group) {
    with_stats <- function(v) {
        c(v, mean(v, na.rm = TRUE), median(v, na.rm = TRUE))
    }
    out <- ddply(data, .group, sapply, FUN = with_stats)

    # Last row index of each group block in the result.
    block_ends <- cumsum(as.vector(table(out[, .group])))
    # The two summary rows are the last two rows of every block.
    stat_rows <- sort(c(block_ends - 1, block_ends))

    groups <- unique(data[, .group])
    stat_labels <- paste0(rep(c("Avg", "Med"), length(groups)), rep(groups, each = 2))
    rownames(out)[stat_rows] <- stat_labels
    rownames(out)[-stat_rows] <- rownames(data)
    out
}
# Run fun1 on the example data.
fun1(dat,"class")
# Compare against res2, the result assembled from the avg and med aggregates.
all.equal(res2,fun1(dat,"class"))
#[1] TRUE



A.K.





On Saturday, April 26, 2014 9:14 PM, arun <smartpink111 at yahoo.com> wrote:


Hi,
Your dput() output shows that dat is a data.frame.
##Using the results you got,

# Rebuild the table one class at a time: the class's original rows, then its
# row from avg and its row from med. Rows whose names do not contain "ara"
# are those two summary rows; they are renamed Avg<i> and Med<i>.
res2 <- do.call(rbind, lapply(unique(dat$class), function(i) {
    block <- rbind(dat[dat$class == i, ], avg[avg$class == i, ], med[med$class == i, ])
    is_summary <- !grepl("ara", rownames(block))
    rownames(block)[is_summary] <- paste0(c("Avg", "Med"), i)
    block
}))


A.K.




On Saturday, April 26, 2014 8:39 PM, Nico Met <nicomet80 at gmail.com> wrote:
Dear all,



I have a matrix (dimension, 16 x 12) where the 2nd column represents class
(1,1,1,1,1,2,2,2, etc.) information. I want to compute the average and median
values for each class and add them as rows at the end of
each class.


for example:

dput(dat)

structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,

3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762,

2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729,

1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563,

2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278,

1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118,

0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856,

1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088

), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135,

0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976,

0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102,

0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426,

2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324,

1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493,

2.229911767, 3.191646399, 1.269919241, 1.387479858), name5 = c(2.172365295,

1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389,

1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025,

1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452

), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008,

0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016,

0.65635473, 0.615835066, 0.64310098, 0.562974891, 0.900622255,

0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899,

1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587,

1.234544153, 1.96629927, 1.74879757, 1.93994009, 1.660173854,

1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997,

0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636,

0.348599173, 0.273806933, 0.32067937, 0.278581115, 0.293726291,

0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471

), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252,

2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155,

1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052,

1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422,

0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542,

0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891,

0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619,

0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073,

0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411,

0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632

)), .Names = c("class", "name1", "name2", "name3", "name4", "name5",

"name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame",
row.names = c("ara1",

"ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9",

"ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16"

))


I wrote this:



# Per-class mean of every column except the first (the class label),
# ignoring missing values. aggregate() already returns a data frame.
avg <- aggregate(dat[, -1], dat["class"], mean, na.rm = TRUE)

# Per-class median of the same columns.
med <- aggregate(dat[, -1], dat["class"], median, na.rm = TRUE)


# avg

#  class    name1     name2     name3    name4    name5     name6    name7
#    name8    name9    name10    name11

#1     1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922
#0.2741547 2.376609 0.7154955 0.3654845

#2     2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665
#0.2983373 1.908645 0.5731394 0.3566621

#3     3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971
#0.2935527 2.118543 0.6916275 0.3076734

#4     4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031
#0.2524853 1.941667 0.6283592 0.3592155

#5     5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
#0.1856815 1.807698 0.3916619 0.7229726

#> med

#  class    name1     name2     name3    name4    name5     name6    name7
#    name8    name9    name10    name11

#1     1 2.497293 1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096
#0.2754850 2.289048 0.7230254 0.3637169

#2     2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908
#0.2972432 1.852571 0.5252870 0.3958789

#3     3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798
#0.2937263 2.002215 0.6442313 0.2976664

#4     4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735
#0.2042306 1.932144 0.6002406 0.3081887

#5     5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
#0.1856815 1.807698 0.3916619 0.7229726




But I do not know how I can add this information to the original data.


For example, for class 1, the output will look like this:

dput(res1)

structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 =
c(2.554923977,

2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378,

2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114,

1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798,

0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525,

0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548,

2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295,

1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049,

2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748,

0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133,

1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583,

1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245,

0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627,

2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733,

2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925,

0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619,

0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455,

0.363716904)), .Names = c("class", "name1", "name2", "name3",

"name4", "name5", "name6", "name7", "name8", "name9", "name10",

"name11"), class = "data.frame", row.names = c("ara1", "ara2",

"ara3", "ara4", "ara5", "Avg", "Med"))



And same will be for other classes.


Thanks a lot !!!!


Nico

    [[alternative HTML version deleted]]

______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.



More information about the R-help mailing list