[R] average and median values for each of the class
arun
smartpink111 at yahoo.com
Mon Apr 28 12:54:27 CEST 2014
Hi,
I noticed that if `mean` or `median` in fun1 is changed to another function, e.g. `sum`, it will show an error message.
##Using shortened version that runs
library(plyr)
fun1 <- function(data, .group) {
  # Shortened demo: for every group defined by `.group`, append the mean and
  # the sum of each column below that group's rows.
  #
  # data:   data frame to summarise.
  # .group: grouping column specification handed to ddply().
  #
  # Every column of a group's piece goes through the same helper, so any
  # column present in the piece gets a mean and a sum row as well.
  append_summaries <- function(column) {
    avg <- mean(column, na.rm = TRUE)
    total <- sum(column, na.rm = TRUE)
    c(column, avg, total)
  }
  ddply(data, .group, function(piece) sapply(piece, FUN = append_summaries))
}
fun1(dat,"class")##check the value of class column. This creates error in the full version.
To be a bit more general, you can try this:
fun2 <- function(data, .group, funcVec) {
  # Append one summary row per function in `funcVec` below each group's rows.
  #
  # Args:
  #   data:    data frame containing the grouping column and value columns.
  #   .group:  name (single character string) of the grouping column. The
  #            group labels are converted with as.numeric(), as before.
  #   funcVec: character vector of function names ("mean", "sd", ...,
  #            optionally "pkg::fun"). Each function is called as
  #            f(x, na.rm = TRUE) and must return exactly one value.
  #
  # Returns:
  #   A data frame sorted by group. The first column is the group label
  #   (named after `.group`), followed by the remaining columns. Data rows
  #   keep their original row names; summary rows are named
  #   <ABBREVIATED FUNCTION NAME><group label>.
  data <- data[order(data[, .group]), ]
  n_stats <- length(funcVec)

  # Resolve the names to functions once instead of pasting together and
  # parsing R code for every column.
  resolve_fun <- function(fn) {
    parts <- strsplit(fn, ":::?")[[1]]
    if (length(parts) == 2L) {
      get(parts[2], envir = asNamespace(parts[1]), mode = "function")
    } else {
      get(fn, mode = "function")
    }
  }
  stat_funs <- lapply(funcVec, resolve_fun)

  with_stats <- function(x) {
    stats <- lapply(stat_funs, function(f) f(x, na.rm = TRUE))
    # The row labelling below assumes one summary row per function.
    if (any(lengths(stats) != 1L)) {
      stop("each function in 'funcVec' must return exactly one value",
           call. = FALSE)
    }
    c(x, unlist(stats))
  }

  res <- ddply(data, .group, sapply, FUN = with_stats)
  # The summarised group column is meaningless (e.g. the sum of the labels),
  # so drop it by name rather than by position and rebuild it below.
  res <- res[, names(res) != .group, drop = FALSE]

  grp <- data[, .group]
  grp_levels <- unique(grp)
  # Rows per group in the result: the data rows plus one row per function.
  rows_per_group <- table(factor(grp, levels = grp_levels)) + n_stats
  group_col <- as.numeric(rep(names(rows_per_group), rows_per_group))
  res <- cbind(group_col, res)
  names(res)[1] <- .group

  # The summary rows are the last `n_stats` rows of each group.
  group_end <- cumsum(as.vector(rows_per_group))
  stat_rows <- sort(rep(group_end, n_stats) -
                      rep(seq_along(funcVec) - 1, each = length(group_end)))
  is_stat <- seq_len(nrow(res)) %in% stat_rows

  stat_labels <- gsub("[.]", "", toupper(abbreviate(funcVec, minlength = 4)))
  new_names <- character(nrow(res))
  new_names[is_stat] <- paste0(rep(stat_labels, length(grp_levels)),
                               rep(grp_levels, each = n_stats))
  new_names[!is_stat] <- rownames(data)
  rownames(res) <- new_names
  res
}
# Sets of summary-function names to pass to fun2() as funcVec.
vec1 <- c("mean", "median", "sd", "sum")
# Single-function case.
vec2 <- "mean"
vec3 <- c("mean", "median", "min", "max", "sd")
vec4 <- c("mean", "median", "min", "max", "sd", "sum", "var")
library(plotrix) ## for std.error()
vec5 <- c("mean", "median", "min", "max", "var", "sd", "std.error", "prod")
library(psych) ### for skew(), kurtosi()
vec6 <- c("mean", "median", "min", "max", "var", "sd", "std.error", "prod", "skew",
"kurtosi")
# Run fun2() on dat, grouped by "class", once per function set.
fun2(dat, "class", vec1)
fun2(dat, "class", vec2)
fun2(dat, "class", vec3)
fun2(dat, "class", vec4)
fun2(dat, "class", vec5)
fun2(dat, "class", vec6)
#or running all the above in a loop
# get(x) looks up each vector by its name ("vec1", ..., "vec6").
lapply(paste0("vec",1:6),function(x) fun2(dat,"class",get(x)))
A.K.
On Sunday, April 27, 2014 7:11 AM, arun <smartpink111 at yahoo.com> wrote:
Hi,
You could also try:
library(plyr)
fun1 <- function(data, .group) {
  # Append an average row and a median row below each group's rows.
  #
  # Args:
  #   data:   data frame containing the grouping column and value columns.
  #   .group: name (single character string) of the grouping column.
  #
  # Returns:
  #   A data frame sorted by group. Data rows keep their original row names;
  #   the two summary rows of each group are named "Avg<group>" and
  #   "Med<group>".
  #
  # Sort first: ddply() returns the rows grouped, so the original row names
  # only line up with the result when `data` is in group order too.
  data <- data[order(data[, .group]), ]
  f1 <- function(x) c(x, mean(x, na.rm = TRUE), median(x, na.rm = TRUE))
  res <- ddply(data, .group, sapply, FUN = f1)

  grp <- data[, .group]
  UniqGroup <- unique(grp)
  # Rows per group in the result: the data rows plus the two summary rows.
  # Counted from the input rather than from the computed result column.
  rows_per_group <- as.vector(table(factor(grp, levels = UniqGroup))) + 2
  group_end <- cumsum(rows_per_group)
  # The summary rows are the last two rows of each group.
  indx <- sort(c(group_end - 1, group_end))

  rownames(res)[indx] <- paste0(rep(c("Avg", "Med"), length(UniqGroup)),
                                rep(UniqGroup, each = 2))
  rownames(res)[-indx] <- rownames(data)
  res
}
fun1(dat,"class")
all.equal(res2,fun1(dat,"class"))
#[1] TRUE
A.K.
On Saturday, April 26, 2014 9:14 PM, arun <smartpink111 at yahoo.com> wrote:
Hi,
Your dput() suggests dat as data.frame.
##Using the results you got,
## For each class: stack its rows from dat, then its row from avg and its row
## from med, and finally bind the per-class pieces together.
res2 <- do.call(rbind, lapply(unique(dat$class), function(cls) {
  piece <- rbind(
    dat[dat$class == cls, ],
    avg[avg$class == cls, ],
    med[med$class == cls, ]
  )
  # Rows whose name does not contain "ara" are the two summary rows.
  is_summary <- !grepl("ara", rownames(piece))
  rownames(piece)[is_summary] <- paste0(c("Avg", "Med"), cls)
  piece
}))
A.K.
On Saturday, April 26, 2014 8:39 PM, Nico Met <nicomet80 at gmail.com> wrote:
Dear all,
I have a matrix (dimensions 16 x 12) where the 2nd column holds the class
information (1,1,1,1,1,2,2,2, etc.). I want to compute the average and median
values for each class and add this information as rows at the end of
each class.
for example:
dput(dat)
structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762,
2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729,
1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563,
2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278,
1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118,
0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856,
1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088
), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135,
0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976,
0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102,
0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426,
2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324,
1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493,
2.229911767, 3.191646399, 1.269919241, 1.387479858), name5 = c(2.172365295,
1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389,
1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025,
1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452
), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008,
0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016,
0.65635473, 0.615835066, 0.64310098, 0.562974891, 0.900622255,
0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899,
1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587,
1.234544153, 1.96629927, 1.74879757, 1.93994009, 1.660173854,
1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997,
0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636,
0.348599173, 0.273806933, 0.32067937, 0.278581115, 0.293726291,
0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471
), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252,
2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155,
1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052,
1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422,
0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542,
0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891,
0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619,
0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073,
0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411,
0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632
)), .Names = c("class", "name1", "name2", "name3", "name4", "name5",
"name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame",
row.names = c("ara1",
"ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9",
"ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16"
))
I wrote this:
# Per-class mean and median of every value column (all columns except
# "class"). aggregate() already returns a data frame, and extra arguments
# such as na.rm are passed on to FUN.
avg <- aggregate(dat[names(dat) != "class"], dat["class"],
                 FUN = mean, na.rm = TRUE)
med <- aggregate(dat[names(dat) != "class"], dat["class"],
                 FUN = median, na.rm = TRUE)
# avg
# class name1 name2 name3 name4 name5 name6 name7
#name8 name9 name10 name11
#1 1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922
#0.2741547 2.376609 0.7154955 0.3654845
#2 2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665
#0.2983373 1.908645 0.5731394 0.3566621
#3 3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971
#0.2935527 2.118543 0.6916275 0.3076734
#4 4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031
#0.2524853 1.941667 0.6283592 0.3592155
#5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
#0.1856815 1.807698 0.3916619 0.7229726
#> med
# class name1 name2 name3 name4 name5 name6 name7
#name8 name9 name10 name11
#1 1 2.497293 1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096
#0.2754850 2.289048 0.7230254 0.3637169
#2 2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908
#0.2972432 1.852571 0.5252870 0.3958789
#3 3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798
#0.2937263 2.002215 0.6442313 0.2976664
#4 4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735
#0.2042306 1.932144 0.6002406 0.3081887
#5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
#0.1856815 1.807698 0.3916619 0.7229726
But I do not know how to add this information to the original data.
For example, for class 1, the output will look like this:
dput(res1)
structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 =
c(2.554923977,
2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378,
2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114,
1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798,
0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525,
0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548,
2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295,
1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049,
2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748,
0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133,
1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583,
1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245,
0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627,
2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733,
2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925,
0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619,
0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455,
0.363716904)), .Names = c("class", "name1", "name2", "name3",
"name4", "name5", "name6", "name7", "name8", "name9", "name10",
"name11"), class = "data.frame", row.names = c("ara1", "ara2",
"ara3", "ara4", "ara5", "Avg", "Med"))
And same will be for other classes.
Thanks a lot !!!!
Nico
[[alternative HTML version deleted]]
______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
More information about the R-help
mailing list