[R] Aggregate data frame across columns

arun smartpink111 at yahoo.com
Thu Nov 8 06:21:57 CET 2012


HI,

For the quarterly, this may help:
names(mySubset)[c(5,9)]<-c("X200509","X200706") #changed the column names to test as the subset included only first quarter data.
library(reshape)
dat2<-melt(mySubset)
dat2$variable<-gsub("[X]","",dat2$variable)
dat2$variable2<-gsub("(\\d{4}).*","\\1",dat2$variable)
dat2$yearqtr<-unlist(lapply(strsplit(as.character(as.yearqtr(dat2$variable,"%Y%m"))," "),function(x) paste(x,collapse="")))
res<- do.call(cbind,lapply(lapply(split(dat2,dat2$variable2),function(x) cbind(x[4],x[2])),function(x) do.call(cbind,lapply(split(x,x[1]),function(x) rowSums(matrix(x[,2],nrow=nrow(mySubset)),na.rm=TRUE)))))


head(res)
#         2004Q1     2005Q1     2005Q3     2006Q1     2007Q1      2007Q2
#[1,] 0.01599249 0.01754385 0.01386272 0.02634211 0.01165344 0.007609971
#[2,] 0.09146019 0.10903762 0.05479472 0.18906003 0.04446204 0.037655652
#[3,] 0.13832175 0.17358134 0.09179057 0.30786711 0.06806706 0.057828387
#[4,] 0.15606378 0.21148570 0.11452873 0.36147798 0.08463080 0.081474368
#[5,] 0.16840338 0.24968641 0.11854816 0.42124402 0.10600109 0.087370027
#[6,] 0.17196576 0.25992668 0.14820905 0.44368375 0.09780113 0.094327534
A.K.






----- Original Message -----
From: Keith Weintraub <kw1958 at gmail.com>
To: r-help at r-project.org
Cc: 
Sent: Wednesday, November 7, 2012 6:14 PM
Subject: Re: [R] Aggregate data frame across columns

Arun, Jeff, Bert,
   Thanks for your help.

I have put a subset of my data below in mySubset.

I would like to be able to sum the rows by year. In this case the results would be the result data.frame below.

How can I automate something like this and how would I do it quarterly if necessary.

The data has 9 columns with headers:
   "X200401" "X200402" "X200501" "X200502" "X200503" "X200601" "X200602" "X200701" "X200702"

Here is the code I used to create the result: 
  result<-data.frame(rowSums(mySubset[,1:2], na.rm = TRUE), rowSums(mySubset[,3:5], na.rm = TRUE), rowSums(mySubset[,6:7], na.rm = TRUE), rowSums(mySubset[,8:9], na.rm = TRUE))

Note that my full dataset goes from 200401 through 201208.

I assume there is a way to use one of the apply functions and an inline function to call rowSums with na.rm = TRUE.

Thanks again for your time,
KW

______________________________
Two datasets below: result and mySubset.

result<-structure(list(X2004 = c(0.0159924882870401, 0.0914601927232432, 
0.138321748009262, 0.156063783591084, 0.168403383789346, 0.171965759793573, 
0.177147721902362, 0.187522847481161, 0.166666541728156, 0.127352907374406, 
0.156908213362621, 0.175500673945803, 0.17516598558791, 0.176671361535308, 
0.174478461658455, 0.157756648535001, 0.180489661678831, 0.189127686535455, 
0.176267288362896, 0.167844722339248, 0.180507725071878, 0.169459551401114, 
0.165939970730443, 0.165709877723436, 0.17229145356651, 0.182795171134028, 
0.166283818929029, 0.15294456192766, 0.166780783496174, 0.181927809974243, 
0.177579619132214, 0.171811823922994, 0.158385247734671, 0.149479196737791, 
0.162477792074099, 0.150845832508427, 0.155104452310268, 0.162456727168325, 
0.155482804148341, 0.138967760361165, 0.146530081719247, 0.157417284793283, 
0.15859793000523, 0.14774834433617, 0.147320895948278, 0.14926677799197, 
0.14171723173142, 0.136909644046266, 0.0683144142254727, 0), 
    X2005 = c(0.0314065680219376, 0.163832345277566, 0.265371909518265, 
    0.326014428812549, 0.368234576027844, 0.408135729854325, 
    0.406609083944697, 0.416411672384248, 0.383445330771284, 
    0.303908689700078, 0.279111432968148, 0.299088365053801, 
    0.297231620329575, 0.301554936855911, 0.286267190479442, 
    0.285101593132908, 0.303617540120288, 0.30912628079129, 0.306740852364357, 
    0.288318399163201, 0.290849771380406, 0.284963182810939, 
    0.276913327940756, 0.281228989779383, 0.281587206458797, 
    0.282575344784775, 0.272750026504263, 0.275012113477194, 
    0.275308072336096, 0.2830402877904, 0.278126404864579, 0.270620664127706, 
    0.266703443412622, 0.269170883813461, 0.267706882000802, 
    0.16400030580057, 0.081430630811921, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), X2006 = c(0.0263421114412355, 0.189060034450041, 
    0.307867112279838, 0.36147798027164, 0.421244020838752, 0.443683746556757, 
    0.431455767826625, 0.42139989322102, 0.405361335329633, 0.410239754621491, 
    0.295699686523215, 0.286907309706468, 0.279272095208596, 
    0.30584849030251, 0.292930451848863, 0.2858086693289, 0.277977449378127, 
    0.312603778277765, 0.285799481868903, 0.272257225207343, 
    0.264193812603326, 0.260225982968127, 0.26286016640048, 0.246455494757128, 
    0.132509032599313, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), X2007 = c(0.0192634102835919, 
    0.0821176964052597, 0.125895446649419, 0.166105172058653, 
    0.193371112440694, 0.192128664695495, 0.205402741722323, 
    0.214804686886319, 0.206944760826543, 0.201633705240255, 
    0.224370691628553, 0.213780729123737, 0.116800798312322, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("X2004", 
"X2005", "X2006", "X2007"), row.names = c("1", "2", "3", "4", 
"5", "6", "7", "8", "9", "10", "65", "66", "67", "68", "69", 
"70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", 
"81", "82", "83", "84", "85", "86", "87", "88", "89", "90", "91", 
"92", "93", "94", "95", "96", "97", "98", "99", "100", "101", 
"102", "103", "104"), class = "data.frame")

mySubset<-structure(list(X200401 = c(0.00989521347529282, 0.0373090976259132, 
0.0589205312533588, 0.073423408836654, 0.0684862018994913, 0.0709590032723634, 
0.0659646021314206, 0.066731059852463, 0.0699763372273688, 0.0541142327051023, 
0.0747822118456574, 0.0796152619493638, 0.0898395772710088, 0.0799174340369894, 
0.0858618669044432, 0.0771658556270448, 0.0737537341871075, 0.0920394845122411, 
0.0919450414276911, 0.0766003513927418, 0.0853798302884387, 0.0906216842117783, 
0.0770655693805709, 0.0804880021458966, 0.0793736980924377, 0.0867350210905336, 
0.0844120272331623, 0.0719191481914751, 0.071331576585233, 0.0814113751491231, 
0.089572460115355, 0.078571913014948, 0.080089664942495, 0.0660045583281544, 
0.0757219707900531, 0.0726744688001806, 0.0696490161975653, 0.0771202956076235, 
0.0762013529234614, 0.0699300362070559, 0.0671216348093859, 0.0733447602438439, 
0.0802859940712496, 0.0725947875424977, 0.0720665984909216, 0.0698008151103992, 
0.0721384534894301, 0.063005095359257, 0.0683144142254727, NA
), X200402 = c(0.00609727481174727, 0.05415109509733, 0.0794012167559036, 
0.0826403747544301, 0.0999171818898546, 0.101006756521209, 0.111183119770942, 
0.120791787628698, 0.0966902045007876, 0.0732386746693035, 0.0821260015169637, 
0.0958854119964396, 0.0853264083169008, 0.0967539274983187, 0.0886165947540122, 
0.0805907929079561, 0.106735927491724, 0.0970882020232139, 0.0843222469352052, 
0.0912443709465066, 0.0951278947834392, 0.0788378671893359, 0.0888744013498726, 
0.0852218755775392, 0.0929177554740725, 0.0960601500434943, 0.0818717916958666, 
0.0810254137361847, 0.0954492069109415, 0.10051643482512, 0.0880071590168587, 
0.0932399109080464, 0.0782955827921764, 0.0834746384096371, 0.0867558212840463, 
0.078171363708246, 0.0854554361127032, 0.0853364315607014, 0.0792814512248797, 
0.0690377241541093, 0.0794084469098615, 0.084072524549439, 0.07831193593398, 
0.0751535567936721, 0.0752542974573565, 0.0794659628815707, 0.0695787782419903, 
0.0739045486870093, NA, NA), X200501 = c(0.00854080602823615, 
0.051364699994323, 0.08995539446396, 0.101717898920196, 0.118105922434386, 
0.135053927542703, 0.123127145513553, 0.13818362186389, 0.12917218785641, 
0.0983075389035003, 0.085624316432386, 0.0935282521645047, 0.0901991846639811, 
0.0993615010575896, 0.0992432000057031, 0.0868055460512466, 0.0849252866442858, 
0.0979102576281754, 0.105025596288152, 0.0922107375508941, 0.0998926273383806, 
0.0856774386841815, 0.0917163152031878, 0.0943033444118917, 0.0843853695036644, 
0.0887132791797814, 0.0906275671078504, 0.0863714298888374, 0.0776806054638799, 
0.0867678329677293, 0.0921845225495963, 0.084689327906438, 0.0825709543895964, 
0.0810031343066813, 0.0886852852447284, 0.077263574821099, 0.081430630811921, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X200502 = c(0.00900303968538731, 
0.0576729239379193, 0.0836259423521094, 0.109767801817612, 0.131580490198357, 
0.124872752392476, 0.141559955244379, 0.141158846480108, 0.128389110874103, 
0.0948122840229878, 0.0937750150831618, 0.0908510155889424, 0.0957285788019051, 
0.104504993357531, 0.0899944408457642, 0.086320781759903, 0.101031654546572, 
0.105342921723405, 0.0916835869354207, 0.100095762407223, 0.086099935991629, 
0.0914324544832966, 0.0927604232438684, 0.0843104393362966, 0.0932276110143389, 
0.0929390287918012, 0.089818903276638, 0.0858561637868873, 0.0888599236444742, 
0.0976175751572295, 0.0893681783753294, 0.089929018713794, 0.0866727155079737, 
0.0978000766866048, 0.085510546897911, 0.0867367309794715, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X200503 = c(0.0138627223083141, 
0.0547947213453243, 0.0917905727021959, 0.114528728074742, 0.118548163395101, 
0.148209049919146, 0.141921983186765, 0.137069204040251, 0.125884032040771, 
0.110788866773589, 0.0997121014525997, 0.114709097300354, 0.111303856863689, 
0.0976884424407909, 0.0970295496279745, 0.111975265321758, 0.11766059892943, 
0.10587310143971, 0.110031669140785, 0.096011899205084, 0.104857208050396, 
0.107853289643461, 0.0924365894936995, 0.102615206031195, 0.103974225940794, 
0.100923036813193, 0.0923035561197745, 0.10278451980147, 0.108767543227742, 
0.0986548796654409, 0.0965737039396537, 0.0960023175074738, 0.0974597735150518, 
0.0903676728201745, 0.0935110498581629, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), X200601 = c(0.0119631311150115, 
0.0841516805661423, 0.169418128881251, 0.180814158472457, 0.209453754648532, 
0.235012787890493, 0.216047206556313, 0.230628270480073, 0.206476563508043, 
0.209598857907476, 0.141434330368452, 0.157674725994462, 0.141750823371061, 
0.161725812068915, 0.153281932920387, 0.155579420440345, 0.142278309240007, 
0.159200313451883, 0.158293827115383, 0.149958204596761, 0.146979157902604, 
0.130807704685419, 0.142181637810765, 0.125674995994581, 0.132509032599313, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), X200602 = c(0.014378980326224, 
0.104908353883899, 0.138448983398587, 0.180663821799183, 0.211790266190221, 
0.208670958666264, 0.215408561270312, 0.190771622740946, 0.19888477182159, 
0.200640896714015, 0.154265356154763, 0.129232583712007, 0.137521271837535, 
0.144122678233594, 0.139648518928477, 0.130229248888555, 0.13569914013812, 
0.153403464825881, 0.127505654753521, 0.122299020610582, 0.117214654700722, 
0.129418278282708, 0.120678528589714, 0.120780498762547, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), X200701 = c(0.0116534395879406, 
0.0444620442788004, 0.0680670596774239, 0.0846308039108346, 0.106001085230692, 
0.0978011302123916, 0.0991769478222925, 0.11822840200494, 0.107317327863878, 
0.107666023314816, 0.123691926171544, 0.108097563158762, 0.116800798312322, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), X200702 = c(0.0076099706956513, 0.0376556521264593, 
0.0578283869719953, 0.0814743681478181, 0.0873700272100016, 0.0943275344831038, 
0.10622579390003, 0.0965762848813791, 0.0996274329626652, 0.0939676819254394, 
0.100678765457009, 0.105683165964975, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)), .Names = c("X200401", "X200402", "X200501", "X200502", "X200503", 
"X200601", "X200602", "X200701", "X200702"), row.names = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 65L, 66L, 67L, 68L, 69L, 
70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 
83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 
96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L), class = "data.frame")


--

______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.





More information about the R-help mailing list