[R] Aggregate data frame across columns

jim holtman jholtman at gmail.com
Thu Nov 8 02:29:03 CET 2012


Here is one way of doing it:

# get subset of the column names
ss <- substring(names(mySubset), 1, 5)
# create the indices of common column
colIndx <- split(seq(ncol(mySubset)), ss)
rSums <- lapply(colIndx, function(x) rowSums(mySubset[, x], na.rm = TRUE))
# create dataframe
newResult <- as.data.frame(rSums)



On Wed, Nov 7, 2012 at 6:14 PM, Keith Weintraub <kw1958 at gmail.com> wrote:
> Arun, Jeff, Bert,
>    Thanks for your help.
>
> I have put a subset of my data below in mySubset.
>
> I would like to be able to sum the rows by year. In this case the results would be the result data.frame below.
>
> How can I automate something like this and how would I do it quarterly if necessary.
>
> The data has 9 columns with headers:
>    "X200401" "X200402" "X200501" "X200502" "X200503" "X200601" "X200602" "X200701" "X200702"
>
> Here is the code I used to create the result:
>   result<-data.frame(rowSums(mySubset[,1:2], na.rm = TRUE), rowSums(mySubset[,3:5], na.rm = TRUE), rowSums(mySubset[,6:7], na.rm = TRUE), rowSums(mySubset[,8:9], na.rm = TRUE))
>
> Note that my full dataset goes from 200401 through 201208.
>
> I assume there is a way to use one of the apply functions and an inline function to call rowSums with na.rm = TRUE.
>
> Thanks again for your time,
> KW
>
> ______________________________
> Two datasets below: result and mySubset.
>
> result<-structure(list(X2004 = c(0.0159924882870401, 0.0914601927232432,
> 0.138321748009262, 0.156063783591084, 0.168403383789346, 0.171965759793573,
> 0.177147721902362, 0.187522847481161, 0.166666541728156, 0.127352907374406,
> 0.156908213362621, 0.175500673945803, 0.17516598558791, 0.176671361535308,
> 0.174478461658455, 0.157756648535001, 0.180489661678831, 0.189127686535455,
> 0.176267288362896, 0.167844722339248, 0.180507725071878, 0.169459551401114,
> 0.165939970730443, 0.165709877723436, 0.17229145356651, 0.182795171134028,
> 0.166283818929029, 0.15294456192766, 0.166780783496174, 0.181927809974243,
> 0.177579619132214, 0.171811823922994, 0.158385247734671, 0.149479196737791,
> 0.162477792074099, 0.150845832508427, 0.155104452310268, 0.162456727168325,
> 0.155482804148341, 0.138967760361165, 0.146530081719247, 0.157417284793283,
> 0.15859793000523, 0.14774834433617, 0.147320895948278, 0.14926677799197,
> 0.14171723173142, 0.136909644046266, 0.0683144142254727, 0),
>     X2005 = c(0.0314065680219376, 0.163832345277566, 0.265371909518265,
>     0.326014428812549, 0.368234576027844, 0.408135729854325,
>     0.406609083944697, 0.416411672384248, 0.383445330771284,
>     0.303908689700078, 0.279111432968148, 0.299088365053801,
>     0.297231620329575, 0.301554936855911, 0.286267190479442,
>     0.285101593132908, 0.303617540120288, 0.30912628079129, 0.306740852364357,
>     0.288318399163201, 0.290849771380406, 0.284963182810939,
>     0.276913327940756, 0.281228989779383, 0.281587206458797,
>     0.282575344784775, 0.272750026504263, 0.275012113477194,
>     0.275308072336096, 0.2830402877904, 0.278126404864579, 0.270620664127706,
>     0.266703443412622, 0.269170883813461, 0.267706882000802,
>     0.16400030580057, 0.081430630811921, 0, 0, 0, 0, 0, 0, 0,
>     0, 0, 0, 0, 0, 0), X2006 = c(0.0263421114412355, 0.189060034450041,
>     0.307867112279838, 0.36147798027164, 0.421244020838752, 0.443683746556757,
>     0.431455767826625, 0.42139989322102, 0.405361335329633, 0.410239754621491,
>     0.295699686523215, 0.286907309706468, 0.279272095208596,
>     0.30584849030251, 0.292930451848863, 0.2858086693289, 0.277977449378127,
>     0.312603778277765, 0.285799481868903, 0.272257225207343,
>     0.264193812603326, 0.260225982968127, 0.26286016640048, 0.246455494757128,
>     0.132509032599313, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
>     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), X2007 = c(0.0192634102835919,
>     0.0821176964052597, 0.125895446649419, 0.166105172058653,
>     0.193371112440694, 0.192128664695495, 0.205402741722323,
>     0.214804686886319, 0.206944760826543, 0.201633705240255,
>     0.224370691628553, 0.213780729123737, 0.116800798312322,
>     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
>     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("X2004",
> "X2005", "X2006", "X2007"), row.names = c("1", "2", "3", "4",
> "5", "6", "7", "8", "9", "10", "65", "66", "67", "68", "69",
> "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80",
> "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", "91",
> "92", "93", "94", "95", "96", "97", "98", "99", "100", "101",
> "102", "103", "104"), class = "data.frame")
>
> mySubset<-structure(list(X200401 = c(0.00989521347529282, 0.0373090976259132,
> 0.0589205312533588, 0.073423408836654, 0.0684862018994913, 0.0709590032723634,
> 0.0659646021314206, 0.066731059852463, 0.0699763372273688, 0.0541142327051023,
> 0.0747822118456574, 0.0796152619493638, 0.0898395772710088, 0.0799174340369894,
> 0.0858618669044432, 0.0771658556270448, 0.0737537341871075, 0.0920394845122411,
> 0.0919450414276911, 0.0766003513927418, 0.0853798302884387, 0.0906216842117783,
> 0.0770655693805709, 0.0804880021458966, 0.0793736980924377, 0.0867350210905336,
> 0.0844120272331623, 0.0719191481914751, 0.071331576585233, 0.0814113751491231,
> 0.089572460115355, 0.078571913014948, 0.080089664942495, 0.0660045583281544,
> 0.0757219707900531, 0.0726744688001806, 0.0696490161975653, 0.0771202956076235,
> 0.0762013529234614, 0.0699300362070559, 0.0671216348093859, 0.0733447602438439,
> 0.0802859940712496, 0.0725947875424977, 0.0720665984909216, 0.0698008151103992,
> 0.0721384534894301, 0.063005095359257, 0.0683144142254727, NA
> ), X200402 = c(0.00609727481174727, 0.05415109509733, 0.0794012167559036,
> 0.0826403747544301, 0.0999171818898546, 0.101006756521209, 0.111183119770942,
> 0.120791787628698, 0.0966902045007876, 0.0732386746693035, 0.0821260015169637,
> 0.0958854119964396, 0.0853264083169008, 0.0967539274983187, 0.0886165947540122,
> 0.0805907929079561, 0.106735927491724, 0.0970882020232139, 0.0843222469352052,
> 0.0912443709465066, 0.0951278947834392, 0.0788378671893359, 0.0888744013498726,
> 0.0852218755775392, 0.0929177554740725, 0.0960601500434943, 0.0818717916958666,
> 0.0810254137361847, 0.0954492069109415, 0.10051643482512, 0.0880071590168587,
> 0.0932399109080464, 0.0782955827921764, 0.0834746384096371, 0.0867558212840463,
> 0.078171363708246, 0.0854554361127032, 0.0853364315607014, 0.0792814512248797,
> 0.0690377241541093, 0.0794084469098615, 0.084072524549439, 0.07831193593398,
> 0.0751535567936721, 0.0752542974573565, 0.0794659628815707, 0.0695787782419903,
> 0.0739045486870093, NA, NA), X200501 = c(0.00854080602823615,
> 0.051364699994323, 0.08995539446396, 0.101717898920196, 0.118105922434386,
> 0.135053927542703, 0.123127145513553, 0.13818362186389, 0.12917218785641,
> 0.0983075389035003, 0.085624316432386, 0.0935282521645047, 0.0901991846639811,
> 0.0993615010575896, 0.0992432000057031, 0.0868055460512466, 0.0849252866442858,
> 0.0979102576281754, 0.105025596288152, 0.0922107375508941, 0.0998926273383806,
> 0.0856774386841815, 0.0917163152031878, 0.0943033444118917, 0.0843853695036644,
> 0.0887132791797814, 0.0906275671078504, 0.0863714298888374, 0.0776806054638799,
> 0.0867678329677293, 0.0921845225495963, 0.084689327906438, 0.0825709543895964,
> 0.0810031343066813, 0.0886852852447284, 0.077263574821099, 0.081430630811921,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X200502 = c(0.00900303968538731,
> 0.0576729239379193, 0.0836259423521094, 0.109767801817612, 0.131580490198357,
> 0.124872752392476, 0.141559955244379, 0.141158846480108, 0.128389110874103,
> 0.0948122840229878, 0.0937750150831618, 0.0908510155889424, 0.0957285788019051,
> 0.104504993357531, 0.0899944408457642, 0.086320781759903, 0.101031654546572,
> 0.105342921723405, 0.0916835869354207, 0.100095762407223, 0.086099935991629,
> 0.0914324544832966, 0.0927604232438684, 0.0843104393362966, 0.0932276110143389,
> 0.0929390287918012, 0.089818903276638, 0.0858561637868873, 0.0888599236444742,
> 0.0976175751572295, 0.0893681783753294, 0.089929018713794, 0.0866727155079737,
> 0.0978000766866048, 0.085510546897911, 0.0867367309794715, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X200503 = c(0.0138627223083141,
> 0.0547947213453243, 0.0917905727021959, 0.114528728074742, 0.118548163395101,
> 0.148209049919146, 0.141921983186765, 0.137069204040251, 0.125884032040771,
> 0.110788866773589, 0.0997121014525997, 0.114709097300354, 0.111303856863689,
> 0.0976884424407909, 0.0970295496279745, 0.111975265321758, 0.11766059892943,
> 0.10587310143971, 0.110031669140785, 0.096011899205084, 0.104857208050396,
> 0.107853289643461, 0.0924365894936995, 0.102615206031195, 0.103974225940794,
> 0.100923036813193, 0.0923035561197745, 0.10278451980147, 0.108767543227742,
> 0.0986548796654409, 0.0965737039396537, 0.0960023175074738, 0.0974597735150518,
> 0.0903676728201745, 0.0935110498581629, NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA), X200601 = c(0.0119631311150115,
> 0.0841516805661423, 0.169418128881251, 0.180814158472457, 0.209453754648532,
> 0.235012787890493, 0.216047206556313, 0.230628270480073, 0.206476563508043,
> 0.209598857907476, 0.141434330368452, 0.157674725994462, 0.141750823371061,
> 0.161725812068915, 0.153281932920387, 0.155579420440345, 0.142278309240007,
> 0.159200313451883, 0.158293827115383, 0.149958204596761, 0.146979157902604,
> 0.130807704685419, 0.142181637810765, 0.125674995994581, 0.132509032599313,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA), X200602 = c(0.014378980326224,
> 0.104908353883899, 0.138448983398587, 0.180663821799183, 0.211790266190221,
> 0.208670958666264, 0.215408561270312, 0.190771622740946, 0.19888477182159,
> 0.200640896714015, 0.154265356154763, 0.129232583712007, 0.137521271837535,
> 0.144122678233594, 0.139648518928477, 0.130229248888555, 0.13569914013812,
> 0.153403464825881, 0.127505654753521, 0.122299020610582, 0.117214654700722,
> 0.129418278282708, 0.120678528589714, 0.120780498762547, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA), X200701 = c(0.0116534395879406,
> 0.0444620442788004, 0.0680670596774239, 0.0846308039108346, 0.106001085230692,
> 0.0978011302123916, 0.0991769478222925, 0.11822840200494, 0.107317327863878,
> 0.107666023314816, 0.123691926171544, 0.108097563158762, 0.116800798312322,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA), X200702 = c(0.0076099706956513, 0.0376556521264593,
> 0.0578283869719953, 0.0814743681478181, 0.0873700272100016, 0.0943275344831038,
> 0.10622579390003, 0.0965762848813791, 0.0996274329626652, 0.0939676819254394,
> 0.100678765457009, 0.105683165964975, NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
> )), .Names = c("X200401", "X200402", "X200501", "X200502", "X200503",
> "X200601", "X200602", "X200701", "X200702"), row.names = c(1L,
> 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 65L, 66L, 67L, 68L, 69L,
> 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L,
> 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L,
> 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L), class = "data.frame")
>
>
> --
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.



-- 
Jim Holtman
Data Munger Guru

What is the problem that you are trying to solve?
Tell me what you want to do, not how you want to do it.




More information about the R-help mailing list