[R] Problem with reshaping data from wide to long format

Lauri Nikkinen lauri.nikkinen at iki.fi
Thu Mar 25 19:04:56 CET 2010


Hi,

I have a data.frame in wide format which I would like to reshape into
a long format:

example (nonsense) data:

> dput(perus2)
structure(list(id = c(30L, 38L, 21L, 12L, 22L, 28L, 31L, 44L,
8L, 47L, 23L, 20L, 41L, 42L, 29L, 50L, 5L, 33L, 4L, 17L, 11L,
1L, 18L, 6L, 9L, 32L, 16L, 14L, 39L, 48L, 37L, 43L, 25L, 27L,
35L, 40L, 45L, 49L, 2L, 34L, 10L, 15L, 24L, 26L, 46L, 7L, 13L,
3L, 19L, 36L), height1 = c(157.10664436, 162.77883643, 162.57095167,
155.82784611, 158.17430233, 156.21247668, 159.33063095, 160.47626229,
157.54657592, 155.01414518, 158.5172723, 162.89026674, 156.08622305,
159.32250375, 162.23610226, 160.21190346, NA, 155.54527115, 155.94912976,
160.49239576, 159.29210577, 158.93378272, 159.56724207, 162.36425594,
161.60135907, 161.75075865, 162.17477449, 157.24285415, NA, NA,
160.51351242, 159.91685827, NA, 161.20032847, 163.04696627, 161.12942938,
NA, 158.80416016, 155.23140708, 156.31264334, 162.37744195, 160.90847312,
NA, NA, 161.25101061, 158.11268844, 157.49338051, 156.92212718,
155.53316098, 157.68964234), height2 = c(168.21926725, 161.56637515,
167.67702789, NA, 162.30948163, 167.82148015, NA, 167.22602729,
163.12053538, NA, 165.14942295, 167.87354665, 160.38044351, 164.10900509,
168.07467582, NA, NA, 163.03725294, 167.76514949, 161.69014727,
161.5809546, 161.14341254, 164.80047183, 162.05997825, 164.11475164,
166.40195203, NA, 166.0826708, 165.7232283, 164.05199508, 167.3839719,
166.66838331, 162.50198958, 167.25560972, 167.67882525, 160.43653059,
163.93406069, 160.84337152, 163.74551267, 163.51428188, 163.29711762,
NA, 168.345659, NA, 168.00421653, NA, 167.63059459, NA, NA, 168.30745505
), height3 = c(170.90466092, NA, 167.38512765, 172.00014248,
171.84958505, NA, 169.97286094, NA, NA, 169.53023204, 167.64452841,
170.94253421, NA, NA, 169.11010524, 172.06679578, NA, 168.09847664,
169.42974919, 171.17927523, 166.7736197, 169.12926314, 166.34368104,
173.18168378, NA, 170.345414, NA, 172.09205926, NA, NA, NA, NA,
170.59135788, 167.48001013, 170.83121727, 167.90871568, 169.7154018,
NA, 171.11203348, 165.25337916, NA, NA, NA, 167.2210484, 166.26445432,
167.3911325, NA, NA, 169.20495164, 171.19481305), weight1 = c(NA,
42.12117886, 35.006739718, 30.104617789, 37.596524918, 32.681661769,
43.273125397, 40.919884581, 41.417712325, 32.363735308, 33.492860521,
40.679869782, 41.051721029, 39.156918258, 35.594038489, 44.788378759,
31.706213547, 37.600275868, 30.562918803, NA, 37.842347921, 34.049919571,
35.390284683, 41.169443504, 36.314510306, NA, 32.315887067, 34.815182022,
43.753041021, 42.631667022, 43.964726468, 38.665918845, 35.758478283,
31.642950399, 33.31580307, 41.171896621, 34.11696779, 33.872158362,
42.70435576, 41.153390371, 43.830697643, 35.223807744, NA, 37.210599185,
44.775833501, 30.659131352, 40.399049686, 44.368921711, 41.850601771,
37.850971621), weight2 = c(46.614122964, 49.810266131, NA, 51.600870342,
49.009289571, 44.041339142, 52.877022993, NA, 49.99494035, 40.401355811,
43.25422231, 53.470358871, 52.762958304, 52.586173237, 50.286215216,
NA, 44.273412353, 40.473174437, 51.956636192, 46.237083243, 40.679582358,
NA, NA, 53.008553857, 47.919812716, 48.191047925, 54.024111477,
NA, 45.548940909, NA, 53.838601463, NA, 50.50111976, 53.918903127,
45.481404898, 54.465265395, 50.656535116, 53.037833591, 53.346378286,
NA, 40.582047152, 44.724081659, 50.858338403, NA, NA, 40.679801628,
44.8645364, 44.011082672, 51.955119361, NA), weight3 = c(55.198326807,
54.174241705, NA, 54.069526393, 52.746444632, NA, NA, 61.469501304,
51.508762087, 58.266821929, 58.647671581, 59.09318286, 52.884083243,
63.804598792, 58.559146181, NA, 56.556534826, 51.071810157, NA,
62.371304581, NA, 55.025181044, 55.16444531, 59.571659821, 52.516719702,
NA, 57.547876457, 61.692374636, 57.952322011, 64.959944764, 61.631626094,
50.46350485, 51.336554259, 60.036043059, NA, NA, 52.306778299,
50.312191646, 57.281096013, 52.174733394, 51.751740252, NA, 61.142851937,
64.93667359, 57.041472991, 56.285936174, 63.046773681, NA, NA,
55.571892474), sp = c(0L, 0L, 0L, 0L, 1L, 0L, NA, 0L, 0L, 0L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, NA, 1L, 0L, 0L, 0L, 1L, 1L), bmi1 = c(NA, 15.8966143320452,
13.245421082447, 12.3977530712043, 15.027123283702, 13.3928404493200,
17.0458911682568, 15.8895939465852, 16.6866120773622, 13.4683992248259,
13.3290457842967, 15.3316637234161, 16.8500900221585, 15.4260331046336,
13.5232867850418, 17.4492104953802, NA, 15.540944398053, 12.5669219098308,
NA, 14.9138430142300, 13.4798062642435, 13.8994168072589, 15.6168753286232,
13.9056141468016, NA, 12.2871023625512, 14.0807839118743, NA,
NA, 17.0640132231600, 15.1195838206639, NA, 12.1771351358901,
12.5321285552922, 15.8580993895571, NA, 13.4313331175885, 17.7220103647077,
16.8429207495369, 16.6236731966879, 13.6043712362307, NA, NA,
17.2202236903033, 12.2638378494873, 16.2872041214028, 18.0181625678781,
17.3003826973727, 15.2219641887793), bmi2 = c(16.4727475841863,
19.081692780389, NA, NA, 18.6033272435455, 15.6374315460333,
NA, NA, 18.7892211535392, NA, 15.8589484308730, 18.9735487397628,
20.5128647833983, 19.5257058569701, 17.8009993860965, NA, NA,
15.2262715351502, 18.4602479341394, 17.6857432790636, 15.5810293774179,
NA, NA, 20.1834035524235, 17.7917993591549, 17.4040185299102,
NA, NA, 16.5848486941011, NA, 19.2161345801121, NA, 19.1242161059593,
19.2743465419037, 16.1762342002915, 21.1598751272938, 18.8493807769291,
20.5012074556641, 19.8960161913625, NA, 15.2186774698114, NA,
17.9456140418110, NA, NA, NA, 15.9660179598009, NA, NA, NA),
    bmi3 = c(18.8980981166013, NA, NA, 18.2765808591596, 17.8606070520157,
    NA, NA, NA, NA, 20.2734200134073, 20.8675761771662, 20.2226053401634,
    NA, NA, 20.4764948398200, NA, NA, 18.0739757601718, NA, 21.2854316398646,
    NA, 19.2364074182253, 19.9363954704867, 19.8625857537345,
    NA, NA, NA, 20.8309858751195, NA, NA, NA, NA, 17.6405719924585,
    21.4035643852817, NA, NA, 18.1599838638817, NA, 19.563664852469,
    19.1055498419989, NA, NA, NA, 23.2224562874904, 20.6344030442757,
    20.087921316982, NA, NA, NA, 18.9615564192124), spf = structure(c(2L,
    2L, 2L, 2L, 1L, 2L, NA, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
    2L, NA, 2L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
    1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L,
    2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor")),
.Names = c("id",
"height1", "height2", "height3", "weight1", "weight2", "weight3",
"sp", "bmi1", "bmi2", "bmi3", "spf"), row.names = c(NA, -50L), class =
"data.frame")
>

I'm doing this but I get an error:

> perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"),
+                                c("weight1", "weight2", "weight3"),
+                                c("bmi1", "bmi2", "bmi3")),
+                 v.names=c("heigth","weigth", "bmi"),
+                 timevar = "time",
+                 times = factor(c(1, 2, 3)),
+                 idvar = c("id", "sp"),
+                 direction = "long")
Error in `row.names<-.data.frame`(`*tmp*`, value = c("30.0.1", "38.0.1",  :
  duplicate 'row.names' are not allowed
In addition: Warning message:
non-unique value when setting 'row.names': ‘NA.1’
>

This will fix this error, but I would like to avoid this kind of tweak
for NA values:

> perus2$sp[is.na(perus2$sp)] <- 999
> perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"),
+                                c("weight1", "weight2", "weight3"),
+                                c("bmi1", "bmi2", "bmi3")),
+                 v.names=c("heigth","weigth", "bmi"),
+                 timevar = "time",
+                 times = factor(c(1, 2, 3)),
+                 idvar = c("id", "sp"),
+                 direction = "long")
> perus2_trans <- perus2_trans[order(perus2_trans$id, perus2_trans$time),]
> perus2_trans$sp[perus2_trans$sp == 999] <- NA

How to avoid this <- 999 substitute? Thanks in advance.

-Lauri



More information about the R-help mailing list