[R] reshape: melt and cast

Tue Sep 1 17:04:15 CEST 2015

Thanks, Stephen. I've looked into the fun.aggregate argument. I don't want
to aggregate, so I thought leaving it blank (allowing it to default to
NULL) would do that.

Here's a corrected post (with further explanation):

Hi,

I have data that looks like this:

>dput(head(ratings))
structure(list(QCode = structure(c(5L, 7L, 5L, 7L, 5L, 7L), .Label =
c("APPEAR",
"FEAR", "FUN", "GRAT", "GUILT", "Joy", "LOVE", "UNGRAT"), class =
"factor"),
    PID = structure(c(1L, 1L, 2L, 2L, 3L, 3L), .Label = c("1123",
    "1136", "1137", "1142", "1146", "1147", "1148", "1149", "1152",
    "1153", "1154", "1156", "1158", "1161", "1164", "1179", "1182",
    "1183", "1191", "1196", "1197", "1198", "1199", "1200", "1201",
    "1203", "1205", "1207", "1208", "1209", "1214", "1216", "1219",
    "1220", "1222", "1223", "1224", "1225", "1226", "1229", "1236",
    "1237", "1238", "1240", "1241", "1243", "1245", "1246", "1248",
    "1254", "1255", "1256", "1257", "1260", "1262", "1264", "1268",
    "1270", "1272", "1278", "1279", "1280", "1282", "1283", "1287",
    "1288", "1292", "1293", "1297", "1310", "1311", "1315", "1329",
    "1332", "1333", "1343", "1346", "1347", "1352", "1354", "1355",
    "1356", "1360", "1368", "1369", "1370", "1378", "1398", "1400",
    "1403", "1404", "1411", "1412", "1420", "1421", "1423", "1424",
    "1426", "1428", "1432", "1433", "1435", "1436", "1438", "1439",
    "1440", "1441", "1443", "1444", "1446", "1447", "1448", "1449",
    "1450", "1453", "1454", "1456", "1459", "1460", "1461", "1462",
    "1463", "1468", "1471", "1475", "1478", "1481", "1482", "1487",
    "1488", "1490", "1493", "1495", "1497", "1503", "1504", "1508",
    "1509", "1511", "1513", "1514", "1515", "1522", "1524", "1525",
    "1526", "1527", "1528", "1529", "1532", "1534", "1536", "1538",
    "1539", "1540", "1543", "1550", "1551", "1552", "1554", "1555",
    "1556", "1558", "1559"), class = "factor"), RaterName = structure(c(1L,
    1L, 1L, 1L, 1L, 1L), .Label = c("cwormhoudt", "zspeidel"), class =
"factor"),
    SI1 = c(2L, 1L, 1L, 1L, 2L, 1L), SI2 = c(2L, 2L, 2L, 2L,
    2L, 3L), SI3 = c(3L, 3L, 3L, 3L, 2L, 4L), SI4 = c(1L, 2L,
    1L, 1L, 1L, 1L), SI5 = c(1L, 1L, 1L, 1L, 1L, 1L), SI6 = c(1L,
    1L, 1L, 1L, 1L, 1L), SI7 = c(3L, 1L, 2L, 1L, 2L, 1L), SI8 = c(3L,
    1L, 3L, 1L, 3L, 1L), SI9 = c(3L, 1L, 2L, 1L, 1L, 1L), SI10 = c(2L,
    1L, 2L, 1L, 2L, 1L), SI11 = c(1L, 3L, 1L, 2L, 1L, 4L)), .Names =
c("QCode",
"PID", "RaterName", "SI1", "SI2", "SI3", "SI4", "SI5", "SI6",
"SI7", "SI8", "SI9", "SI10", "SI11"), row.names = c(NA, 6L), class =
"data.frame")

> dput(tail(ratings))
structure(list(QCode = structure(c(3L, 3L, 3L, 1L, 1L, 3L), .Label =
c("APPEAR",
"FEAR", "FUN", "GRAT", "GUILT", "Joy", "LOVE", "UNGRAT"), class =
"factor"),
    PID = structure(c(161L, 162L, 163L, 163L, 164L, 164L), .Label =
c("1123",
    "1136", "1137", "1142", "1146", "1147", "1148", "1149", "1152",
    "1153", "1154", "1156", "1158", "1161", "1164", "1179", "1182",
    "1183", "1191", "1196", "1197", "1198", "1199", "1200", "1201",
    "1203", "1205", "1207", "1208", "1209", "1214", "1216", "1219",
    "1220", "1222", "1223", "1224", "1225", "1226", "1229", "1236",
    "1237", "1238", "1240", "1241", "1243", "1245", "1246", "1248",
    "1254", "1255", "1256", "1257", "1260", "1262", "1264", "1268",
    "1270", "1272", "1278", "1279", "1280", "1282", "1283", "1287",
    "1288", "1292", "1293", "1297", "1310", "1311", "1315", "1329",
    "1332", "1333", "1343", "1346", "1347", "1352", "1354", "1355",
    "1356", "1360", "1368", "1369", "1370", "1378", "1398", "1400",
    "1403", "1404", "1411", "1412", "1420", "1421", "1423", "1424",
    "1426", "1428", "1432", "1433", "1435", "1436", "1438", "1439",
    "1440", "1441", "1443", "1444", "1446", "1447", "1448", "1449",
    "1450", "1453", "1454", "1456", "1459", "1460", "1461", "1462",
    "1463", "1468", "1471", "1475", "1478", "1481", "1482", "1487",
    "1488", "1490", "1493", "1495", "1497", "1503", "1504", "1508",
    "1509", "1511", "1513", "1514", "1515", "1522", "1524", "1525",
    "1526", "1527", "1528", "1529", "1532", "1534", "1536", "1538",
    "1539", "1540", "1543", "1550", "1551", "1552", "1554", "1555",
    "1556", "1558", "1559"), class = "factor"), RaterName = structure(c(2L,
    2L, 2L, 2L, 2L, 2L), .Label = c("cwormhoudt", "zspeidel"), class =
"factor"),
    SI1 = c(1L, 1L, 1L, 1L, 1L, 1L), SI2 = c(3L, 2L, 2L, 3L,
    3L, 2L), SI3 = c(3L, 2L, 3L, 3L, 3L, 2L), SI4 = c(1L, 1L,
    1L, 1L, 1L, 1L), SI5 = c(1L, 1L, 1L, 1L, 1L, 1L), SI6 = c(1L,
    1L, 1L, 1L, 1L, 1L), SI7 = c(1L, 1L, 1L, 2L, 2L, 1L), SI8 = c(1L,
    1L, 1L, 1L, 1L, 1L), SI9 = c(1L, 1L, 1L, 1L, 1L, 1L), SI10 = c(1L,
    1L, 1L, 2L, 2L, 1L), SI11 = c(1L, 1L, 1L, 1L, 1L, 1L)), .Names =
c("QCode",
"PID", "RaterName", "SI1", "SI2", "SI3", "SI4", "SI5", "SI6",
"SI7", "SI8", "SI9", "SI10", "SI11"), row.names = 2456:2461, class =
"data.frame")

I am trying to use the melt and cast functions to re-arrange to have column
names QCode, PID, sItem, cwormhoudt, zpeidel.  Under each of the last two
columns I want the values that correspond to each of RaterNames.

So, I melt the data like this:

mratings = melt(ratings, variable_name="sItem")

Then cast the data like this:

> outData = cast(mratings, QCode + PID + sItem ~ RaterName)
Aggregation requires fun.aggregate: length used as default

But the value columns appear to be displaying counts and not the original
values.

> dput(head(outData))
structure(list(QCode = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label =
c("APPEAR",
"FEAR", "FUN", "GRAT", "GUILT", "Joy", "LOVE", "UNGRAT"), class =
"factor"),
    PID = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1123",
    "1136", "1137", "1142", "1146", "1147", "1148", "1149", "1152",
    "1153", "1154", "1156", "1158", "1161", "1164", "1179", "1182",
    "1183", "1191", "1196", "1197", "1198", "1199", "1200", "1201",
    "1203", "1205", "1207", "1208", "1209", "1214", "1216", "1219",
    "1220", "1222", "1223", "1224", "1225", "1226", "1229", "1236",
    "1237", "1238", "1240", "1241", "1243", "1245", "1246", "1248",
    "1254", "1255", "1256", "1257", "1260", "1262", "1264", "1268",
    "1270", "1272", "1278", "1279", "1280", "1282", "1283", "1287",
    "1288", "1292", "1293", "1297", "1310", "1311", "1315", "1329",
    "1332", "1333", "1343", "1346", "1347", "1352", "1354", "1355",
    "1356", "1360", "1368", "1369", "1370", "1378", "1398", "1400",
    "1403", "1404", "1411", "1412", "1420", "1421", "1423", "1424",
    "1426", "1428", "1432", "1433", "1435", "1436", "1438", "1439",
    "1440", "1441", "1443", "1444", "1446", "1447", "1448", "1449",
    "1450", "1453", "1454", "1456", "1459", "1460", "1461", "1462",
    "1463", "1468", "1471", "1475", "1478", "1481", "1482", "1487",
    "1488", "1490", "1493", "1495", "1497", "1503", "1504", "1508",
    "1509", "1511", "1513", "1514", "1515", "1522", "1524", "1525",
    "1526", "1527", "1528", "1529", "1532", "1534", "1536", "1538",
    "1539", "1540", "1543", "1550", "1551", "1552", "1554", "1555",
    "1556", "1558", "1559"), class = "factor"), sItem = structure(1:6,
.Label = c("SI1",
    "SI2", "SI3", "SI4", "SI5", "SI6", "SI7", "SI8", "SI9", "SI10",
    "SI11"), class = "factor"), cwormhoudt = c(1L, 1L, 1L, 1L,
    1L, 1L), zspeidel = c(1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("QCode",
"PID", "sItem", "cwormhoudt", "zspeidel"), row.names = c(NA,
6L), class = "data.frame")

The correct output would have 3s, 4s, and 5s in the RaterName columns:

> which(outData$zpeidel==3)
integer(0)

I don't want it to aggregate the data, I simply want it to insert the
original values into the two RaterName columns.  In cast, the fun.aggregate
defaults to NULL, which I assumed was no function.  But the output says it
uses length as default.

On Mon, Aug 31, 2015 at 10:14 PM, Matt Pickard <
matthew.david.pickard at gmail.com> wrote:

> Hi,
>
> I have data that looks like this:
>
>
>
>
>
>
>
>
>
> *> head(ratings)  QCode  PID  RaterName SI1 SI2 SI3 SI4 SI5 SI6 SI7 SI8
> SI9 SI10 SI111 GUILT 1123 cwormhoudt   2   2   3   1   1   1   3   3   3
> 2    12  LOVE 1123 cwormhoudt   1   2   3   2   1   1   1   1   1    1
> 33 GUILT 1136 cwormhoudt   1   2   3   1   1   1   2   3   2    2    14
> LOVE 1136 cwormhoudt   1   2   3   1   1   1   1   1   1    1    25 GUILT
> 1137 cwormhoudt   2   2   2   1   1   1   2   3   1    2    16  LOVE 1137
> cwormhoudt   1   3   4   1   1   1   1   1   1    1    4*
>
>
>
>
>
>
>
>
> *> tail(ratings)      QCode  PID RaterName SI1 SI2 SI3 SI4 SI5 SI6 SI7 SI8
> SI9 SI10 SI112456    FUN 1555  zspeidel   1   3   3   1   1   1   1   1
> 1    1    12457    FUN 1556  zspeidel   1   2   2   1   1   1   1   1
> 1    1    12458    FUN 1558  zspeidel   1   2   3   1   1   1   1   1
> 1    1    12459 APPEAR 1558  zspeidel   1   3   3   1   1   1   2   1
> 1    2    12460 APPEAR 1559  zspeidel   1   3   3   1   1   1   2   1
> 1    2    12461    FUN 1559  zspeidel   1   2   2   1   1   1   1   1
> 1    1    1*
> I am trying to use the melt and cast functions to re-arrange it to look
> like this:
>
>
>
>
>
>
>
>
> *   QCode  PID sItem cwormhoudt zspeidel1 APPEAR 1123   SI1
> 1        12 APPEAR 1123   SI2          4        13 APPEAR 1123
> SI3          1        24 APPEAR 1123   SI4          3        15 APPEAR
> 1123   SI5          1        16 APPEAR 1123   SI6          1        3*
> So, I melt the data like this:
>
>
>
> *mratings = melt(ratings, variable_name="sItem")*
> Then cast the data like this:
>
>
> *> outData = cast(mratings, QCode + PID + sItem ~ RaterName)Aggregation
> requires fun.aggregate: length used as default*
>
> But the value columns appear to be displaying counts and not the original
> values:
>
>
>
>
>
>
>
>
>
>
>
>
>
> *> head(outData)   QCode  PID sItem cwormhoudt zspeidel1 APPEAR 1123
> SI1          1        12 APPEAR 1123   SI2          1        13 APPEAR
> 1123   SI3          1        14 APPEAR 1123   SI4          1        15
> APPEAR 1123   SI5          1        16 APPEAR 1123   SI6          1
> 1> which(outData$zpeidel==3)integer(0)*
> How to I prevent cast from aggregating the data according to counts?  Am I
> doing something wrong?
>
> Thanks in advance.
>
> MP
>

	[[alternative HTML version deleted]]