[R] R help
arun
smartpink111 at yahoo.com
Thu Mar 20 16:11:14 CET 2014
Hi,
It is better to use dput() (see ?dput) to show the data, e.g. dput(dataset):
# Example input: one row per customer, one revenue column per week (N00..N11).
dat <- data.frame(
  customer_id    = c(8L, 33L, 12L),
  CountryName    = c("US", "CA", "UK"),
  RevenueWeekN00 = c(2.28, 0, 30.18),
  RevenueWeekN01 = c(9.57, 14.69, 43.9),
  RevenueWeekN02 = c(7.54, 3.31, 90.4),
  RevenueWeekN03 = c(8.99, 5.21, 45),
  RevenueWeekN04 = c(21.61, 1.95, 2.9),
  RevenueWeekN05 = c(24.46, 1.51, 4.12),
  RevenueWeekN06 = c(19.45, 1.85, 19.72),
  RevenueWeekN07 = c(120.56, 1.96, 30.8),
  RevenueWeekN08 = c(0.02, 4.88, 102.6),
  RevenueWeekN09 = c(0.15, 3.55, 55.09),
  RevenueWeekN10 = c(0, 3.74, 25.3),
  RevenueWeekN11 = c(0, 4.5, 4.6),
  # Keep CountryName as character, as in the original dput() output.
  stringsAsFactors = FALSE
)
### Expected output, entered by hand from the question.
# Contains only customers 8 (US) and 33 (CA), 12 rows each (24 rows total);
# customer 12 (UK) is not included.
# weekdatesunday runs 0..11 within each customer. RevenueWeekN00 holds the
# weekly revenue for that week, and each following RevenueWeekNkk column is
# the previous column shifted down by one row and padded with 0 at the top.
res <- structure(list(customer_id = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L,
33L, 33L, 33L), CountryName = c("US", "US", "US", "US", "US",
"US", "US", "US", "US", "US", "US", "US", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA"), weekdatesunday = c(0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 0L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L), RevenueWeekN00 = c(2.28, 9.57,
7.54, 8.99, 21.61, 24.46, 19.45, 120.56, 0.02, 0.15, 0, 0, 0,
14.69, 3.31, 5.21, 1.95, 1.51, 1.85, 1.96, 4.88, 3.55, 3.74,
4.5), RevenueWeekN01 = c(0, 2.28, 9.57, 7.54, 8.99, 21.61, 24.46,
19.45, 120.56, 0.02, 0.15, 0, 0, 0, 14.69, 3.31, 5.21, 1.95,
1.51, 1.85, 1.96, 4.88, 3.55, 3.74), RevenueWeekN02 = c(0, 0,
2.28, 9.57, 7.54, 8.99, 21.61, 24.46, 19.45, 120.56, 0.02, 0.15,
0, 0, 0, 14.69, 3.31, 5.21, 1.95, 1.51, 1.85, 1.96, 4.88, 3.55
), RevenueWeekN03 = c(0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61,
24.46, 19.45, 120.56, 0.02, 0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95,
1.51, 1.85, 1.96, 4.88), RevenueWeekN04 = c(0, 0, 0, 0, 2.28,
9.57, 7.54, 8.99, 21.61, 24.46, 19.45, 120.56, 0, 0, 0, 0, 0,
14.69, 3.31, 5.21, 1.95, 1.51, 1.85, 1.96), RevenueWeekN05 = c(0,
0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61, 24.46, 19.45, 0, 0,
0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95, 1.51, 1.85), RevenueWeekN06 = c(0,
0, 0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61, 24.46, 0, 0, 0,
0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95, 1.51), RevenueWeekN07 = c(0,
0, 0, 0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61, 0, 0, 0, 0,
0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95), RevenueWeekN08 = c(0, 0,
0, 0, 0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 0, 0, 0, 0, 0, 0, 0,
0, 0, 14.69, 3.31, 5.21), RevenueWeekN09 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 2.28, 9.57, 7.54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14.69,
3.31), RevenueWeekN10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.28,
9.57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14.69), RevenueWeekN11 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.28, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0)), .Names = c("customer_id", "CountryName", "weekdatesunday",
"RevenueWeekN00", "RevenueWeekN01", "RevenueWeekN02", "RevenueWeekN03",
"RevenueWeekN04", "RevenueWeekN05", "RevenueWeekN06", "RevenueWeekN07",
"RevenueWeekN08", "RevenueWeekN09", "RevenueWeekN10", "RevenueWeekN11"
), class = "data.frame", row.names = c(NA, -24L))
# Reshape the wide weekly-revenue table `dat` to one row per customer and week,
# then add one lagged copy of the revenue column for every remaining week.
id_cols <- c("customer_id", "CountryName")
dat1 <- dat
is_week <- !(names(dat1) %in% id_cols)

# Insert "_" between the name stem and the week number ("RevenueWeekN_00") so
# that reshape() can split each name into a value name and a time value.
names(dat1)[is_week] <- gsub("([[:alpha:]]+)(\\d+)", "\\1_\\2", names(dat1)[is_week])
dat2 <- reshape(dat1, idvar = id_cols, sep = "_", direction = "long",
                varying = names(dat1)[is_week], timevar = "weekdatesunday")

# Restore the customers' original order. unique() keeps factor() from failing
# on duplicated levels when several customers share a country.
country_levels <- unique(dat1$CountryName)
dat3 <- dat2[order(factor(dat2$CountryName, levels = country_levels),
                   dat2$customer_id), ]
row.names(dat3) <- seq_len(nrow(dat3))
# reshape() names the stacked value column after the stem only; rename it by
# name (not by position) to the week-0 column expected in the output.
names(dat3)[names(dat3) == "RevenueWeekN"] <- "RevenueWeekN00"

# One lag column per week after week 0, built in a loop instead of spelling
# out every lag by hand; the number of lags follows the input columns.
library(plyr)
n_lags <- sum(is_week) - 1L

# Add RevenueWeekN01..RevenueWeekN<n_lags> to one customer's rows: each column
# is the previous one shifted down by one row, padded with 0 at the top.
add_lags <- function(piece) {
  for (k in seq_len(n_lags)) {
    prev_col <- sprintf("RevenueWeekN%02d", k - 1L)
    this_col <- sprintf("RevenueWeekN%02d", k)
    piece[[this_col]] <- c(0, head(piece[[prev_col]], -1))
  }
  piece
}

# Group by customer as well as country so that lags never carry over from one
# customer to another within the same country.
dat4 <- ddply(dat3, .(customer_id, CountryName), add_lags)

# ddply() returns the groups sorted by the grouping variables; put them back
# in the original customer order before comparing.
dat5 <- dat4[order(factor(dat4$CountryName, levels = country_levels),
                   dat4$customer_id), ]
row.names(dat5) <- seq_len(nrow(dat5))

# `res` only covers the first customers, so compare just that many rows.
all.equal(res, dat5[seq_len(nrow(res)), ])
#[1] TRUE
A.K.
On Thursday, March 20, 2014 6:22 AM, Malyadri Putchakayala <malyadri.putchakayala at nuevora.com> wrote:
Hi,
If you don't mind, please help me with a lagged ("lagitude") transpose. The data is given below:
customer_id CountryName RevenueWeekN00 RevenueWeekN01
RevenueWeekN02 RevenueWeekN03 RevenueWeekN04 RevenueWeekN05
RevenueWeekN06 RevenueWeekN07 RevenueWeekN08 RevenueWeekN09
RevenueWeekN10 RevenueWeekN11
8 US 2.28 9.57 7.54 8.99 21.61 24.46 19.45
120.56 0.02 0.15 0 0
33 CA 0 14.69 3.31 5.21 1.95 1.51 1.85 1.96
4.88 3.55 3.74 4.5
12 UK 30.18 43.9 90.4 45 2.9 4.12 19.72 30.8
102.6 55.09 25.30 4.6
After the transpose, the expected output is:
customer_id CountryName weekdatesunday RevenueWeekN00
RevenueWeekN01 RevenueWeekN02 RevenueWeekN03 RevenueWeekN04
RevenueWeekN05 RevenueWeekN06 RevenueWeekN07 RevenueWeekN08
RevenueWeekN09 RevenueWeekN10 RevenueWeekN11
8 US 0 2.28 0 0 0 0 0 0
0 0 0 0 0
8 US 1 9.57 2.28 0 0 0 0 0
0 0 0 0 0
8 US 2 7.54 9.57 2.28 0 0 0 0
0 0 0 0 0
8 US 3 8.99 7.54 9.57 2.28 0 0 0
0 0 0 0 0
8 US 4 21.61 8.99 7.54 9.57 2.28 0 0
0 0 0 0 0
8 US 5 24.46 21.61 8.99 7.54 9.57 2.28 0
0 0 0 0 0
8 US 6 19.45 24.46 21.61 8.99 7.54 9.57 2.28
0 0 0 0 0
8 US 7 120.56 19.45 24.46 21.61 8.99 7.54 9.57
2.28 0 0 0 0
8 US 8 0.02 120.56 19.45 24.46 21.61 8.99 7.54
9.57 2.28 0 0 0
8 US 9 0.15 0.02 120.56 19.45 24.46 21.61 8.99
7.54 9.57 2.28 0 0
8 US 10 0 0.15 0.02 120.56 19.45 24.46
21.61 8.99 7.54 9.57 2.28 0
8 US 11 0 0 0.15 0.02 120.56 19.45
24.46 21.61 8.99 7.54 9.57 2.28
33 CA 0 0 0 0 0 0 0 0
0 0 0 0 0
33 CA 1 14.69 0 0 0 0 0 0
0 0 0 0 0
33 CA 2 3.31 14.69 0 0 0 0 0
0 0 0 0 0
33 CA 3 5.21 3.31 14.69 0 0 0 0
0 0 0 0 0
33 CA 4 1.95 5.21 3.31 14.69 0 0 0
0 0 0 0 0
33 CA 5 1.51 1.95 5.21 3.31 14.69 0 0
0 0 0 0 0
33 CA 6 1.85 1.51 1.95 5.21 3.31 14.69 0
0 0 0 0 0
33 CA 7 1.96 1.85 1.51 1.95 5.21 3.31
14.69 0 0 0 0 0
33 CA 8 4.88 1.96 1.85 1.51 1.95 5.21 3.31
14.69 0 0 0 0
33 CA 9 3.55 4.88 1.96 1.85 1.51 1.95 5.21
3.31 14.69 0 0 0
33 CA 10 3.74 3.55 4.88 1.96 1.85 1.51 1.95
5.21 3.31 14.69 0 0
33 CA 11 4.5 3.74 3.55 4.88 1.96 1.85 1.51
1.95 5.21 3.31 14.69 0
In the above output, a new column, weekdatesunday, is added; it holds the sequence 0:11 for each customer.
More information about the R-help
mailing list