[R] Obtaining data from a different row of data frame
arun
smartpink111 at yahoo.com
Sun Sep 22 08:27:41 CEST 2013
Hi,
Here is a modified version of the code that avoids sapply():
# Small worked example. For every price column P<k> the matching offset column
# OF<k> says how many rows further down the wanted price sits; the result is
# stored in newPrice<k>. Offsets that point past the last row give NA.
df1<- structure(list(Dates = structure(c(13151, 13152, 13153, 13154,
13157, 13158, 13159, 13160, 13161, 13164), class = "Date"), P1 = c(10,
13, 16, 19, 22, 25, 28, 31, 34, 37), P2 = c(100, 102, 104, 106,
108, 110, 112, 114, 116, 118), P3 = c(90, 94, 98, 102, 106, 110,
114, 118, 122, 126), P4 = c(70, 75, 80, 85, 90, 95, 100, 105,
110, 115), OF1 = c(3, 3, 4, 5, 2, 2, 2, 1, 1, 5), OF2 = c(5,
3, 4, 2, 1, 2, 2, 1, 1, 0), OF3 = c(4, 3, 4, 1, 3, 2, 2, 1, 1,
0), OF4 = c(3, 5, 4, 2, 3, 1, 2, 1, 1, 0)), .Names = c("Dates",
"P1", "P2", "P3", "P4", "OF1", "OF2", "OF3", "OF4"), row.names = c(NA,
-10L), class = "data.frame")
# Force one offset (row 9 + 4 = 13) that overshoots the 10 rows without being
# in the last row, so the out-of-range handling is actually exercised.
df1$OF2[9] <- 4

# df2 holds the reference result produced by the loop version.
df2 <- df1
df2[, 10:13] <- NA
colnames(df2)[10:13] <- paste0("newPrice", 1:4)
##your code
# Column j is P<k>, column j+4 is OF<k>, column j+8 is newPrice<k>.
# Row indices past the last row return NA.
for(j in 2:5) {
df2[j+8] = df2[df2[,j+4] + row(df2)[,j], j]
}

# Vectorised version: flatten the OF and P columns (column-major) and index the
# flattened prices with "own position + offset".
n_rows <- nrow(df1)
of_cols <- grep("^OF", colnames(df1))  # anchored: must not match other names
p_cols <- grep("^P", colnames(df1))    # anchored: must not match "newPrice*"
indx1 <- unlist(df1[, of_cols], use.names = FALSE)
# Row number of every element of the flattened offset vector.
row_id <- rep(seq_len(n_rows), times = length(of_cols))
# An offset that runs past the last row would otherwise pick up a value from
# the next column's stretch of the flattened vector, so blank it out. This
# replaces the hard-coded "rows 6:10 vs. 5:1" mask and works for any number of
# rows and any maximum offset.
indx1[which(row_id + indx1 > n_rows)] <- NA
val1 <- unlist(df1[, p_cols], use.names = FALSE)
df1[, 10:13] <- val1[indx1 + seq_along(indx1)]
colnames(df1)[10:13] <- colnames(df2)[10:13]
identical(df1[, 10:13], df2[, 10:13])
#[1] TRUE
###On a bigger dataset:
# Benchmark data: 2000 consecutive daily dates starting 2006-01-03, 300 columns
# sampled from 10:120 and 300 columns sampled from 0:6. The seed makes the
# samples reproducible.
set.seed(29)
df2<- data.frame(Dates=seq(as.Date("2006-01-03"),length.out=2000,by="1 day"),cbind(matrix(sample(10:120,2000*300,replace=TRUE),ncol=300),matrix(sample(0:6,2000*300,replace=TRUE),ncol=300)))
# Columns 2:301 become P1..P300 and columns 302:601 become OF1..OF300.
colnames(df2)[2:301]<- paste0("P",1:300)
colnames(df2)[302:601]<- paste0("OF",1:300)
# df3 is copied before the newPrice columns are added; the vectorised version
# below works on this copy.
df3<- df2
df2[,602:901]<-NA
colnames(df2)[602:901]<- paste0("newPrice",1:300)
# Loop version: for each P column j, column j+300 is the matching OF column and
# column j+600 receives the values of column j taken at "row + offset".
system.time({
for(j in grep("^P",colnames(df2))) {
df2[j+600] = df2[df2[,j+300] + row(df2)[,j], j]
}
})
# user system elapsed
# 8.508 0.000 8.523
# Number of OF columns; used to repeat the row numbers once per column.
colN_OF<- ncol(df3[,grep("OF",colnames(df3))])
# Vectorised version: flatten the OF and P columns and index the flattened P
# values with "own position + offset".
system.time({
indx1<- unlist(df3[,grep("OF",colnames(df3))],use.names=FALSE)
# Mask offsets that would run past the last row: for rows 1995..2000 an offset
# of at least 6..1 respectively is set to NA. Left unmasked, such an index
# would point into the next column's stretch of the flattened vector.
# NOTE(review): 1995:2000 and 6:1 are hard-coded for 2000 rows and a maximum
# offset of 6; both must be changed together for other data.
indx1[rep(seq(nrow(df3)),colN_OF) %in% 1995:2000][indx1[rep(seq(nrow(df3)),colN_OF) %in% 1995:2000] - rep(6:1,colN_OF)>=0] <-NA
val1<- unlist(df3[,grep("P",colnames(df3))],use.names=FALSE)
df3[,602:901]<- val1[indx1+seq_along(indx1)]
colnames(df3)[602:901]<- colnames(df2)[602:901]
})
# user system elapsed
# 0.568 0.000 0.569
# Both approaches give exactly the same data frame.
identical(df2,df3)
#[1] TRUE
A.K.
----- Original Message -----
From: arun <smartpink111 at yahoo.com>
To: Ira Sharenow <irasharenow100 at yahoo.com>
Cc:
Sent: Sunday, September 22, 2013 1:28 AM
Subject: Re: [R] Obtaining data from a different row of data frame
Ira,
I tried the code on a bigger dataset to check it for errors:
# Benchmark data: 2000 consecutive daily dates starting 2006-01-03, 300 columns
# sampled from 10:120 and 300 columns sampled from 0:6. The seed makes the
# samples reproducible.
set.seed(29)
df2<- data.frame(Dates=seq(as.Date("2006-01-03"),length.out=2000,by="1 day"),cbind(matrix(sample(10:120,2000*300,replace=TRUE),ncol=300),matrix(sample(0:6,2000*300,replace=TRUE),ncol=300)))
# Columns 2:301 become P1..P300 and columns 302:601 become OF1..OF300.
colnames(df2)[2:301]<- paste0("P",1:300)
colnames(df2)[302:601]<- paste0("OF",1:300)
# df3 is copied before the newPrice columns are added; the vectorised version
# below works on this copy.
df3<- df2
df2[,602:901]<-NA
colnames(df2)[602:901]<- paste0("newPrice",1:300)
# Loop version: for each P column j, column j+300 is the matching OF column and
# column j+600 receives the values of column j taken at "row + offset".
system.time({
for(j in grep("^P",colnames(df2))) {
df2[j+600] = df2[df2[,j+300] + row(df2)[,j], j]
}
})
# user system elapsed
# 9.584 0.000 9.601
# vec1[i] is the smallest offset that is out of range for row vec2[i]
# (row 1995 -> 6, ..., row 2000 -> 1).
vec1<- 6:1 ##change values according to the range of actual values in your rows.
vec2<- 1995:2000 ##change accordingly. If the maximum value is say 100, take 100 rows from the tail end. Change the vec1 also so that both are of the same length
system.time({
# For each tail row vec2[i], replace every OF value >= vec1[i] with NA and
# write the rows back. NOTE: this overwrites the OF values of df3 in rows vec2.
df3[vec2,grep("OF",colnames(df3))]<- t(sapply(seq_along(vec1),function(i) {x1<-as.matrix(df3[vec2[i],grep("OF",colnames(df3))]); x1[x1>=vec1[i]]<-NA; x1}))
# Flatten the OF and P columns and index the flattened P values with
# "own position + offset"; the NA offsets produce NA results.
indx1<- unlist(df3[,grep("OF",colnames(df3))],use.names=FALSE)
val1<- unlist(df3[,grep("P",colnames(df3))],use.names=FALSE)
df3[,602:901]<- val1[indx1+seq_along(indx1)]
colnames(df3)[602:901]<- colnames(df2)[602:901]
})
# user system elapsed
# 0.552 0.000 0.553
# Only the newPrice columns are compared here; the OF columns of df3 were
# modified by the masking step above.
identical(df2[,602:901],df3[,602:901])
#[1] TRUE
A.K.
More information about the R-help
mailing list