[R] dates and time series management
arun
smartpink111 at yahoo.com
Thu Jun 6 06:13:16 CEST 2013
Hi,
I think it is due to the missing values:
I get warnings()
z.5.annualMax<- daily2annual(z, FUN=max, na.rm=TRUE,dates=1)
#There were 50 or more warnings (use warnings() to see the first 50)
write.csv(z.5max.annual, file = "Stations.csv")
Just to validate the result:
I tried this:
# Split res1 into one two-column data.frame per station: the first column of
# res1 paired with each remaining column in turn, keeping the original names.
res3 <- lapply(seq_len(ncol(res1[, -1])), function(i) {
  pair <- data.frame(res1[, 1], res1[, i + 1])
  names(pair) <- names(res1)[c(1, i + 1)]
  pair
})
# Drop rows without a date, then any row that still contains an NA.
res4 <- lapply(res3, function(x) na.omit(x[!is.na(x$dates), ]))
library(zoo)
# One zoo series per station, ordered by its date column.
zl <- lapply(res4, function(x) zoo(x[, -1], order.by = x[, 1]))
# Annual maximum of each station series.
zl.max.annual <- lapply(zl, function(x) {
  daily2annual(x, FUN = max, na.rm = TRUE)
}) #no warnings()
#na.rm=TRUE inside the daily2annual() didn't show any effect.
sapply(zl.max.annual, length)
# [1] 45 42 44 45 37 45 44 44 41 45 25 45 45 45 45 45 45 45 45 45 45 41 40 41 45
#[26] 44 45 45 45 45 44 42 45 38 45 45 45 38 44 31 45 45 45 42 45 36 42 45 42 45
#[51] 45 45 44 45 40 41 45 45 45 45 34 45 34 45 45 41 41 45 45 45 45 45 45 45 45
#[76] 45 43 40 29 44 29 42 45 40 44 33 45 45 43 40 45 45 45 45 43 45 34 45 44 45
#[101] 45 44 30 44 44 42 45 45 43 42 44 45 45 45 45 42 45 39 39
library(xts)
# Convert every annual-maximum series to xts so they can be merged by date.
zl.max.annual1 <- lapply(zl.max.annual, as.xts)
# Fold the list into one wide object, merging two series at a time.
# (The original line had an unbalanced extra ")" and did not parse.)
zl.merge <- Reduce(function(...) merge(...), zl.max.annual1)
zl.merge[1:3, 1:8]
# ..1 ..2 ..2.1 ..2.2 ..2.3 ..2.4 ..2.5 ..2.6
#1961-01-01 35.37 NA 13.43 40.88 17.69 38.44 50.56 36.93
#1962-01-01 34.54 34.85 102.97 39.84 73.43 68.88 63.88 22.89
#1963-01-01 18.32 NA 64.18 51.49 14.61 40.79 127.74 25.07
z.5.annualMax[1:3,1:8]
# dt3011120.txt dt3011240.txt dt3011887.txt dt3012205.txt
#1961-01-01 35.37 NA 13.43 40.88
#1962-01-01 34.54 34.85 102.97 39.84
#1963-01-01 18.32 NA 64.18 51.49
# dt3012280.txt dt3015405.txt dt3015523.txt dt3015960.txt
#1961-01-01 17.69 38.44 50.56 36.93
#1962-01-01 73.43 68.88 63.88 22.89
#1963-01-01 14.61 40.79 127.74 25.07
Looks like this is the same result as above.
A.K.
________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com>
Sent: Wednesday, June 5, 2013 11:04 PM
Subject: Re: dates and time series management
Hi A.K,
Here is the final code:
*******************************************************************
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in ".txt" are picked up.
lstf1 <- list.files(pattern = "\\.txt$")
length(lstf1)
#[1] 119

# fun2: read the station files named in `lstf` and return a data.frame with
# one column per file (column name = the file name as given in `lstf`).
#
# Each file is read as a whitespace-separated table with a header. Column V1
# is treated as the year; rows outside 1961-2005 are dropped. The code assumes
# 12 rows per year: stations that start after 1961 or end before 2005 are
# padded with all-NA rows (12 per missing year). The first two columns are
# then dropped and the remaining values are flattened row by row into one
# long vector.
#
# lstf: character vector of file paths.
# Returns: data.frame with length(lstf) columns (NULL if `lstf` is empty).
fun2 <- function(lstf) {
  first_year <- 1961
  last_year <- 2005
  # A value immediately followed by the missing-data code, with no separator.
  glued_code <- "(\\d+)(-9999\\.99)"

  # n all-NA rows carrying the same column names as `template`.
  na_rows <- function(n, template) {
    filler <- as.data.frame(matrix(NA, ncol = ncol(template), nrow = n))
    names(filler) <- names(template)
    filler
  }

  read_station <- function(path) {
    txt <- readLines(path)
    # Two passes: in a run such as "0.00-9999.99-9999.99" the digits needed
    # for the second match are consumed by the first one.
    txt <- gsub(glued_code, "\\1 \\2", txt)
    txt <- gsub(glued_code, "\\1 \\2", txt)
    dat <- read.table(text = txt, header = TRUE, stringsAsFactors = FALSE,
                      sep = "", fill = TRUE)
    in_range <- !is.na(dat$V1) & dat$V1 >= first_year & dat$V1 <= last_year
    dat <- dat[in_range, ]

    if (nrow(dat) == 0) {
      # Nothing inside the period: the whole station is missing.
      dat <- na_rows((last_year - first_year + 1) * 12, dat)
    } else if (min(dat$V1) > first_year || max(dat$V1) < last_year) {
      rows_before <- (min(dat$V1) - first_year) * 12
      rows_after <- (last_year - max(dat$V1)) * 12
      dat <- rbind(na_rows(rows_before, dat), dat, na_rows(rows_after, dat))
    }

    # Transposing puts each original row in a column, so the column-major
    # flattening below walks the table row by row.
    as.vector(t(dat[, -(1:2)]))
  }

  station_columns <- lapply(seq_along(lstf), function(i) {
    one_station <- data.frame(col1 = read_station(lstf[i]))
    colnames(one_station) <- lstf[i]  # use the argument, not a global
    row.names(one_station) <- seq_len(nrow(one_station))
    one_station
  })
  do.call(cbind, station_columns)
}
# Load the packages before their functions are used: zoo() is called below.
library(zoo)
library(hydroTSM)

res <- fun2(lstf1)
dim(res)
# -9999.99 is the missing-value code in the station files.
res[res == -9999.99] <- NA
which(res == -9999.99)

# Build a date label for every cell of a 31-day x 12-month x 45-year grid so
# that it lines up with `res`. ISO date strings avoid the locale-dependent
# month abbreviation that "%d%b%Y" relies on.
dates1 <- seq.Date(as.Date("1961-01-01"), as.Date("2005-12-31"), by = "day")
dates2 <- as.character(dates1)
sldat <- split(dates2, list(gsub("-.*", "", dates2)))  # one element per year

# Pad one month's dates to 31 entries by appending the non-existent days
# (e.g. "1961-02-29"); these strings become NA once converted with as.Date().
pad_month <- function(y) {
  x1 <- as.numeric(gsub(".*-.*-(.*)", "\\1", y))
  if ((31 - max(x1)) > 0) {
    x2 <- seq(max(x1) + 1, 31, 1)
    x3 <- paste0(unique(gsub("(.*-.*-).*", "\\1", y)), x2)
    c(y, x3)
  } else {
    y
  }
}
lst11 <- lapply(sldat, function(x) {
  lapply(split(x, gsub(".*-(.*)-.*", "\\1", x)), pad_month)
})
# Sanity check on lst11 (the original checked `lst1`, which only exists
# inside fun2): every month must now hold exactly 31 entries.
any(sapply(lst11, function(x) any(vapply(x, length, integer(1)) != 31)))
lst22 <- lapply(lst11, function(x) unlist(x, use.names = FALSE))
sapply(lst22, length)
dates3 <- unlist(lst22, use.names = FALSE)
length(dates3)

res1 <- data.frame(dates = dates3, res, stringsAsFactors = FALSE)
str(res1)
res1$dates <- as.Date(res1$dates)
# Rows whose padded date does not exist have an NA date and are dropped.
res2 <- res1[!is.na(res1$dates), ]
res2[1:3, 1:3]
dim(res2)

z <- zoo(res2[, -1], order.by = res2[, 1])
z.5max.annual <- daily2annual(z, dates=1, FUN=max) # dates=1 refers to year-month-day format
write.table(z.5max.annual, file = "Stations.csv", sep = ",") # write results file to current directory
In the second-to-last line of the code, I convert the daily values to annual values and keep only the maximum value in each year, i.e. the max value in 365 days.
MINOR PROBLEM: My output contains some 'NA' values. Does it mean that, for that station and that year, all data were NA or missing?
Thanks so much.
Atem.
________________________________
From: arun <smartpink111 at yahoo.com>
To: Zilefac Elvis <zilefacelvis at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Wednesday, June 5, 2013 5:16 PM
Subject: Re: dates and time series management
Hi,
Try this:
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in ".txt" are picked up.
lstf1 <- list.files(pattern = "\\.txt$")
length(lstf1)
#[1] 119
#I changed the function a little bit to unlist by rows to match the dates column I created.

# fun2: read the station files named in `lstf` and return a data.frame with
# one column per file (column name = the file name as given in `lstf`).
#
# Each file is read as a whitespace-separated table with a header. Column V1
# is treated as the year; rows outside 1961-2005 are dropped. The code assumes
# 12 rows per year: stations that start after 1961 or end before 2005 are
# padded with all-NA rows (12 per missing year). The first two columns are
# then dropped and the remaining values are flattened row by row into one
# long vector.
#
# lstf: character vector of file paths.
# Returns: data.frame with length(lstf) columns (NULL if `lstf` is empty).
fun2 <- function(lstf) {
  first_year <- 1961
  last_year <- 2005
  # A value immediately followed by the missing-data code, with no separator.
  glued_code <- "(\\d+)(-9999\\.99)"

  # n all-NA rows carrying the same column names as `template`.
  na_rows <- function(n, template) {
    filler <- as.data.frame(matrix(NA, ncol = ncol(template), nrow = n))
    names(filler) <- names(template)
    filler
  }

  read_station <- function(path) {
    txt <- readLines(path)
    # Two passes: in a run such as "0.00-9999.99-9999.99" the digits needed
    # for the second match are consumed by the first one.
    txt <- gsub(glued_code, "\\1 \\2", txt)
    txt <- gsub(glued_code, "\\1 \\2", txt)
    dat <- read.table(text = txt, header = TRUE, stringsAsFactors = FALSE,
                      sep = "", fill = TRUE)
    in_range <- !is.na(dat$V1) & dat$V1 >= first_year & dat$V1 <= last_year
    dat <- dat[in_range, ]

    if (nrow(dat) == 0) {
      # Nothing inside the period: the whole station is missing.
      dat <- na_rows((last_year - first_year + 1) * 12, dat)
    } else if (min(dat$V1) > first_year || max(dat$V1) < last_year) {
      rows_before <- (min(dat$V1) - first_year) * 12
      rows_after <- (last_year - max(dat$V1)) * 12
      dat <- rbind(na_rows(rows_before, dat), dat, na_rows(rows_after, dat))
    }

    # Transposing puts each original row in a column, so the column-major
    # flattening below walks the table row by row.
    as.vector(t(dat[, -(1:2)]))
  }

  station_columns <- lapply(seq_along(lstf), function(i) {
    one_station <- data.frame(col1 = read_station(lstf[i]))
    colnames(one_station) <- lstf[i]  # use the argument, not a global
    row.names(one_station) <- seq_len(nrow(one_station))
    one_station
  })
  do.call(cbind, station_columns)
}
res<-fun2(lstf1)
dim(res)
#[1] 16740   119
res[res==-9999.99]<-NA
which(res==-9999.99)
#integer(0)
# Build a date label for every cell of a 31-day x 12-month x 45-year grid.
# ISO date strings avoid the locale-dependent month abbreviation that
# "%d%b%Y" relies on.
dates1 <- seq.Date(as.Date("1961-01-01"), as.Date("2005-12-31"), by = "day")
dates2 <- as.character(dates1)
sldat <- split(dates2, list(gsub("-.*", "", dates2)))  # one element per year

# Pad one month's dates to 31 entries by appending the non-existent days
# (e.g. "1961-02-29"); these strings become NA once converted with as.Date().
pad_month <- function(y) {
  x1 <- as.numeric(gsub(".*-.*-(.*)", "\\1", y))
  if ((31 - max(x1)) > 0) {
    x2 <- seq(max(x1) + 1, 31, 1)
    x3 <- paste0(unique(gsub("(.*-.*-).*", "\\1", y)), x2)
    c(y, x3)
  } else {
    y
  }
}
lst11 <- lapply(sldat, function(x) {
  lapply(split(x, gsub(".*-(.*)-.*", "\\1", x)), pad_month)
})
# Sanity check on lst11 (the original checked `lst1`, which only exists
# inside fun2): every month must now hold exactly 31 entries.
any(sapply(lst11, function(x) any(vapply(x, length, integer(1)) != 31)))
#[1] FALSE
lst22 <- lapply(lst11, function(x) unlist(x, use.names = FALSE))
sapply(lst22, length)
#1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976
# 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372
#1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992
# 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372 372
#1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005
# 372 372 372 372 372 372 372 372 372 372 372 372 372
dates3<-unlist(lst22,use.names=FALSE)
length(dates3)
#[1] 16740
res1<- data.frame(dates=dates3,res,stringsAsFactors=FALSE)
str(res1)
'data.frame': 16740 obs. of 120 variables:
$ dates : chr "1961-01-01" "1961-01-02" "1961-01-03" "1961-01-04" ...
$ dt3011120.txt: num 1.67 0 0 0 0 0 4.17 0 0 0 ...
$ dt3011240.txt: num NA NA NA NA NA NA NA NA NA NA ...
$ dt3011887.txt: num 0.17 0.28 0 0.3 0 0 1.78 0 0.3 0 ...
$ dt3012205.txt: num 0.34 0.21 0 0.51 0 0 2.82 0 0.3 0 ...
-----------------------------------------------------------
res1$dates<-as.Date(res1$dates)
res2<-res1[!is.na(res1$dates),]
res2[1:3,1:3]
# dates dt3011120.txt dt3011240.txt
#1 1961-01-01 1.67 NA
#2 1961-01-02 0.00 NA
#3 1961-01-03 0.00 NA
dim(res2)
#[1] 16436 120
Now, you can try the reshape() and the zoo().
Hope it helps.
A.K.
________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com>
Sent: Wednesday, June 5, 2013 5:17 PM
Subject: Re: dates and time series management
Hi A.K,
I am gradually improving my R skills thanks to your support.
I have this code for the attached data.
********************************************************************************************************
library(hydroTSM)
# NOTE(review): zoo() and rollapply() come from the zoo package; this script
# presumably relies on hydroTSM attaching it - confirm.
# Reading the data with 21 daily simulations, from 1961-01-01 up to 2005-12-31
x <- read.csv("data2.csv")
# Creating a single variable with all the dates
dates <- as.Date(paste0(x$year, "-", x$month, "-", x$day), format="%Y-%m-%d")
# Creating a data.frame with 3 columns: simulation number, Date, Rainfall values
x.new <- data.frame(s.num=x[,1], Date=dates, Rainfall=x[,5])
# Creating a data.frame with 22 columns: Dates + Rainfall values for 21 simulations
x.wide <- reshape(x.new, idvar = "Date", timevar = "s.num", direction = "wide")
# Creating a zoo variable
z <- zoo(x.wide[,-1], order.by=x.wide[,1])
# 5-day total rainfall for each one of the simulations
z.5tot <- rollapply(data=z, width=5, FUN=sum, fill=NA, partial= TRUE,
align="center")# to get the total of 5-day precipitation
# Maximum value per year of 5-day total rainfall for each one of the simulations
# (the input is z.5tot; the original referred to an undefined `z.5max`)
z.5max.annual <- daily2annual(z.5tot, dates=1, FUN=max)
*********************************************************************************************************
Problem: I am trying to do a similar thing with 'res' from our previous problem (see below). However, instead of width=5, I need something like
Max.Daily<-rollapply(data=z, width=372, FUN=max, by.column = TRUE, partial= TRUE, align="center")
# width=1961 to 2005=45years, 16740/45=372
To do this, I need a date column vector just as I did above. Can you show me how to generate daily dates with format="%Y-%m-%d"?
Days range from 1 to 31 for all months since we filled for example February having 28/29 days with NA.
Months from 1 to 12 and years from 1961 to 2005.
If column 1 of 'res' contains dates, then we can use parts of the code above to extract the Maximum value for each year and for each column.
So, my final output will be 45 * 119.
Thanks so much A.K. I keep learning hard though slowly.
________________________________
From: arun <smartpink111 at yahoo.com>
To: Zilefac Elvis <zilefacelvis at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Wednesday, June 5, 2013 9:44 AM
Subject: Re: dates and time series management
Hi Atem,
No problem.
which(res==-9999.99)
# [1] 18246 397379 420059 426569 427109 603659 604199 662518 664678
#[10] 698982 699522 700062 701142 754745 1289823 1500490 1589487 1716011
#[19] 1837083
which(res==-9999.99,arr.ind=TRUE)
# row col
#1506 1506 2
#12359 12359 24
#1559 1559 26
#8069 8069 26
#----------------------
res[ which(res==-9999.99,arr.ind=TRUE)]<-NA
#or
res[res==-9999.99]<-NA
which(res==-9999.99)
#integer(0)
A.K.
________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com>
Sent: Wednesday, June 5, 2013 10:56 AM
Subject: Re: dates and time series management
Hi A.K,
It works as expected. You are too smart.
Can you find all -9999.99 and replace with NA, if only it exists?
lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)})
lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)})
Thanks so much
A.K.
________________________________
From: arun <smartpink111 at yahoo.com>
To: Zilefac Elvis <zilefacelvis at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Wednesday, June 5, 2013 7:44 AM
Subject: Re: dates and time series management
Hi,
Try this:
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in ".txt" are picked up.
lstf1 <- list.files(pattern = "\\.txt$")
length(lstf1)
#[1] 119

# fun2: read the station files named in `lstf` and return a data.frame with
# one column per file (column name = the file name as given in `lstf`).
#
# Each file is read as a whitespace-separated table with a header. Column V1
# is treated as the year; rows outside 1961-2005 are dropped. The code assumes
# 12 rows per year: stations that start after 1961 or end before 2005 are
# padded with all-NA rows (12 per missing year). The first two columns are
# then dropped and the remaining columns are flattened column by column
# (all rows of the third column, then all rows of the fourth, ...).
#
# lstf: character vector of file paths.
# Returns: data.frame with length(lstf) columns (NULL if `lstf` is empty).
fun2 <- function(lstf) {
  first_year <- 1961
  last_year <- 2005
  # A value immediately followed by the missing-data code, with no separator.
  glued_code <- "(\\d+)(-9999\\.99)"

  # n all-NA rows carrying the same column names as `template`.
  na_rows <- function(n, template) {
    filler <- as.data.frame(matrix(NA, ncol = ncol(template), nrow = n))
    names(filler) <- names(template)
    filler
  }

  read_station <- function(path) {
    txt <- readLines(path)
    # Two passes: in a run such as "0.00-9999.99-9999.99" the digits needed
    # for the second match are consumed by the first one. The replacement is
    # "\\1 \\2" with a single space (the wrapped mail had a line break there).
    txt <- gsub(glued_code, "\\1 \\2", txt)
    txt <- gsub(glued_code, "\\1 \\2", txt)
    dat <- read.table(text = txt, header = TRUE, stringsAsFactors = FALSE,
                      sep = "", fill = TRUE)
    in_range <- !is.na(dat$V1) & dat$V1 >= first_year & dat$V1 <= last_year
    dat <- dat[in_range, ]

    if (nrow(dat) == 0) {
      # Nothing inside the period: the whole station is missing.
      dat <- na_rows((last_year - first_year + 1) * 12, dat)
    } else if (min(dat$V1) > first_year || max(dat$V1) < last_year) {
      rows_before <- (min(dat$V1) - first_year) * 12
      rows_after <- (last_year - max(dat$V1)) * 12
      dat <- rbind(na_rows(rows_before, dat), dat, na_rows(rows_after, dat))
    }

    # Column-wise flattening of everything after the first two columns.
    unlist(dat[, -(1:2)], use.names = FALSE)
  }

  station_columns <- lapply(seq_along(lstf), function(i) {
    one_station <- data.frame(col1 = read_station(lstf[i]))
    colnames(one_station) <- lstf[i]  # use the argument, not a global
    row.names(one_station) <- seq_len(nrow(one_station))
    one_station
  })
  do.call(cbind, station_columns)
}
res<-fun2(lstf1)
dim(res)
#[1] 16740 119
res[1:5,1:3]
# dt3011120.txt dt3011240.txt dt3011887.txt
#1 1.67 NA 0.17
#2 0.00 NA 0.28
#3 0.00 NA 0.00
#4 0.00 NA 0.30
#5 0.00 NA 0.00
########################################
There are some formatting issues in your files:
For example, if I run the function line by line:
lst1<-lapply(lstf1,function(x) readLines(x))
sapply(lst1,function(x) any(grepl("\\d+-9999.99",x)))
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
[97] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE
###means some rows in the a few files have:
#-9999.99 0 0 0 0.00-9999.99 0 0.00-9999.99 0 0 0 0.00-9999.99 (no space before -9999.99)
# Insert a space between a value and a directly following -9999.99 code.
# The replacement must be "\\1 \\2" on one line; the wrapped version put a
# line break inside the string literal.
lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)})
sapply(lst2,function(x) any(grepl("\\d+-9999.99",x))) #still a few files had the problem
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)})
any(sapply(lst3,function(x) any(grepl("\\d+-9999.99",x))))
#[1] FALSE
lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE))
any(sapply(lst4,function(x)
any(sapply(x,is.character))))
#[1] FALSE
lst5<- lapply(lst4,function(x) x[x$V1>=1961 & x$V1<=2005,])
lst6<- lapply(lst5,function(x) x[!is.na(x$V1),])
sapply(lst6,nrow)
# [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [73] 540 540 540 540 528 492 528 540 348 540 540 480 540 540 540 540 540 540
# [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 528 540 540 540
#[109] 540 540 540 540 540 540 540 540 540 468 540
lst7<- lapply(lst6,function(x) {
if((min(x$V1)>1961)|(max(x$V1)<2005)){
n1<-
(min(x$V1)-1961)*12
x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
n2<-
(2005-max(x$V1))*12
x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
x3<-
rbind(x1,x,x2)
}
else {
x
}
})
sapply(lst7,nrow)
# [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [73] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
#[109] 540 540 540 540 540 540 540 540 540 540 540
Hope this helps.
A.K.
________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com>
Sent: Wednesday, June 5, 2013 2:05 AM
Subject: Re: dates and time series management
Hi A.K,
Sorry my internet connection was so bad last evening.
I have attached all the files as .zip.
Below is the output you requested.
As I explained, the start date in 'res' should be 1961 and end date should be 2005 in all 119 files.
Thanks A.K
> lapply(lst1,head,3)
[[1]]
V1.V2.V3.V4.V5.V6.V7.V8.V9.V10.V11.V12.V13.V14.V15.V16.V17.V18.V19.V20.V21.V22.V23.V24.V25.V26.V27.V28.V29.V30.V31.V32.V33
1
1915 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
2 1915 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
3
1915 3 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[
More information about the R-help
mailing list