[R] dates and time series management

Thu Jun 6 06:13:16 CEST 2013

Hi,
I think it is due to the missing values:
I get warnings()
 z.5.annualMax<- daily2annual(z, FUN=max, na.rm=TRUE,dates=1)
#There were 50 or more warnings (use warnings() to see the first 50)
write.csv(z.5max.annual, file = "Stations.csv")
Just to validate  the result:

I tried this:
# Cross-check: split res1 into one two-column data frame (dates + station)
# per station, so each station can drop its own missing rows independently.
res3 <- lapply(seq_len(ncol(res1[, -1])), function(j) {
  pair <- data.frame(res1[, 1], res1[, j + 1])
  names(pair) <- names(res1)[c(1, j + 1)]
  pair
})
# Remove rows with an NA date first, then any row that still contains NA.
res4 <- lapply(res3, function(d) na.omit(d[!is.na(d$dates), ]))
library(zoo)
# One zoo series per station, indexed by the dates column.
zl <- lapply(res4, function(d) zoo(d[, -1], order.by = d[, 1]))
zl.max.annual <- lapply(zl, function(s) daily2annual(s, FUN = max, na.rm = TRUE)) #no warnings()
#na.rm=TRUE inside the daily2annual() didn't show any effect.

 sapply(zl.max.annual,length)
#  [1] 45 42 44 45 37 45 44 44 41 45 25 45 45 45 45 45 45 45 45 45 45 41 40 41 45
 #[26] 44 45 45 45 45 44 42 45 38 45 45 45 38 44 31 45 45 45 42 45 36 42 45 42 45
 #[51] 45 45 44 45 40 41 45 45 45 45 34 45 34 45 45 41 41 45 45 45 45 45 45 45 45
 #[76] 45 43 40 29 44 29 42 45 40 44 33 45 45 43 40 45 45 45 45 43 45 34 45 44 45
#[101] 45 44 30 44 44 42 45 45 43 42 44 45 45 45 45 42 45 39 39
library(xts)
zl.max.annual1<-lapply(zl.max.annual, as.xts)
 zl.merge<- Reduce(function(...) merge(...),zl.max.annual1))
 zl.merge[1:3,1:8]
#             ..1   ..2  ..2.1 ..2.2 ..2.3 ..2.4  ..2.5 ..2.6
#1961-01-01 35.37    NA  13.43 40.88 17.69 38.44  50.56 36.93
#1962-01-01 34.54 34.85 102.97 39.84 73.43 68.88  63.88 22.89
#1963-01-01 18.32    NA  64.18 51.49 14.61 40.79 127.74 25.07
 z.5.annualMax[1:3,1:8]
 #          dt3011120.txt dt3011240.txt dt3011887.txt dt3012205.txt
#1961-01-01         35.37            NA         13.43         40.88
#1962-01-01         34.54         34.85        102.97         39.84
#1963-01-01         18.32            NA         64.18         51.49
 #          dt3012280.txt dt3015405.txt dt3015523.txt dt3015960.txt
#1961-01-01         17.69         38.44         50.56         36.93
#1962-01-01         73.43         68.88         63.88         22.89
#1963-01-01         14.61         40.79        127.74         25.07

Looks like this is the same result as above.
A.K.

________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Wednesday, June 5, 2013 11:04 PM
Subject: Re: dates and time series management

Hi A.K,

Here is the final code:
*******************************************************************
lstf1<- list.files(pattern=".txt")
length(lstf1)
#[1] 119
# Read a set of station text files and return one data frame with one column
# of daily values per file.
#
# Args:
#   lstf: character vector of file paths. Each file must have a header line
#         that yields a year column named V1; the first two columns are
#         dropped before unrolling (presumably year and month, followed by 31
#         daily values -- confirm against the input files).
#
# Returns:
#   A data frame with one column per file, named after the file path. Rows for
#   1961-2005 are unrolled row by row of the input; years missing at the
#   start or end of a file are padded with NA. The -9999.99 codes are NOT
#   converted to NA here.
fun2<- function(lstf){
  lst1<- lapply(lstf, readLines)
  # Some rows have the missing-value code glued to the previous number
  # ("0.00-9999.99"). gsub() matches do not overlap, so back-to-back glued
  # codes ("0.00-9999.99-9999.99") need a second pass to be fully separated.
  split_glued<- function(x) gsub("(\\d+)(-9999.99)","\\1 \\2",x)
  lst3<- lapply(lst1, function(x) split_glued(split_glued(x)))
  lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE))
  # Keep 1961-2005 only and drop rows whose year is NA.
  lst6<- lapply(lst4,function(x) x[!is.na(x$V1) & x$V1>=1961 & x$V1<=2005,])
  lst7<- lapply(lst6,function(x) {
    if(min(x$V1)>1961 || max(x$V1)<2005){
      # 12 all-NA rows for every year missing before / after the record.
      # rbind() matches columns by name, so this relies on the file's columns
      # being called V1, V2, ... like the default names of the NA frames.
      n1<- (min(x$V1)-1961)*12
      x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
      n2<- (2005-max(x$V1))*12
      x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
      rbind(x1,x,x2)
    } else {
      x
    }
  })
  # Drop the first two columns and transpose, so unlist() emits the values of
  # each input row consecutively (row-wise order).
  lst8<-lapply(lst7,function(x) data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE)))
  lst9<- lapply(seq_along(lst8),function(i){
    x<- lst8[[i]]
    colnames(x)<- lstf[i]            # use the argument, not the global lstf1
    row.names(x)<- seq_len(nrow(x))
    x
  })
  do.call(cbind,lst9)}
res<-fun2(lstf1)
dim(res)
# Recode the missing-value sentinel as NA; the which() call is a check and
# should then return integer(0).
res[res==-9999.99]<-NA
which(res==-9999.99)

# Build one date label per row of 'res'. Every month is padded to 31 labels so
# the labels line up with the 31-values-per-month layout; padded labels such as
# "1961-02-30" are not real dates and turn into NA in as.Date() below.
# ISO date strings are used so parsing does not depend on the locale's month
# abbreviations.
dates1<-seq.Date(as.Date("1961-01-01"),as.Date("2005-12-31"),by="day")
dates2<- as.character(dates1)
sldat<- split(dates2,list(gsub("-.*","",dates2)))   # split by year
lst11<-lapply(sldat,function(x) lapply(split(x,gsub(".*-(.*)-.*","\\1",x)), function(y){
  x1<-as.numeric(gsub(".*-.*-(.*)","\\1",y))        # day numbers in this month
  if((31-max(x1))>0) {
    x2<-seq(max(x1)+1,31,1)
    x3<-paste0(unique(gsub("(.*-.*-).*","\\1",y)),x2)
    c(y,x3)
  } else y
}))
# Every month must now hold exactly 31 labels (expected result: FALSE).
# The check has to look at lst11; lst1 is not defined at this level.
any(sapply(lst11,function(x) any(lengths(x)!=31)))
lst22<-lapply(lst11,function(x) unlist(x,use.names=FALSE))
sapply(lst22,length)
dates3<-unlist(lst22,use.names=FALSE)
length(dates3)
res1<- data.frame(dates=dates3,res,stringsAsFactors=FALSE)
str(res1)
res1$dates<-as.Date(res1$dates)
# Drop the padded, non-existent dates.
res2<-res1[!is.na(res1$dates),]
res2[1:3,1:3]
dim(res2)
library(zoo)       # zoo() is called on the next line, so attach it first
library(hydroTSM)  # daily2annual()
z <- zoo(res2[,-1], order.by=res2[,1])
# Annual maximum per station.
# NOTE(review): 'dates=1' looks like the date-column index used for data.frame
# input -- confirm whether it has any effect on a zoo object.
z.5max.annual <- daily2annual(z, dates=1, FUN=max)
write.table(z.5max.annual, file = "Stations.csv", sep = ",")# write results file to current directory

In the second-to-last line of the code, I convert the daily values to annual values and keep only the maximum value in each year, i.e. the maximum over 365 days.

MINOR PROBLEM: My output contains some 'NA' values. Does that mean that, for that station and that year, all of the data were NA or missing? 

Thanks so much.
Atem.

________________________________
From: arun <smartpink111 at yahoo.com>
To: Zilefac Elvis <zilefacelvis at yahoo.com> 
Cc: R help <r-help at r-project.org> 
Sent: Wednesday, June 5, 2013 5:16 PM
Subject: Re: dates and time series management

Hi,
Try this:
lstf1<- list.files(pattern=".txt")
length(lstf1)
#[1] 119
#I changed the function a little bit to unlist by rows to match the dates column I created.

# Read a set of station text files and return one data frame with one column
# of daily values per file.
#
# Args:
#   lstf: character vector of file paths. Each file must have a header line
#         that yields a year column named V1; the first two columns are
#         dropped before unrolling (presumably year and month, followed by 31
#         daily values -- confirm against the input files).
#
# Returns:
#   A data frame with one column per file, named after the file path. Rows for
#   1961-2005 are unrolled row by row of the input; years missing at the
#   start or end of a file are padded with NA. The -9999.99 codes are NOT
#   converted to NA here.
fun2<- function(lstf){
  lst1<- lapply(lstf, readLines)
  # Some rows have the missing-value code glued to the previous number
  # ("0.00-9999.99"). gsub() matches do not overlap, so back-to-back glued
  # codes ("0.00-9999.99-9999.99") need a second pass to be fully separated.
  split_glued<- function(x) gsub("(\\d+)(-9999.99)","\\1 \\2",x)
  lst3<- lapply(lst1, function(x) split_glued(split_glued(x)))
  lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE))
  # Keep 1961-2005 only and drop rows whose year is NA.
  lst6<- lapply(lst4,function(x) x[!is.na(x$V1) & x$V1>=1961 & x$V1<=2005,])
  lst7<- lapply(lst6,function(x) {
    if(min(x$V1)>1961 || max(x$V1)<2005){
      # 12 all-NA rows for every year missing before / after the record.
      # rbind() matches columns by name, so this relies on the file's columns
      # being called V1, V2, ... like the default names of the NA frames.
      n1<- (min(x$V1)-1961)*12
      x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
      n2<- (2005-max(x$V1))*12
      x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
      rbind(x1,x,x2)
    } else {
      x
    }
  })
  # Unlist by rows (transpose first) so the values line up with a dates column
  # that runs day by day within each month.
  lst8<-lapply(lst7,function(x) data.frame(col1=unlist(data.frame(t(x)[-c(1:2),]),use.names=FALSE))) ####changed
  lst9<- lapply(seq_along(lst8),function(i){
    x<- lst8[[i]]
    colnames(x)<- lstf[i]            # use the argument, not the global lstf1
    row.names(x)<- seq_len(nrow(x))
    x
  })
  do.call(cbind,lst9)}
res<-fun2(lstf1)
dim(res)
#[1] 16740   119
 res[res==-9999.99]<-NA
which(res==-9999.99)
#integer(0)

dates1<-seq.Date(as.Date('1Jan1961',format="%d%b%Y"),as.Date('31Dec2005',format="%d%b%Y"),by="day")
dates2<- as.character(dates1)
sldat<- split(dates2,list(gsub("-.*","",dates2)))
 lst11<-lapply(sldat,function(x) lapply(split(x,gsub(".*-(.*)-.*","\\1",x)), function(y){x1<-as.numeric(gsub(".*-.*-(.*)","\\1",y));if((31-max(x1))>0) {x2<-seq(max(x1)+1,31,1);x3<-paste0(unique(gsub("(.*-.*-).*","\\1",y)),x2);c(y,x3)} else y} ))
# Every month should now hold exactly 31 date labels. The check must inspect
# lst11 (the padded per-year, per-month list), not lst1.
any(sapply(lst11,function(x) any(lapply(x,length)!=31)))
#[1] FALSE
lst22<-lapply(lst11,function(x) unlist(x,use.names=FALSE))
sapply(lst22,length)
#1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 
# 372  372  372  372  372  372  372  372  372  372  372  372  372  372  372  372 
#1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 
# 372  372  372  372  372  372  372  372  372  372  372  372  372  372  372  372 
#1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 
# 372  372  372  372  372  372  372  372  372  372  372  372  372 

 dates3<-unlist(lst22,use.names=FALSE)
 length(dates3)
#[1] 16740
res1<- data.frame(dates=dates3,res,stringsAsFactors=FALSE)
str(res1)
'data.frame':    16740 obs. of  120 variables:
 $ dates        : chr  "1961-01-01" "1961-01-02" "1961-01-03" "1961-01-04" ...
 $ dt3011120.txt: num  1.67 0 0 0 0 0 4.17 0 0 0 ...
 $ dt3011240.txt: num  NA NA NA NA NA NA NA NA NA NA ...
 $ dt3011887.txt: num  0.17 0.28 0 0.3 0 0 1.78 0 0.3 0 ...
 $ dt3012205.txt: num  0.34 0.21 0 0.51 0 0 2.82 0 0.3 0 ...
-----------------------------------------------------------
res1$dates<-as.Date(res1$dates)
 res2<-res1[!is.na(res1$dates),]
res2[1:3,1:3]
#       dates dt3011120.txt dt3011240.txt
#1 1961-01-01          1.67            NA
#2 1961-01-02          0.00            NA
#3 1961-01-03          0.00            NA
 dim(res2)
#[1] 16436   120

Now, you can try the reshape() and the zoo().
Hope it helps.
A.K.

________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Wednesday, June 5, 2013 5:17 PM
Subject: Re: dates and time series management

Hi A.K,
I am gradually improving my R skills thanks to your support.
I have this code for the attached data.
********************************************************************************************************
library(hydroTSM)

# Reading the data with 21 daily simulations, from 1961-01-01 up to 2005-12-31
x <- read.csv("data2.csv")

# Creating a single variable with all the dates
dates <- as.Date(paste0(x$year, "-", x$month, "-", x$day), format="%Y-%m-%d")

# Creating a data.frame with 3 columns: simulation number, Date, Rainfall values
x.new <- data.frame(s.num=x[,1], Date=dates, Rainfall=x[,5])

# Creating a data.frame with 22 columns: Dates + Rainfall values for 21 simulations
x.wide <- reshape(x.new, idvar = "Date", timevar = "s.num", direction = "wide")
# Creating a zoo variable
z <- zoo(x.wide[,-1], order.by=x.wide[,1])
# 5-day total rainfall for each one of the simulations

z.5tot <- rollapply(data=z, width=5, FUN=sum, fill=NA, partial= TRUE,
                    align="center")# to get the total of 5-day precipitation
# Maximum value per year of 5-day total rainfall for each one of the simulations.
# The input is z.5tot, the rolling totals computed above; the object 'z.5max'
# used here before is never defined in this script.
z.5max.annual <- daily2annual(z.5tot, dates=1, FUN=max)
*********************************************************************************************************

Problem: I am trying to do a similar thing with 'res' from our previous problem (see below). However, instead of width=5, I need something like 
Max.Daily<-rollapply(data=z, width=372, FUN=max, by.column = TRUE, partial= TRUE, align="center")
# width=1961 to 2005=45years, 16740/45=372

To do this, I need a date column vector just as I did above. Can you show me how to generate daily dates with  format="%Y-%m-%d"? 
Days range from 1 to 31 for all months since we filled for example February having 28/29 days with NA. 
Months from 1 to 12 and years from 1961 to 2005.

If column 1 of 'res' contains dates, then we can use parts of the code above to extract the Maximum value for each year and for each column.
So, my final output will be 45 * 119. 

Thanks so much A.K. I keep learning hard though slowly.  

________________________________
From: arun <smartpink111 at yahoo.com>
To: Zilefac Elvis <zilefacelvis at yahoo.com> 
Cc: R help <r-help at r-project.org> 
Sent: Wednesday, June 5, 2013 9:44 AM
Subject: Re: dates and time series management

Hi Atem,
No problem.

 which(res==-9999.99)
# [1]   18246  397379  420059  426569  427109  603659  604199  662518  664678
#[10]  698982  699522  700062  701142  754745 1289823 1500490 1589487 1716011
#[19] 1837083
 which(res==-9999.99,arr.ind=TRUE)
#        row col
#1506   1506   2
#12359 12359  24
#1559   1559  26
#8069   8069  26

#----------------------
res[ which(res==-9999.99,arr.ind=TRUE)]<-NA
#or

res[res==-9999.99]<-NA
 which(res==-9999.99)
#integer(0)
A.K.

________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Wednesday, June 5, 2013 10:56 AM
Subject: Re: dates and time series management

Hi A.K,

It works as expected. You are too smart.
Can you find all -9999.99 and replace with NA, if only it exists?

lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1 \\2",x)})
 lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)}) 

Thanks so much
A.K.

________________________________
From: arun <smartpink111 at yahoo.com>
To: Zilefac Elvis <zilefacelvis at yahoo.com> 
Cc: R help <r-help at r-project.org> 
Sent: Wednesday, June 5, 2013 7:44 AM
Subject: Re: dates and time series management

Hi,
Try this:
lstf1<- list.files(pattern=".txt")
length(lstf1)
#[1] 119
# Read a set of station text files and return one data frame with one column
# of daily values per file.
#
# Args:
#   lstf: character vector of file paths. Each file must have a header line
#         that yields a year column named V1; the first two columns are
#         dropped before unrolling (presumably year and month, followed by 31
#         daily values -- confirm against the input files).
#
# Returns:
#   A data frame with one column per file, named after the file path. Rows for
#   1961-2005 are kept, years missing at the start or end are padded with NA,
#   and the value columns are unrolled column by column. The -9999.99 codes
#   are NOT converted to NA here.
fun2<- function(lstf){
  lst1<- lapply(lstf, readLines)
  # Some rows have the missing-value code glued to the previous number
  # ("0.00-9999.99"). gsub() matches do not overlap, so back-to-back glued
  # codes need a second pass. The replacement "\\1 \\2" must stay on a single
  # line: a line break inside the literal would insert a newline, not a space.
  split_glued<- function(x) gsub("(\\d+)(-9999.99)","\\1 \\2",x)
  lst3<- lapply(lst1, function(x) split_glued(split_glued(x)))
  lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE))
  # Keep 1961-2005 only and drop rows whose year is NA.
  lst6<- lapply(lst4,function(x) x[!is.na(x$V1) & x$V1>=1961 & x$V1<=2005,])
  lst7<- lapply(lst6,function(x) {
    if(min(x$V1)>1961 || max(x$V1)<2005){
      # 12 all-NA rows for every year missing before / after the record.
      # rbind() matches columns by name, so this relies on the file's columns
      # being called V1, V2, ... like the default names of the NA frames.
      n1<- (min(x$V1)-1961)*12
      x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
      n2<- (2005-max(x$V1))*12
      x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
      rbind(x1,x,x2)
    } else {
      x
    }
  })
  # Drop the first two columns and stack the remaining columns one after the
  # other (column-wise order).
  lst8<- lapply(lst7,function(x) data.frame(col1=unlist(x[,-c(1:2)])))
  lst9<- lapply(seq_along(lst8),function(i){
    x<- lst8[[i]]
    colnames(x)<- lstf[i]            # use the argument, not the global lstf1
    row.names(x)<- seq_len(nrow(x))
    x
  })
  do.call(cbind,lst9)}
res<-fun2(lstf1)
dim(res)
#[1] 16740   119
res[1:5,1:3]
 # dt3011120.txt dt3011240.txt dt3011887.txt
#1          1.67            NA          0.17
#2          0.00            NA          0.28
#3          0.00            NA          0.00
#4          0.00            NA          0.30
#5          0.00            NA          0.00

########################################

There are some formatting issues in your files:
For example, if I run the function line by line:

 lst1<-lapply(lstf1,function(x) readLines(x))
sapply(lst1,function(x) any(grepl("\\d+-9999.99",x)))
  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
FALSE
 [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [37]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
 [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [61] FALSE FALSE FALSE FALSE FALSE
FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [73] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
 [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
 [97] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
[109]
FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE

###means some rows in the a few files have:
#-9999.99 0 0 0 0.00-9999.99 0 0.00-9999.99 0 0 0 0.00-9999.99 (no space before -9999.99)

 lst2<-lapply(lst1,function(x) {gsub("(\\d+)(-9999.99)","\\1
\\2",x)})
sapply(lst2,function(x) any(grepl("\\d+-9999.99",x))) #still a few files had the problem
  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [25] FALSE FALSE FALSE FALSE FALSE FALSE
FALSE FALSE FALSE FALSE FALSE FALSE
 [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [73] FALSE FALSE FALSE FALSE
FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
 [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
 [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
FALSE
lst3<-lapply(lst2,function(x) {x<-gsub("(\\d+)(-9999.99)","\\1 \\2",x)})
any(sapply(lst3,function(x) any(grepl("\\d+-9999.99",x))))
#[1] FALSE
lst4<- lapply(lst3,function(x) read.table(text=x,header=TRUE,stringsAsFactors=FALSE,sep="",fill=TRUE))

any(sapply(lst4,function(x)
any(sapply(x,is.character))))
#[1] FALSE

 lst5<- lapply(lst4,function(x) x[x$V1>=1961 & x$V1<=2005,])
lst6<- lapply(lst5,function(x) x[!is.na(x$V1),])
sapply(lst6,nrow)
 # [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [19] 540 540 540 540 540 540 540 540 540 540 540 540
540 540 540 540 540 540
# [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [73] 540 540 540 540 528 492 528 540 348 540 540 480 540 540 540 540 540 540
# [91] 540 540 540 540 540 540
540 540 540 540 540 540 540 540 528 540 540 540
#[109] 540 540 540 540 540 540 540 540 540 468 540

     lst7<- lapply(lst6,function(x) {

if((min(x$V1)>1961)|(max(x$V1)<2005)){
                         n1<-
(min(x$V1)-1961)*12
                         x1<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n1))
                         n2<-
(2005-max(x$V1))*12
                         x2<- as.data.frame(matrix(NA,ncol=ncol(x),nrow=n2))
                         x3<-
rbind(x1,x,x2)

}
                          else {
                    x
                    }
})

 sapply(lst7,nrow)
#  [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [73] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
540
#[109] 540 540 540 540 540
540 540 540 540 540 540

Hope this helps.
A.K.

________________________________
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Wednesday, June 5, 2013 2:05 AM
Subject: Re: dates and time series management

Hi A.K,
Sorry my internet connection was so bad last evening.

I have attached all the files as .zip.
Below is the output you requested.
As I explained, the start date in 'res' should be 1961 and end date should be 2005 in all 119 files.

Thanks A.K

> lapply(lst1,head,3)
[[1]]
  V1.V2.V3.V4.V5.V6.V7.V8.V9.V10.V11.V12.V13.V14.V15.V16.V17.V18.V19.V20.V21.V22.V23.V24.V25.V26.V27.V28.V29.V30.V31.V32.V33
1  

           1915 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
2                        1915 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
3                    
   1915 3 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

[