[R-SIG-Finance] efficient extraction of local extrema and zero-crossings in large multivariate zoo?
Gabor Grothendieck
ggrothendieck at gmail.com
Fri Jun 26 14:46:32 CEST 2009
Try this. It runs about 10x faster. (Note that the na.rm = FALSE
argument has been added to the na.approx line in find.extrema
to correspond to your description.)
find.extrema = function(myzoo) {
days = as.POSIXct(levels(as.factor(as.Date(index(myzoo)))))
ret = myzoo
ret[] <- NA
for (tcol in 1:dim(myzoo)[2]) {
for (day in days) {
this <- window(myzoo[,tcol], start=day, end=day+24*60*60-1)
thismax <- this[which.max(this)]
this[this<0] <- 0 ## remove negative values first
thiszero <- this[which.min(this)]
ret[index(ret) == index(thismax), tcol] <- coredata(thismax)
ret[index(ret) == index(thiszero), tcol] <- coredata(thiszero)
### ret[index(thismax), tcol] <- coredata(thismax) ### gives an error
} # end days
} # end tcol
ret = na.approx(ret, na.rm = FALSE) #### added na.rm=
return(ret)
}
hours=1e4 # about 2 year's worth of hours
tmp=(as.POSIXct('2001-01-01')+1:hours*60*60)
tmpz=zoo(cbind(a=sin(as.integer(tmp)/1e4), b=sin(as.integer(tmp)/1.1e4)), tmp)
system.time(tmpextrema <- find.extrema(tmpz))
# 25.06 0.05 25.37
f <- function(x) {
ret <- NA * x
ix <- ave(x, as.Date(time(tmpz)), FUN = function(x) {
logic <- rep(FALSE, length(x))
logic[which.max(x)] <- TRUE
logic
})
ix <- as.logical(ix)
ret[ix] <- x[ix]
a <- x
a[a < 0] <- 0
ix <- ave(a, as.Date(time(tmpz)), FUN = function(x) {
logic <- rep(FALSE, length(x))
logic[which.min(x)] <- TRUE
logic
})
ix <- as.logical(ix)
ret[ix] <- a[ix]
na.approx(ret, na.rm = FALSE)
}
system.time(out <- do.call(cbind, lapply(tmpz, f)))
# 2.39 0.02 2.48
all.equal(tmpextrema, out, check.attributes = FALSE)
# TRUE
> find.extrema = function(myzoo) {
+ days = as.POSIXct(levels(as.factor(as.Date(index(myzoo)))))
+ ret = myzoo
+ ret[] <- NA
+ for (tcol in 1:dim(myzoo)[2]) {
+ for (day in days) {
+ this <- window(myzoo[,tcol], start=day, end=day+24*60*60-1)
+ thismax <- this[which.max(this)]
+ this[this<0] <- 0 ## remove negative values first
+ thiszero <- this[which.min(this)]
+ ret[index(ret) == index(thismax), tcol] <- coredata(thismax)
+ ret[index(ret) == index(thiszero), tcol] <- coredata(thiszero)
+ ### ret[index(thismax), tcol] <- coredata(thismax) ### gives an error
+ } # end days
+ } # end tcol
+ ret = na.approx(ret, na.rm = FALSE)
+ return(ret)
+ }
>
> hours=1e4 # about 2 year's worth of hours
> # hours = 27
> tmp=(as.POSIXct('2001-01-01')+1:hours*60*60)
> tmpz=zoo(cbind(a=sin(as.integer(tmp)/1e4), b=sin(as.integer(tmp)/1.1e4)), tmp)
> system.time(tmpextrema <- find.extrema(tmpz))
user system elapsed
25.06 0.05 25.37
>
> f <- function(x) {
+ ret <- NA * x
+ ix <- ave(x, as.Date(time(tmpz)), FUN = function(x) {
+ logic <- rep(FALSE, length(x))
+ logic[which.max(x)] <- TRUE
+ logic
+ })
+ ix <- as.logical(ix)
+ ret[ix] <- x[ix]
+ a <- x
+ a[a < 0] <- 0
+ ix <- ave(a, as.Date(time(tmpz)), FUN = function(x) {
+ logic <- rep(FALSE, length(x))
+ logic[which.min(x)] <- TRUE
+ logic
+ })
+ ix <- as.logical(ix)
+ ret[ix] <- a[ix]
+ na.approx(ret, na.rm = FALSE)
+ }
> system.time(out <- do.call(cbind, lapply(tmpz, f)))
user system elapsed
2.39 0.02 2.48
>
> all.equal(tmpextrema, out, check.attributes = FALSE)
[1] TRUE
On Thu, Jun 25, 2009 at 6:37 AM, Christian Gunning<icos.atropa at gmail.com> wrote:
> I have an multi-year hour-sampled multivariate zoo from which i'm
> trying to extract index() and coredata() of daily max and
> zero-crossing for each variable and use na.approx() to fill in
> intervening values, so that the resulting zoo has the same dimensions
> of the input zoo. Currently i'm looping over each column and day,
> extracting a day's worth of data with window(), and using which.max()
> to extract the "record". For short zoos it works fine. It doesn't
> scale well, though - for example, 2e4 points takes ~4-6 times longer
> than 1e4 points with 2 variables. Am i missing major bottlenecks or
> vectorization potentials here?
>
> find.extrema = function(myzoo) {
> days = as.POSIXct(levels(as.factor(as.Date(index(myzoo)))))
> ret = myzoo
> ret[] <- NA
> for (tcol in 1:dim(myzoo)[2]) {
> for (day in days) {
> this <- window(myzoo[,tcol], start=day, end=day+24*60*60-1)
> thismax <- this[which.max(this)]
> this[this<0] <- 0 ## remove negative values first
> thiszero <- this[which.min(this)]
> ret[index(ret) == index(thismax), tcol] <- coredata(thismax)
> ret[index(ret) == index(thiszero), tcol] <- coredata(thiszero)
> ### ret[index(thismax), tcol] <- coredata(thismax) ### gives an error
> } # end days
> } # end tcol
> ret = na.approx(ret)
> return(ret)
> }
>
> hours=1e4 # about 2 year's worth of hours
> tmp=(as.POSIXct('2001-01-01')+1:hours*60*60)
> tmpz=zoo(cbind(a=sin(as.integer(tmp)/1e4), b=sin(as.integer(tmp)/1.1e4)), tmp)
> system.time(tmpextrema <- find.extrema(tmpz))
>
> thanks,
> christian gunning
> university of new mexico
>
> _______________________________________________
> R-SIG-Finance at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-sig-finance
> -- Subscriber-posting only.
> -- If you want to post, subscribe first.
>
More information about the R-SIG-Finance
mailing list