[Rd] unsorted - suggestion for performance improvement and ALTREP support for POSIXct
Harvey Smith
h@rvey13131 @ending from gm@il@com
Sat Jan 5 05:37:30 CET 2019
I believe the performance of isUnsorted() in sort.c could be improved by
calling REAL() once (outside of the for loop), rather than calling it twice
inside the loop. As an aside, it is implemented in the faster way in
doSort() (sort.c line 401). The example below shows the performance
improvement for a vectors of double of moving REAL() outside the for loop.
# example as implemented in isUnsorted
body = "
R_xlen_t n, i;
n = XLENGTH(x);
for(i = 0; i+1 < n ; i++)
if(REAL(x)[i] > REAL(x)[i+1])
return ScalarLogical(TRUE);
return ScalarLogical(FALSE);";
f1 = inline::cfunction(sig = signature(x='numeric'), body=body)
# example updated with only one call to REAL()
body = "
R_xlen_t n, i;
n = XLENGTH(x);
double* real_x = REAL(x);
for(i = 0; i+1 < n ; i++)
if(real_x[i] > real_x[i+1])
return ScalarLogical(TRUE);
return ScalarLogical(FALSE);";
f2 = inline::cfunction(sig = signature(x='numeric'), body=body)
# unsorted
x.double = as.double(1:1e7) + 0
x.posixct = Sys.time() + x.double
microbenchmark::microbenchmark(
f1(x.double),
f2(x.double), # faster due to one REAL()
f1(x.posixct),
f2(x.posixct), # faster due to one REAL()
unit='ms', times=10)
Unit: milliseconds
expr min lq mean median uq max
neval
f1(x.double) 35.737629 37.991785 43.004432 38.575525 39.198533 80.85625
10
f2(x.double) 6.053373 6.064323 7.238750 6.092453 8.438550 10.69384
10
f1(x.posixct) 36.315705 36.542253 42.349745 38.355395 39.378262 81.59857
10
f2(x.posixct) 6.063946 6.070741 7.579176 6.138518 7.063024 13.94141
10
I would also like to suggest ALTREP support for POSIXct vectors, which are
interpreted as type REAL in the c code, but do not gain the performance
benefits of real vectors. Sorted vectors of timestamps are important for
joining time series and in calls to findInterval().
# unsorted vectors
x.double = as.double(1:1e7) + 0
x.posixct = Sys.time() + x.double
# sort for altrep benefit
x.double.sort <- sort(x.double)
x.posixct.sort <- sort(x.posixct)
microbenchmark::microbenchmark(
is.unsorted(x.double),
is.unsorted(x.double.sort), # faster due to altrep
is.unsorted(x.posixct),
is.unsorted(x.posixct.sort), # no altrep benefit
unit='ms', times=10)
Unit: milliseconds
expr min lq mean median
uq max neval
is.unsorted(x.double) 16.987730 17.010008 17.1577173 17.0862785
17.308674 17.474432 10
is.unsorted(x.double.sort) 0.000378 0.000756 0.0065327 0.0075525
0.010195 0.011706 10
is.unsorted(x.posixct) 36.925876 37.084837 43.4125593 37.4695915
41.858589 78.742174 10
is.unsorted(x.posixct.sort) 36.966654 37.031975 51.1228686 37.1235380
37.777319 153.270170 10
Since there do not appear to be any tests for is.unsorted() these are some
tests to be added for some types.
# integer sequence
x <- -10L:10L
stopifnot(!is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# integer not strictly
x <- -10L:10L
x[2] <- x[3]
stopifnot( is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# integer with NA
x <- -10L:10L
x[2] <- NA
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F)))
# double
x <- seq(from = -10, to = 10, by=0.01)
stopifnot(!is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# double not strictly
x <- seq(from = -10, to = 10, by=0.01)
x[2] <- x[3]
stopifnot( is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# double with NA
x <- seq(from = -10, to = 10, by=0.01)
x[length(x)] <- NA
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F)))
# logical
stopifnot(!is.unsorted( c(F, T, T), strictly = F))
stopifnot( is.unsorted( c(F, T, T), strictly = T))
stopifnot( is.unsorted( c(T, T, F), strictly = F))
stopifnot( is.unsorted( c(T, T, F), strictly = T))
# POSIXct
x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day')
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# POSIXct not strictly
x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day')
x[2] <- x[3]
stopifnot( is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# POSIXct with NA
x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day')
x[length(x)] <- NA
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F)))
[[alternative HTML version deleted]]
More information about the R-devel
mailing list