[Rd] unsorted - suggestion for performance improvement and ALTREP support for POSIXct

Harvey Smith h@rvey13131 @ending from gm@il@com
Sat Jan 5 05:37:30 CET 2019


I believe the performance of isUnsorted() in sort.c could be improved by
calling REAL() once (outside of the for loop), rather than calling it twice
inside the loop.   As an aside, it is implemented in the faster way in
doSort() (sort.c line 401).  The example below shows the performance
improvement for a vectors of double of moving REAL() outside the for loop.

# example as implemented in isUnsorted
body = "
R_xlen_t n, i;
n = XLENGTH(x);
for(i = 0; i+1 < n ; i++)
  if(REAL(x)[i] > REAL(x)[i+1])
    return ScalarLogical(TRUE);
return ScalarLogical(FALSE);";
f1 = inline::cfunction(sig = signature(x='numeric'), body=body)
# example updated with only one call to REAL()
body = "
R_xlen_t n, i;
n = XLENGTH(x);
double* real_x = REAL(x);
for(i = 0; i+1 < n ; i++)
  if(real_x[i] > real_x[i+1])
    return ScalarLogical(TRUE);
return ScalarLogical(FALSE);";
f2 = inline::cfunction(sig = signature(x='numeric'), body=body)
# unsorted
x.double = as.double(1:1e7) + 0
x.posixct = Sys.time() + x.double
microbenchmark::microbenchmark(
  f1(x.double),
  f2(x.double),  # faster due to one REAL()
  f1(x.posixct),
  f2(x.posixct), # faster due to one REAL()
  unit='ms', times=10)
Unit: milliseconds
          expr       min        lq      mean    median        uq      max
neval
  f1(x.double) 35.737629 37.991785 43.004432 38.575525 39.198533 80.85625
 10
  f2(x.double)  6.053373  6.064323  7.238750  6.092453  8.438550 10.69384
 10
 f1(x.posixct) 36.315705 36.542253 42.349745 38.355395 39.378262 81.59857
 10
 f2(x.posixct)  6.063946  6.070741  7.579176  6.138518  7.063024 13.94141
 10



I would also like to suggest ALTREP support for POSIXct vectors, which are
interpreted as type REAL in the c code, but do not gain the performance
benefits of real vectors.  Sorted vectors of timestamps are important for
joining time series and in calls to findInterval().

# unsorted vectors
x.double = as.double(1:1e7) + 0
x.posixct = Sys.time() + x.double
# sort for altrep benefit
x.double.sort <- sort(x.double)
x.posixct.sort <- sort(x.posixct)
microbenchmark::microbenchmark(
  is.unsorted(x.double),
  is.unsorted(x.double.sort), # faster due to altrep
  is.unsorted(x.posixct),
  is.unsorted(x.posixct.sort), # no altrep benefit
  unit='ms', times=10)
Unit: milliseconds
                        expr       min        lq       mean     median
   uq        max neval
       is.unsorted(x.double) 16.987730 17.010008 17.1577173 17.0862785
17.308674  17.474432    10
  is.unsorted(x.double.sort)  0.000378  0.000756  0.0065327  0.0075525
 0.010195   0.011706    10
      is.unsorted(x.posixct) 36.925876 37.084837 43.4125593 37.4695915
41.858589  78.742174    10
 is.unsorted(x.posixct.sort) 36.966654 37.031975 51.1228686 37.1235380
37.777319 153.270170    10


Since there do not appear to be any tests for is.unsorted() these are some
tests to be added for some types.

# integer sequence
x <- -10L:10L
stopifnot(!is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# integer not strictly
x <- -10L:10L
x[2] <- x[3]
stopifnot( is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# integer with NA
x <- -10L:10L
x[2] <- NA
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F)))
# double
x <- seq(from = -10, to = 10, by=0.01)
stopifnot(!is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# double not strictly
x <- seq(from = -10, to = 10, by=0.01)
x[2] <- x[3]
stopifnot( is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# double with NA
x <- seq(from = -10, to = 10, by=0.01)
x[length(x)] <- NA
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F)))
# logical
stopifnot(!is.unsorted( c(F, T, T), strictly = F))
stopifnot( is.unsorted( c(F, T, T), strictly = T))
stopifnot( is.unsorted( c(T, T, F), strictly = F))
stopifnot( is.unsorted( c(T, T, F), strictly = T))
# POSIXct
x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day')
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# POSIXct not strictly
x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day')
x[2] <- x[3]
stopifnot( is.unsorted(x, na.rm = F, strictly = T))
stopifnot(!is.unsorted(x, na.rm = F, strictly = F))
# POSIXct with NA
x <- seq(from=as.POSIXct('2018-1-1'), to=as.POSIXct('2019-1-1'), by='day')
x[length(x)] <- NA
stopifnot(!is.unsorted(x, na.rm = T, strictly = F))
stopifnot(is.na(is.unsorted(x, na.rm = F, strictly = F)))

	[[alternative HTML version deleted]]



More information about the R-devel mailing list