Last updated on 2025-12-31 03:51:37 CET.
| Package | ERROR | NOTE | OK |
|---|---|---|---|
| glow | 2 | 11 | |
| qs | 8 | 5 | |
| qs2 | 8 | 5 | |
| seqtrie | 3 | 10 | |
| stringfish | 8 | 5 |
Current CRAN status: NOTE: 2, OK: 11
Version: 0.13.0
Check: installed package size
Result: NOTE
installed size is 11.5Mb
sub-directories of 1Mb or more:
doc 1.3Mb
libs 9.9Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Current CRAN status: ERROR: 8, NOTE: 5
Version: 0.27.3
Check: compiled code
Result: WARN
File ‘qs/libs/qs.so’:
Found non-API calls to R: ‘ATTRIB’, ‘CLOENV’, ‘ENCLOS’, ‘FRAME’,
‘HASHTAB’, ‘IS_S4_OBJECT’, ‘LEVELS’, ‘OBJECT’, ‘PRENV’,
‘Rf_allocSExp’, ‘SETLEVELS’, ‘SET_ATTRIB’, ‘SET_CLOENV’,
‘SET_ENCLOS’, ‘SET_FRAME’, ‘SET_HASHTAB’, ‘SET_OBJECT’,
‘SET_PRENV’, ‘SET_S4_OBJECT’, ‘SET_TRUELENGTH’
These entry points may be removed soon:
‘SET_FRAME’, ‘SET_HASHTAB’, ‘SET_ENCLOS’, ‘SET_S4_OBJECT’, ‘FRAME’, ‘HASHTAB’, ‘IS_S4_OBJECT’, ‘CLOENV’, ‘ENCLOS’, ‘OBJECT’, ‘SET_CLOENV’, ‘LEVELS’, ‘SETLEVELS’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [181s/204s]
Running ‘qattributes_testing.R’ [41s/50s]
Running ‘qsavemload_testing.R’ [2s/3s]
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01868 s
strings: 1, 0.0017 s
strings: 2, 0.001171 s
strings: 4, 0.00117 s
strings: 8, 0.006366 s
strings: 31, 0.005348 s
strings: 33, 0.0075 s
strings: 32, 0.02449 s
strings: 255, 0.004352 s
strings: 257, 0.0132 s
strings: 256, 0.01869 s
strings: 65535, 0.001276 s
strings: 65537, 0.005886 s
strings: 65536, 0.001058 s
strings: 1e+06, 0.01041 s
Character Vectors: 0, 0.02795 s
Character Vectors: 1, 0.001427 s
Character Vectors: 2, 0.001441 s
Character Vectors: 4, 0.0001808 s
Character Vectors: 8, 0.004243 s
Character Vectors: 31, 0.00513 s
Character Vectors: 33, 0.000136 s
Character Vectors: 32, 0.002042 s
Character Vectors: 255, 0.001428 s
Character Vectors: 257, 0.008457 s
Character Vectors: 256, 0.007243 s
Character Vectors: 65535, 0.004303 s
Character Vectors: 65537, 0.007849 s
Character Vectors: 65536, 0.006248 s
Stringfish: 0, 0.005284 s
Stringfish: 1, 0.003982 s
Stringfish: 2, 0.002518 s
Stringfish: 4, 0.006115 s
Stringfish: 8, 0.003498 s
Stringfish: 31, 0.000124 s
Stringfish: 33, 0.001588 s
Stringfish: 32, 0.0008395 s
Stringfish: 255, 0.002192 s
Stringfish: 257, 0.001387 s
Stringfish: 256, 0.004096 s
Stringfish: 65535, 0.003763 s
Stringfish: 65537, 0.003113 s
Stringfish: 65536, 0.007076 s
Integers: 0, 0.01274 s
Integers: 1, 0.01033 s
Integers: 2, 0.002556 s
Integers: 4, 0.005176 s
Integers: 8, 0.01934 s
Integers: 31, 0.004113 s
Integers: 33, 0.01176 s
Integers: 32, 0.01143 s
Integers: 255, 0.006557 s
Integers: 257, 0.002034 s
Integers: 256, 0.00629 s
Integers: 65535, 0.01359 s
Integers: 65537, 0.02571 s
Integers: 65536, 0.0062 s
Integers: 1e+06, 0.07506 s
Numeric: 0, 0.01111 s
Numeric: 1, 0.01003 s
Numeric: 2, 0.01328 s
Numeric: 4, 0.002917 s
Numeric: 8, 0.006902 s
Numeric: 31, 0.003796 s
Numeric: 33, 0.005308 s
Numeric: 32, 0.005921 s
Numeric: 255, 0.002519 s
Numeric: 257, 0.0008633 s
Numeric: 256, 0.004995 s
Numeric: 65535, 0.02918 s
Numeric: 65537, 0.02839 s
Numeric: 65536, 0.01186 s
Numeric: 1e+06, 0.04033 s
Logical: 0, 0.09387 s
Logical: 1, 0.04951 s
Logical: 2, 0.003968 s
Logical: 4, 0.005989 s
Logical: 8, 0.01335 s
Logical: 31, 0.01522 s
Logical: 33, 0.002097 s
Logical: 32, 0.009212 s
Logical: 255, 0.0005702 s
Logical: 257, 0.01556 s
Logical: 256, 0.01186 s
Logical: 65535, 0.06172 s
Logical: 65537, 0.0433 s
Logical: 65536, 0.02476 s
Logical: 1e+06, 0.4037 s
List: 0, 0.0006732 s
List: 1, 0.01061 s
List: 2, 0.0008037 s
List: 4, 0.005811 s
List: 8, 0.004332 s
List: 31, 0.005385 s
List: 33, 0.004966 s
List: 32, 0.0112 s
List: 255, 0.003836 s
List: 257, 0.008684 s
List: 256, 0.001527 s
List: 65535, 0.03311 s
List: 65537, 0.06998 s
List: 65536, 0.0393 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-debian-clang
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [159s/157s]
Running ‘qattributes_testing.R’ [36s/44s]
Running ‘qsavemload_testing.R’ [1s/2s]
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01306 s
strings: 1, 0.007585 s
strings: 2, 0.001635 s
strings: 4, 0.002374 s
strings: 8, 0.005748 s
strings: 31, 0.004363 s
strings: 33, 0.01024 s
strings: 32, 0.0005736 s
strings: 255, 0.002707 s
strings: 257, 0.001241 s
strings: 256, 0.004923 s
strings: 65535, 0.003417 s
strings: 65537, 0.02457 s
strings: 65536, 0.0006835 s
strings: 1e+06, 0.01349 s
Character Vectors: 0, 0.003744 s
Character Vectors: 1, 0.004089 s
Character Vectors: 2, 6.501e-05 s
Character Vectors: 4, 0.001603 s
Character Vectors: 8, 0.004014 s
Character Vectors: 31, 0.002779 s
Character Vectors: 33, 8.965e-05 s
Character Vectors: 32, 0.004317 s
Character Vectors: 255, 0.006688 s
Character Vectors: 257, 0.03285 s
Character Vectors: 256, 0.004786 s
Character Vectors: 65535, 0.00778 s
Character Vectors: 65537, 0.005225 s
Character Vectors: 65536, 0.006758 s
Stringfish: 0, 0.001373 s
Stringfish: 1, 0.0002126 s
Stringfish: 2, 0.002523 s
Stringfish: 4, 0.00404 s
Stringfish: 8, 0.005134 s
Stringfish: 31, 0.002422 s
Stringfish: 33, 0.005888 s
Stringfish: 32, 0.0001361 s
Stringfish: 255, 0.006665 s
Stringfish: 257, 0.001773 s
Stringfish: 256, 0.002375 s
Stringfish: 65535, 0.007579 s
Stringfish: 65537, 0.004106 s
Stringfish: 65536, 0.004373 s
Integers: 0, 0.02502 s
Integers: 1, 0.0133 s
Integers: 2, 0.0003766 s
Integers: 4, 0.003473 s
Integers: 8, 0.002197 s
Integers: 31, 0.001887 s
Integers: 33, 0.0007488 s
Integers: 32, 0.003082 s
Integers: 255, 0.007397 s
Integers: 257, 0.0003664 s
Integers: 256, 0.01066 s
Integers: 65535, 0.002766 s
Integers: 65537, 0.007342 s
Integers: 65536, 0.003524 s
Integers: 1e+06, 0.042 s
Numeric: 0, 0.008231 s
Numeric: 1, 0.002504 s
Numeric: 2, 0.004801 s
Numeric: 4, 0.00752 s
Numeric: 8, 0.00968 s
Numeric: 31, 0.001829 s
Numeric: 33, 0.003615 s
Numeric: 32, 0.003279 s
Numeric: 255, 0.007881 s
Numeric: 257, 0.007017 s
Numeric: 256, 0.01477 s
Numeric: 65535, 0.002993 s
Numeric: 65537, 0.00646 s
Numeric: 65536, 0.02704 s
Numeric: 1e+06, 0.03425 s
Logical: 0, 0.007073 s
Logical: 1, 0.005746 s
Logical: 2, 0.001814 s
Logical: 4, 0.002752 s
Logical: 8, 0.002902 s
Logical: 31, 0.006459 s
Logical: 33, 0.004525 s
Logical: 32, 0.006494 s
Logical: 255, 0.001531 s
Logical: 257, 0.004447 s
Logical: 256, 0.01314 s
Logical: 65535, 0.01083 s
Logical: 65537, 0.01331 s
Logical: 65536, 0.007325 s
Logical: 1e+06, 0.06553 s
List: 0, 0.006481 s
List: 1, 0.005225 s
List: 2, 0.006564 s
List: 4, 0.009366 s
List: 8, 0.004268 s
List: 31, 0.00449 s
List: 33, 0.004781 s
List: 32, 0.002663 s
List: 255, 0.007026 s
List: 257, 0.01423 s
List: 256, 0.01898 s
List: 65535, 0.01547 s
List: 65537, 0.01648 s
List: 65536, 0.0212 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-debian-gcc
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [265s/354s]
Running ‘qattributes_testing.R’ [57s/73s]
Running ‘qsavemload_testing.R’
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.03002 s
strings: 1, 0.009603 s
strings: 2, 0.007507 s
strings: 4, 0.001152 s
strings: 8, 0.007605 s
strings: 31, 0.01061 s
strings: 33, 0.001795 s
strings: 32, 0.005755 s
strings: 255, 0.005023 s
strings: 257, 0.008942 s
strings: 256, 0.005154 s
strings: 65535, 0.01246 s
strings: 65537, 0.004623 s
strings: 65536, 0.01116 s
strings: 1e+06, 0.0111 s
Character Vectors: 0, 0.0008359 s
Character Vectors: 1, 0.003309 s
Character Vectors: 2, 0.002368 s
Character Vectors: 4, 0.003778 s
Character Vectors: 8, 0.001159 s
Character Vectors: 31, 0.0004169 s
Character Vectors: 33, 0.003439 s
Character Vectors: 32, 0.0002722 s
Character Vectors: 255, 0.002782 s
Character Vectors: 257, 0.001211 s
Character Vectors: 256, 0.005099 s
Character Vectors: 65535, 0.005992 s
Character Vectors: 65537, 0.0077 s
Character Vectors: 65536, 0.006215 s
Stringfish: 0, 0.001074 s
Stringfish: 1, 0.007044 s
Stringfish: 2, 0.0001624 s
Stringfish: 4, 0.0001747 s
Stringfish: 8, 0.002396 s
Stringfish: 31, 0.003171 s
Stringfish: 33, 0.003076 s
Stringfish: 32, 0.0004567 s
Stringfish: 255, 0.002023 s
Stringfish: 257, 0.0008605 s
Stringfish: 256, 0.003166 s
Stringfish: 65535, 0.002653 s
Stringfish: 65537, 0.005199 s
Stringfish: 65536, 0.004957 s
Integers: 0, 0.006887 s
Integers: 1, 0.001464 s
Integers: 2, 0.003595 s
Integers: 4, 0.0007495 s
Integers: 8, 0.007794 s
Integers: 31, 0.01693 s
Integers: 33, 0.00375 s
Integers: 32, 0.01152 s
Integers: 255, 0.00675 s
Integers: 257, 0.00436 s
Integers: 256, 0.005943 s
Integers: 65535, 0.01373 s
Integers: 65537, 0.003816 s
Integers: 65536, 0.00916 s
Integers: 1e+06, 0.05327 s
Numeric: 0, 0.001094 s
Numeric: 1, 0.005408 s
Numeric: 2, 0.006115 s
Numeric: 4, 0.0007989 s
Numeric: 8, 0.003113 s
Numeric: 31, 0.005278 s
Numeric: 33, 0.00801 s
Numeric: 32, 0.001482 s
Numeric: 255, 0.00753 s
Numeric: 257, 0.006523 s
Numeric: 256, 0.006103 s
Numeric: 65535, 0.007591 s
Numeric: 65537, 0.01428 s
Numeric: 65536, 0.01321 s
Numeric: 1e+06, 0.2131 s
Logical: 0, 0.01131 s
Logical: 1, 0.005397 s
Logical: 2, 0.004055 s
Logical: 4, 0.002563 s
Logical: 8, 0.00691 s
Logical: 31, 0.0047 s
Logical: 33, 0.007184 s
Logical: 32, 0.002822 s
Logical: 255, 0.00255 s
Logical: 257, 0.002524 s
Logical: 256, 0.005227 s
Logical: 65535, 0.01163 s
Logical: 65537, 0.01284 s
Logical: 65536, 0.003144 s
Logical: 1e+06, 0.02753 s
List: 0, 0.002595 s
List: 1, 0.006332 s
List: 2, 0.002796 s
List: 4, 0.007051 s
List: 8, 0.005828 s
List: 31, 0.005061 s
List: 33, 0.003982 s
List: 32, 0.00236 s
List: 255, 0.003144 s
List: 257, 0.02368 s
List: 256, 0.005819 s
List: 65535, 0.04752 s
List: 65537, 0.02165 s
List: 65536, 0.03846 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-fedora-clang
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [259s/264s]
Running ‘qattributes_testing.R’ [50s/49s]
Running ‘qsavemload_testing.R’
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01877 s
strings: 1, 0.00233 s
strings: 2, 0.001731 s
strings: 4, 0.001663 s
strings: 8, 0.002074 s
strings: 31, 0.002256 s
strings: 33, 0.001094 s
strings: 32, 0.0069 s
strings: 255, 0.001523 s
strings: 257, 0.006322 s
strings: 256, 0.002874 s
strings: 65535, 0.002745 s
strings: 65537, 0.002542 s
strings: 65536, 0.002555 s
strings: 1e+06, 0.005651 s
Character Vectors: 0, 0.0007025 s
Character Vectors: 1, 0.0005259 s
Character Vectors: 2, 0.0007988 s
Character Vectors: 4, 0.0005541 s
Character Vectors: 8, 0.00188 s
Character Vectors: 31, 0.00111 s
Character Vectors: 33, 0.0007223 s
Character Vectors: 32, 0.0002493 s
Character Vectors: 255, 0.0001628 s
Character Vectors: 257, 0.0004144 s
Character Vectors: 256, 0.0001899 s
Character Vectors: 65535, 0.004162 s
Character Vectors: 65537, 0.006137 s
Character Vectors: 65536, 0.005242 s
Stringfish: 0, 0.0005091 s
Stringfish: 1, 0.0004363 s
Stringfish: 2, 0.001141 s
Stringfish: 4, 0.0003858 s
Stringfish: 8, 0.0008759 s
Stringfish: 31, 0.0002047 s
Stringfish: 33, 0.001546 s
Stringfish: 32, 0.0007424 s
Stringfish: 255, 0.0004779 s
Stringfish: 257, 0.0005674 s
Stringfish: 256, 0.0005625 s
Stringfish: 65535, 0.004842 s
Stringfish: 65537, 0.004768 s
Stringfish: 65536, 0.003957 s
Integers: 0, 0.007171 s
Integers: 1, 0.003663 s
Integers: 2, 0.002543 s
Integers: 4, 0.003133 s
Integers: 8, 0.002388 s
Integers: 31, 0.001636 s
Integers: 33, 0.001823 s
Integers: 32, 0.0008195 s
Integers: 255, 0.002802 s
Integers: 257, 0.002472 s
Integers: 256, 0.0007814 s
Integers: 65535, 0.005651 s
Integers: 65537, 0.01358 s
Integers: 65536, 0.004114 s
Integers: 1e+06, 0.1235 s
Numeric: 0, 0.00308 s
Numeric: 1, 0.002168 s
Numeric: 2, 0.001306 s
Numeric: 4, 0.002583 s
Numeric: 8, 0.001827 s
Numeric: 31, 0.001171 s
Numeric: 33, 0.002374 s
Numeric: 32, 0.001065 s
Numeric: 255, 0.002766 s
Numeric: 257, 0.002006 s
Numeric: 256, 0.00285 s
Numeric: 65535, 0.008034 s
Numeric: 65537, 0.01131 s
Numeric: 65536, 0.02376 s
Numeric: 1e+06, 0.1543 s
Logical: 0, 0.00469 s
Logical: 1, 0.002046 s
Logical: 2, 0.0009995 s
Logical: 4, 0.003164 s
Logical: 8, 0.001053 s
Logical: 31, 0.003974 s
Logical: 33, 0.003265 s
Logical: 32, 0.001825 s
Logical: 255, 0.002292 s
Logical: 257, 0.001415 s
Logical: 256, 0.003399 s
Logical: 65535, 0.004209 s
Logical: 65537, 0.005805 s
Logical: 65536, 0.01047 s
Logical: 1e+06, 0.09702 s
List: 0, 0.00271 s
List: 1, 0.01169 s
List: 2, 0.00126 s
List: 4, 0.004836 s
List: 8, 0.001077 s
List: 31, 0.002509 s
List: 33, 0.002096 s
List: 32, 0.00148 s
List: 255, 0.0008624 s
List: 257, 0.001487 s
List: 256, 0.001679 s
List: 65535, 0.03178 s
List: 65537, 0.02155 s
List: 65536, 0.02548 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-fedora-gcc
Version: 0.27.3
Check: compiled code
Result: WARN
File 'qs/libs/x64/qs.dll':
Found non-API calls to R: 'ATTRIB', 'CLOENV', 'ENCLOS', 'FRAME',
'HASHTAB', 'IS_S4_OBJECT', 'LEVELS', 'OBJECT', 'PRENV',
'Rf_allocSExp', 'SETLEVELS', 'SET_ATTRIB', 'SET_CLOENV',
'SET_ENCLOS', 'SET_FRAME', 'SET_HASHTAB', 'SET_OBJECT',
'SET_PRENV', 'SET_S4_OBJECT', 'SET_TRUELENGTH'
These entry points may be removed soon:
'SET_FRAME', 'SET_HASHTAB', 'SET_ENCLOS', 'SET_S4_OBJECT', 'FRAME', 'HASHTAB', 'IS_S4_OBJECT', 'CLOENV', 'ENCLOS', 'OBJECT', 'SET_CLOENV', 'LEVELS', 'SETLEVELS'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-devel-windows-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running 'correctness_testing.R' [153s]
Running 'qattributes_testing.R' [36s]
Running 'qsavemload_testing.R' [2s]
Running the tests in 'tests/qattributes_testing.R' failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01315 s
strings: 1, 0.003996 s
strings: 2, 0.002995 s
strings: 4, 0.00208 s
strings: 8, 0.008843 s
strings: 31, 0.002315 s
strings: 33, 0.00242 s
strings: 32, 0.001685 s
strings: 255, 0.0008064 s
strings: 257, 0.01091 s
strings: 256, 0.004255 s
strings: 65535, 0.001755 s
strings: 65537, 0.00199 s
strings: 65536, 0.006007 s
strings: 1e+06, 0.003587 s
Character Vectors: 0, 0.0007226 s
Character Vectors: 1, 0.002256 s
Character Vectors: 2, 0.00138 s
Character Vectors: 4, 0.0005542 s
Character Vectors: 8, 0.001098 s
Character Vectors: 31, 0.001046 s
Character Vectors: 33, 0.0009681 s
Character Vectors: 32, 0.002892 s
Character Vectors: 255, 0.0004367 s
Character Vectors: 257, 0.000919 s
Character Vectors: 256, 0.0005956 s
Character Vectors: 65535, 0.004472 s
Character Vectors: 65537, 0.005317 s
Character Vectors: 65536, 0.004643 s
Stringfish: 0, 0.00244 s
Stringfish: 1, 0.00133 s
Stringfish: 2, 0.00123 s
Stringfish: 4, 0.002335 s
Stringfish: 8, 0.0006501 s
Stringfish: 31, 0.0004984 s
Stringfish: 33, 0.001602 s
Stringfish: 32, 0.0002011 s
Stringfish: 255, 0.0002353 s
Stringfish: 257, 0.001376 s
Stringfish: 256, 0.0001796 s
Stringfish: 65535, 0.003084 s
Stringfish: 65537, 0.002844 s
Stringfish: 65536, 0.003673 s
Integers: 0, 0.003723 s
Integers: 1, 0.006372 s
Integers: 2, 0.01095 s
Integers: 4, 0.007666 s
Integers: 8, 0.002531 s
Integers: 31, 0.0006533 s
Integers: 33, 0.003714 s
Integers: 32, 0.005466 s
Integers: 255, 0.002904 s
Integers: 257, 0.002647 s
Integers: 256, 0.00366 s
Integers: 65535, 0.005186 s
Integers: 65537, 0.009242 s
Integers: 65536, 0.003951 s
Integers: 1e+06, 0.1007 s
Numeric: 0, 0.003897 s
Numeric: 1, 0.001703 s
Numeric: 2, 0.003185 s
Numeric: 4, 0.004673 s
Numeric: 8, 0.005562 s
Numeric: 31, 0.00271 s
Numeric: 33, 0.008532 s
Numeric: 32, 0.006161 s
Numeric: 255, 0.00453 s
Numeric: 257, 0.004287 s
Numeric: 256, 0.002045 s
Numeric: 65535, 0.01529 s
Numeric: 65537, 0.01737 s
Numeric: 65536, 0.01106 s
Numeric: 1e+06, 0.1876 s
Logical: 0, 0.005957 s
Logical: 1, 0.00139 s
Logical: 2, 0.003599 s
Logical: 4, 0.002475 s
Logical: 8, 0.003119 s
Logical: 31, 0.002288 s
Logical: 33, 0.002086 s
Logical: 32, 0.002466 s
Logical: 255, 0.002221 s
Logical: 257, 0.002084 s
Logical: 256, 0.001855 s
Logical: 65535, 0.008848 s
Logical: 65537, 0.007393 s
Logical: 65536, 0.00984 s
Logical: 1e+06, 0.07634 s
List: 0, 0.003207 s
List: 1, 0.002978 s
List: 2, 0.004362 s
List: 4, 0.00573 s
List: 8, 0.008202 s
List: 31, 0.004249 s
List: 33, 0.0038 s
List: 32, 0.005193 s
List: 255, 0.001513 s
List: 257, 0.001666 s
List: 256, 0.00442 s
List: 65535, 0.02498 s
List: 65537, 0.01204 s
List: 65536, 0.03116 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-windows-x86_64
Version: 0.27.3
Check: compiled code
Result: NOTE
File ‘qs/libs/qs.so’:
Found non-API calls to R: ‘CLOENV’, ‘ENCLOS’, ‘FRAME’, ‘HASHTAB’,
‘IS_S4_OBJECT’, ‘LEVELS’, ‘OBJECT’, ‘PRENV’, ‘Rf_allocSExp’,
‘SETLEVELS’, ‘SET_CLOENV’, ‘SET_ENCLOS’, ‘SET_FRAME’,
‘SET_HASHTAB’, ‘SET_PRENV’, ‘SET_S4_OBJECT’, ‘SET_TRUELENGTH’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-patched-linux-x86_64, r-release-linux-x86_64, r-release-macos-arm64, r-release-macos-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [189s/194s]
Running ‘qattributes_testing.R’ [39s/44s]
Running ‘qsavemload_testing.R’ [2s/2s]
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.009266 s
strings: 1, 0.002223 s
strings: 2, 0.007671 s
strings: 4, 0.00265 s
strings: 8, 0.007241 s
strings: 31, 0.005385 s
strings: 33, 0.004859 s
strings: 32, 0.006556 s
strings: 255, 0.009068 s
strings: 257, 0.002912 s
strings: 256, 0.006309 s
strings: 65535, 0.004612 s
strings: 65537, 0.004343 s
strings: 65536, 0.005225 s
strings: 1e+06, 0.005279 s
Character Vectors: 0, 0.0007679 s
Character Vectors: 1, 0.005265 s
Character Vectors: 2, 0.001912 s
Character Vectors: 4, 0.005302 s
Character Vectors: 8, 0.002699 s
Character Vectors: 31, 0.002589 s
Character Vectors: 33, 0.002123 s
Character Vectors: 32, 0.00557 s
Character Vectors: 255, 0.002804 s
Character Vectors: 257, 0.0004973 s
Character Vectors: 256, 0.004507 s
Character Vectors: 65535, 0.003491 s
Character Vectors: 65537, 0.003233 s
Character Vectors: 65536, 0.006017 s
Stringfish: 0, 0.002958 s
Stringfish: 1, 0.002406 s
Stringfish: 2, 0.002227 s
Stringfish: 4, 0.002035 s
Stringfish: 8, 0.001661 s
Stringfish: 31, 0.0002929 s
Stringfish: 33, 0.004649 s
Stringfish: 32, 0.0006292 s
Stringfish: 255, 0.0002771 s
Stringfish: 257, 0.001251 s
Stringfish: 256, 0.002665 s
Stringfish: 65535, 0.003706 s
Stringfish: 65537, 0.002324 s
Stringfish: 65536, 0.006042 s
Integers: 0, 0.02357 s
Integers: 1, 0.01597 s
Integers: 2, 0.004495 s
Integers: 4, 0.004973 s
Integers: 8, 0.004492 s
Integers: 31, 0.008321 s
Integers: 33, 0.002936 s
Integers: 32, 0.01706 s
Integers: 255, 0.003017 s
Integers: 257, 0.001349 s
Integers: 256, 0.003095 s
Integers: 65535, 0.01793 s
Integers: 65537, 0.014 s
Integers: 65536, 0.007182 s
Integers: 1e+06, 0.04113 s
Numeric: 0, 0.01579 s
Numeric: 1, 0.0007196 s
Numeric: 2, 0.002466 s
Numeric: 4, 0.006141 s
Numeric: 8, 0.002724 s
Numeric: 31, 0.007196 s
Numeric: 33, 0.003422 s
Numeric: 32, 0.002835 s
Numeric: 255, 0.001236 s
Numeric: 257, 0.006217 s
Numeric: 256, 0.005832 s
Numeric: 65535, 0.00266 s
Numeric: 65537, 0.02158 s
Numeric: 65536, 0.01703 s
Numeric: 1e+06, 0.07503 s
Logical: 0, 0.006371 s
Logical: 1, 0.03009 s
Logical: 2, 0.01412 s
Logical: 4, 0.001675 s
Logical: 8, 0.003439 s
Logical: 31, 0.008325 s
Logical: 33, 0.05828 s
Logical: 32, 0.04441 s
Logical: 255, 0.03292 s
Logical: 257, 0.008382 s
Logical: 256, 0.007674 s
Logical: 65535, 0.007509 s
Logical: 65537, 0.0198 s
Logical: 65536, 0.007301 s
Logical: 1e+06, 0.01145 s
List: 0, 0.006958 s
List: 1, 0.0006555 s
List: 2, 0.01578 s
List: 4, 0.0007408 s
List: 8, 0.02397 s
List: 31, 0.0005113 s
List: 33, 0.008809 s
List: 32, 0.0007137 s
List: 255, 0.003401 s
List: 257, 0.008428 s
List: 256, 0.0007162 s
List: 65535, 0.03993 s
List: 65537, 0.02159 s
List: 65536, 0.03678 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-release-linux-x86_64
Version: 0.27.3
Check: compiled code
Result: NOTE
File 'qs/libs/x64/qs.dll':
Found non-API calls to R: 'CLOENV', 'ENCLOS', 'FRAME', 'HASHTAB',
'IS_S4_OBJECT', 'LEVELS', 'OBJECT', 'PRENV', 'Rf_allocSExp',
'SETLEVELS', 'SET_CLOENV', 'SET_ENCLOS', 'SET_FRAME',
'SET_HASHTAB', 'SET_PRENV', 'SET_S4_OBJECT', 'SET_TRUELENGTH'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-release-windows-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running 'correctness_testing.R' [154s]
Running 'qattributes_testing.R' [34s]
Running 'qsavemload_testing.R' [2s]
Running the tests in 'tests/qattributes_testing.R' failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.0106 s
strings: 1, 0.008283 s
strings: 2, 0.004269 s
strings: 4, 0.005753 s
strings: 8, 0.005574 s
strings: 31, 0.002247 s
strings: 33, 0.009361 s
strings: 32, 0.003393 s
strings: 255, 0.001864 s
strings: 257, 0.001429 s
strings: 256, 0.00136 s
strings: 65535, 0.002476 s
strings: 65537, 0.001956 s
strings: 65536, 0.007701 s
strings: 1e+06, 0.006205 s
Character Vectors: 0, 0.00142 s
Character Vectors: 1, 0.0005451 s
Character Vectors: 2, 0.0007296 s
Character Vectors: 4, 0.0005297 s
Character Vectors: 8, 0.0007843 s
Character Vectors: 31, 0.0009487 s
Character Vectors: 33, 0.0007296 s
Character Vectors: 32, 0.0008783 s
Character Vectors: 255, 0.0005128 s
Character Vectors: 257, 0.000485 s
Character Vectors: 256, 0.001994 s
Character Vectors: 65535, 0.001983 s
Character Vectors: 65537, 0.002551 s
Character Vectors: 65536, 0.002857 s
Stringfish: 0, 0.001294 s
Stringfish: 1, 0.0008726 s
Stringfish: 2, 0.002131 s
Stringfish: 4, 0.0001967 s
Stringfish: 8, 0.001501 s
Stringfish: 31, 0.001254 s
Stringfish: 33, 0.0003486 s
Stringfish: 32, 0.0005236 s
Stringfish: 255, 0.0005213 s
Stringfish: 257, 0.001738 s
Stringfish: 256, 0.0006554 s
Stringfish: 65535, 0.002706 s
Stringfish: 65537, 0.00316 s
Stringfish: 65536, 0.003185 s
Integers: 0, 0.003843 s
Integers: 1, 0.002611 s
Integers: 2, 0.001732 s
Integers: 4, 0.003502 s
Integers: 8, 0.001636 s
Integers: 31, 0.002827 s
Integers: 33, 0.002001 s
Integers: 32, 0.0009127 s
Integers: 255, 0.00718 s
Integers: 257, 0.0009964 s
Integers: 256, 0.00296 s
Integers: 65535, 0.02443 s
Integers: 65537, 0.002784 s
Integers: 65536, 0.00515 s
Integers: 1e+06, 0.0408 s
Numeric: 0, 0.006081 s
Numeric: 1, 0.003539 s
Numeric: 2, 0.0009766 s
Numeric: 4, 0.003765 s
Numeric: 8, 0.002266 s
Numeric: 31, 0.002184 s
Numeric: 33, 0.002085 s
Numeric: 32, 0.001454 s
Numeric: 255, 0.002458 s
Numeric: 257, 0.002992 s
Numeric: 256, 0.002185 s
Numeric: 65535, 0.003392 s
Numeric: 65537, 0.01443 s
Numeric: 65536, 0.02439 s
Numeric: 1e+06, 0.02478 s
Logical: 0, 0.001316 s
Logical: 1, 0.002531 s
Logical: 2, 0.001236 s
Logical: 4, 0.004502 s
Logical: 8, 0.002527 s
Logical: 31, 0.002143 s
Logical: 33, 0.002053 s
Logical: 32, 0.001166 s
Logical: 255, 0.0008576 s
Logical: 257, 0.002974 s
Logical: 256, 0.004734 s
Logical: 65535, 0.003047 s
Logical: 65537, 0.01405 s
Logical: 65536, 0.01174 s
Logical: 1e+06, 0.08285 s
List: 0, 0.001613 s
List: 1, 0.003009 s
List: 2, 0.002302 s
List: 4, 0.002401 s
List: 8, 0.002243 s
List: 31, 0.0009689 s
List: 33, 0.004668 s
List: 32, 0.001657 s
List: 255, 0.002199 s
List: 257, 0.001996 s
List: 256, 0.002073 s
List: 65535, 0.02277 s
List: 65537, 0.03213 s
List: 65536, 0.01141 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-release-windows-x86_64
Version: 0.27.3
Check: installed package size
Result: NOTE
installed size is 9.2Mb
sub-directories of 1Mb or more:
doc 1.1Mb
libs 7.8Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running 'correctness_testing.R' [205s]
Running 'qattributes_testing.R' [43s]
Running 'qsavemload_testing.R' [2s]
Running the tests in 'tests/qattributes_testing.R' failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.008537 s
strings: 1, 0.001649 s
strings: 2, 0.006568 s
strings: 4, 0.00267 s
strings: 8, 0.005243 s
strings: 31, 0.002143 s
strings: 33, 0.01419 s
strings: 32, 0.002626 s
strings: 255, 0.003956 s
strings: 257, 0.00209 s
strings: 256, 0.002396 s
strings: 65535, 0.005157 s
strings: 65537, 0.002933 s
strings: 65536, 0.003362 s
strings: 1e+06, 0.009924 s
Character Vectors: 0, 0.001141 s
Character Vectors: 1, 0.002306 s
Character Vectors: 2, 0.0005837 s
Character Vectors: 4, 0.0006274 s
Character Vectors: 8, 0.00127 s
Character Vectors: 31, 0.0004357 s
Character Vectors: 33, 0.001764 s
Character Vectors: 32, 0.000776 s
Character Vectors: 255, 0.0007923 s
Character Vectors: 257, 0.000905 s
Character Vectors: 256, 0.0002642 s
Character Vectors: 65535, 0.004226 s
Character Vectors: 65537, 0.003935 s
Character Vectors: 65536, 0.004124 s
Stringfish: 0, 0.001223 s
Stringfish: 1, 0.001766 s
Stringfish: 2, 0.0009613 s
Stringfish: 4, 0.0002616 s
Stringfish: 8, 0.00163 s
Stringfish: 31, 0.001347 s
Stringfish: 33, 0.000353 s
Stringfish: 32, 0.002303 s
Stringfish: 255, 0.0003321 s
Stringfish: 257, 0.00213 s
Stringfish: 256, 0.001259 s
Stringfish: 65535, 0.003172 s
Stringfish: 65537, 0.004254 s
Stringfish: 65536, 0.004923 s
Integers: 0, 0.00541 s
Integers: 1, 0.01513 s
Integers: 2, 0.007418 s
Integers: 4, 0.001537 s
Integers: 8, 0.000792 s
Integers: 31, 0.001241 s
Integers: 33, 0.003528 s
Integers: 32, 0.003048 s
Integers: 255, 0.009212 s
Integers: 257, 0.001898 s
Integers: 256, 0.003342 s
Integers: 65535, 0.01337 s
Integers: 65537, 0.01073 s
Integers: 65536, 0.005286 s
Integers: 1e+06, 0.06844 s
Numeric: 0, 0.003362 s
Numeric: 1, 0.0122 s
Numeric: 2, 0.004826 s
Numeric: 4, 0.0109 s
Numeric: 8, 0.002211 s
Numeric: 31, 0.007922 s
Numeric: 33, 0.003749 s
Numeric: 32, 0.001787 s
Numeric: 255, 0.002158 s
Numeric: 257, 0.001655 s
Numeric: 256, 0.002478 s
Numeric: 65535, 0.02699 s
Numeric: 65537, 0.00238 s
Numeric: 65536, 0.007882 s
Numeric: 1e+06, 0.1271 s
Logical: 0, 0.002299 s
Logical: 1, 0.002085 s
Logical: 2, 0.003672 s
Logical: 4, 0.003025 s
Logical: 8, 0.003731 s
Logical: 31, 0.002957 s
Logical: 33, 0.00458 s
Logical: 32, 0.001505 s
Logical: 255, 0.004793 s
Logical: 257, 0.00379 s
Logical: 256, 0.005469 s
Logical: 65535, 0.02144 s
Logical: 65537, 0.006062 s
Logical: 65536, 0.004985 s
Logical: 1e+06, 0.0317 s
List: 0, 0.006927 s
List: 1, 0.003968 s
List: 2, 0.001787 s
List: 4, 0.001803 s
List: 8, 0.01042 s
List: 31, 0.002367 s
List: 33, 0.00245 s
List: 32, 0.001894 s
List: 255, 0.002773 s
List: 257, 0.004207 s
List: 256, 0.00264 s
List: 65535, 0.0329 s
List: 65537, 0.02765 s
List: 65536, 0.05235 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-oldrel-windows-x86_64
Current CRAN status: NOTE: 8, OK: 5
Version: 0.1.6
Check: compiled code
Result: NOTE
File ‘qs2/libs/qs2.so’:
Found non-API calls to R: ‘ATTRIB’, ‘SET_ATTRIB’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc
Version: 0.1.6
Check: compiled code
Result: NOTE
File 'qs2/libs/x64/qs2.dll':
Found non-API calls to R: 'ATTRIB', 'SET_ATTRIB'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-devel-windows-x86_64
Version: 0.1.6
Check: installed package size
Result: NOTE
installed size is 8.8Mb
sub-directories of 1Mb or more:
libs 8.6Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Version: 0.1.6
Check: for GNU extensions in Makefiles
Result: NOTE
GNU make is a SystemRequirements.
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64
Current CRAN status: NOTE: 3, OK: 10
Version: 0.3.5
Check: installed package size
Result: NOTE
installed size is 6.0Mb
sub-directories of 1Mb or more:
data 1.1Mb
libs 4.4Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Version: 0.3.5
Check: for GNU extensions in Makefiles
Result: NOTE
GNU make is a SystemRequirements.
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64
Version: 0.3.5
Check: package dependencies
Result: NOTE
Package suggested but not available for checking: ‘pwalign’
Flavor: r-oldrel-macos-x86_64
Current CRAN status: NOTE: 8, OK: 5
Version: 0.17.0
Check: compiled code
Result: NOTE
File ‘stringfish/libs/stringfish.so’:
Found non-API call to R: ‘ATTRIB’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc
Version: 0.17.0
Check: compiled code
Result: NOTE
File 'stringfish/libs/x64/stringfish.dll':
Found non-API call to R: 'ATTRIB'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-devel-windows-x86_64
Version: 0.17.0
Check: for GNU extensions in Makefiles
Result: NOTE
GNU make is a SystemRequirements.
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64