Last updated on 2025-12-26 12:49:51 CET.
| Flavor | Version | Tinstall | Tcheck | Ttotal | Status | Flags |
|---|---|---|---|---|---|---|
| r-devel-linux-x86_64-debian-clang | 1.1.2 | 11.57 | 109.94 | 121.51 | OK | |
| r-devel-linux-x86_64-debian-gcc | 1.1.2 | 8.31 | 81.14 | 89.45 | OK | |
| r-devel-linux-x86_64-fedora-clang | 1.1.2 | 20.00 | 174.63 | 194.63 | OK | |
| r-devel-linux-x86_64-fedora-gcc | 1.1.2 | 21.00 | 178.48 | 199.48 | OK | |
| r-devel-windows-x86_64 | 1.1.2 | 13.00 | 114.00 | 127.00 | ERROR | |
| r-patched-linux-x86_64 | 1.1.2 | 12.30 | 97.79 | 110.09 | OK | |
| r-release-linux-x86_64 | 1.1.2 | 10.91 | 98.52 | 109.43 | OK | |
| r-release-macos-arm64 | 1.1.2 | | | | OK | |
| r-release-macos-x86_64 | 1.1.2 | 8.00 | 105.00 | 113.00 | OK | |
| r-release-windows-x86_64 | 1.1.2 | 13.00 | 126.00 | 139.00 | OK | |
| r-oldrel-macos-arm64 | 1.1.2 | | | | OK | |
| r-oldrel-macos-x86_64 | 1.1.2 | 8.00 | 98.00 | 106.00 | OK | |
| r-oldrel-windows-x86_64 | 1.1.2 | 17.00 | 156.00 | 173.00 | OK | |
Version: 1.1.2
Check: examples
Result: ERROR
Running examples in 'dataPreparation-Ex.R' failed
The error most likely occurred in:
> ### Name: build_encoding
> ### Title: Compute encoding
> ### Aliases: build_encoding
>
> ### ** Examples
>
> # Get a data set
> data(adult)
> encoding <- build_encoding(adult, cols = "auto", verbose = TRUE)
[1] "age" "fnlwgt" "education_num" "capital_gain"
[5] "capital_loss" "hr_per_week"
[1] "build_encoding: c(\"age\", \"fnlwgt\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor or character i do nothing for those variables."
[1] "build_encoding: I will compute encoding on 9 character and factor columns."
[1] "build_encoding: it took me: 0.01s to compute encoding for 9 character and factor columns."
>
> print(encoding)
$type_employer
$type_employer$new_cols
type_employer.? type_employer.Federal-gov
"type.employer.." "type.employer.Federal.gov"
type_employer.Local-gov type_employer.Never-worked
"type.employer.Local.gov" "type.employer.Never.worked"
type_employer.Private type_employer.Self-emp-inc
"type.employer.Private" "type.employer.Self.emp.inc"
type_employer.Self-emp-not-inc type_employer.State-gov
"type.employer.Self.emp.not.inc" "type.employer.State.gov"
type_employer.Without-pay
"type.employer.Without.pay"
$type_employer$values
[1] "?" "Federal-gov" "Local-gov" "Never-worked"
[5] "Private" "Self-emp-inc" "Self-emp-not-inc" "State-gov"
[9] "Without-pay"
$education
$education$new_cols
education.10th education.11th education.12th
"education.10th" "education.11th" "education.12th"
education.1st-4th education.5th-6th education.7th-8th
"education.1st.4th" "education.5th.6th" "education.7th.8th"
education.9th education.Assoc-acdm education.Assoc-voc
"education.9th" "education.Assoc.acdm" "education.Assoc.voc"
education.Bachelors education.Doctorate education.HS-grad
"education.Bachelors" "education.Doctorate" "education.HS.grad"
education.Masters education.Preschool education.Prof-school
"education.Masters" "education.Preschool" "education.Prof.school"
education.Some-college
"education.Some.college"
$education$values
[1] "10th" "11th" "12th" "1st-4th" "5th-6th"
[6] "7th-8th" "9th" "Assoc-acdm" "Assoc-voc" "Bachelors"
[11] "Doctorate" "HS-grad" "Masters" "Preschool" "Prof-school"
[16] "Some-college"
$marital
$marital$new_cols
marital.Divorced marital.Married-AF-spouse
"marital.Divorced" "marital.Married.AF.spouse"
marital.Married-civ-spouse marital.Married-spouse-absent
"marital.Married.civ.spouse" "marital.Married.spouse.absent"
marital.Never-married marital.Separated
"marital.Never.married" "marital.Separated"
marital.Widowed
"marital.Widowed"
$marital$values
[1] "Divorced" "Married-AF-spouse" "Married-civ-spouse"
[4] "Married-spouse-absent" "Never-married" "Separated"
[7] "Widowed"
$occupation
$occupation$new_cols
occupation.? occupation.Adm-clerical
"occupation.." "occupation.Adm.clerical"
occupation.Armed-Forces occupation.Craft-repair
"occupation.Armed.Forces" "occupation.Craft.repair"
occupation.Exec-managerial occupation.Farming-fishing
"occupation.Exec.managerial" "occupation.Farming.fishing"
occupation.Handlers-cleaners occupation.Machine-op-inspct
"occupation.Handlers.cleaners" "occupation.Machine.op.inspct"
occupation.Other-service occupation.Priv-house-serv
"occupation.Other.service" "occupation.Priv.house.serv"
occupation.Prof-specialty occupation.Protective-serv
"occupation.Prof.specialty" "occupation.Protective.serv"
occupation.Sales occupation.Tech-support
"occupation.Sales" "occupation.Tech.support"
occupation.Transport-moving
"occupation.Transport.moving"
$occupation$values
[1] "?" "Adm-clerical" "Armed-Forces"
[4] "Craft-repair" "Exec-managerial" "Farming-fishing"
[7] "Handlers-cleaners" "Machine-op-inspct" "Other-service"
[10] "Priv-house-serv" "Prof-specialty" "Protective-serv"
[13] "Sales" "Tech-support" "Transport-moving"
$relationship
$relationship$new_cols
relationship.Husband relationship.Not-in-family
"relationship.Husband" "relationship.Not.in.family"
relationship.Other-relative relationship.Own-child
"relationship.Other.relative" "relationship.Own.child"
relationship.Unmarried relationship.Wife
"relationship.Unmarried" "relationship.Wife"
$relationship$values
[1] "Husband" "Not-in-family" "Other-relative" "Own-child"
[5] "Unmarried" "Wife"
$race
$race$new_cols
race.Amer-Indian-Eskimo race.Asian-Pac-Islander race.Black
"race.Amer.Indian.Eskimo" "race.Asian.Pac.Islander" "race.Black"
race.Other race.White
"race.Other" "race.White"
$race$values
[1] "Amer-Indian-Eskimo" "Asian-Pac-Islander" "Black"
[4] "Other" "White"
$sex
$sex$new_cols
sex.Female sex.Male
"sex.Female" "sex.Male"
$sex$values
[1] "Female" "Male"
$country
$country$new_cols
country.? country.Cambodia
"country.." "country.Cambodia"
country.Canada country.China
"country.Canada" "country.China"
country.Columbia country.Cuba
"country.Columbia" "country.Cuba"
country.Dominican-Republic country.Ecuador
"country.Dominican.Republic" "country.Ecuador"
country.El-Salvador country.England
"country.El.Salvador" "country.England"
country.France country.Germany
"country.France" "country.Germany"
country.Greece country.Guatemala
"country.Greece" "country.Guatemala"
country.Haiti country.Holand-Netherlands
"country.Haiti" "country.Holand.Netherlands"
country.Honduras country.Hong
"country.Honduras" "country.Hong"
country.Hungary country.India
"country.Hungary" "country.India"
country.Iran country.Ireland
"country.Iran" "country.Ireland"
country.Italy country.Jamaica
"country.Italy" "country.Jamaica"
country.Japan country.Laos
"country.Japan" "country.Laos"
country.Mexico country.Nicaragua
"country.Mexico" "country.Nicaragua"
country.Outlying-US(Guam-USVI-etc) country.Peru
"country.Outlying.US.Guam.USVI.etc." "country.Peru"
country.Philippines country.Poland
"country.Philippines" "country.Poland"
country.Portugal country.Puerto-Rico
"country.Portugal" "country.Puerto.Rico"
country.Scotland country.South
"country.Scotland" "country.South"
country.Taiwan country.Thailand
"country.Taiwan" "country.Thailand"
country.Trinadad&Tobago country.United-States
"country.Trinadad.Tobago" "country.United.States"
country.Vietnam country.Yugoslavia
"country.Vietnam" "country.Yugoslavia"
$country$values
[1] "?" "Cambodia"
[3] "Canada" "China"
[5] "Columbia" "Cuba"
[7] "Dominican-Republic" "Ecuador"
[9] "El-Salvador" "England"
[11] "France" "Germany"
[13] "Greece" "Guatemala"
[15] "Haiti" "Holand-Netherlands"
[17] "Honduras" "Hong"
[19] "Hungary" "India"
[21] "Iran" "Ireland"
[23] "Italy" "Jamaica"
[25] "Japan" "Laos"
[27] "Mexico" "Nicaragua"
[29] "Outlying-US(Guam-USVI-etc)" "Peru"
[31] "Philippines" "Poland"
[33] "Portugal" "Puerto-Rico"
[35] "Scotland" "South"
[37] "Taiwan" "Thailand"
[39] "Trinadad&Tobago" "United-States"
[41] "Vietnam" "Yugoslavia"
$income
$income$new_cols
income.<=50K income.>50K
"income...50K" "income..50K"
$income$values
[1] "<=50K" ">50K"
>
> # To limit the number of generated columns, one can use min_frequency parameter:
> build_encoding(adult, cols = "auto", verbose = TRUE, min_frequency = 0.1)
[1] "age" "fnlwgt" "education_num" "capital_gain"
[5] "capital_loss" "hr_per_week"
[1] "build_encoding: c(\"age\", \"fnlwgt\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor or character i do nothing for those variables."
[1] "build_encoding: I will compute encoding on 9 character and factor columns."
Error in `[.data.table`(data_set, , `:=`(c("freq"), (.N/nrow(data_set))), :
attempt access index 15/15 in VECTOR_ELT
Calls: build_encoding -> [ -> [.data.table
Execution halted
Flavor: r-devel-windows-x86_64
Version: 1.1.2
Check: tests
Result: ERROR
Running 'testthat.R' [14s]
Running the tests in 'tests/testthat.R' failed.
Complete output:
> if (requireNamespace("testthat", quietly = TRUE)) {
+ library(testthat)
+ library(dataPreparation)
+ test_check("dataPreparation")
+ }
dataPreparation 1.1.2
Type data_preparation_news() to see new features/changes/bug fixes.
[1] "aggregate_by_key: I start to aggregate"
[1] "aggregate_by_key: 6 columns have been constructed. It took 0.02 seconds. "
[1] "find_and_transform_dates: It took me 1.92s to identify formats"
[1] "find_and_transform_dates: It took me 0.16s to transform 4 columns to a Date format."
[1] "find_and_transform_dates: It took me 0s to identify formats"
[1] "find_and_transform_dates: There are no dates to transform.\n (If i missed something please provide the date format in inputs or\n consider using set_col_as_date to transform it)."
[1] "identify_dates: column date_col seems to have an ambiguity, I try to solve it."
[1] "V2"
[1] "fast_discretization: V2 aren't columns of types numeric i do nothing for those variables."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_freq method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will build splits for 0 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 0 numeric columns."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "equal_width_splits: constant_col can't provide 10 equal width bins; instead you will have 0 bins."
[1] "fast_discretization: column constant_col seems to be constant, I do nothing."
[1] "fast_discretization: it took me: 0s to build splits for 0 numeric columns."
[1] "equal_width_splits: data_set can't provide 10 equal width bins; instead you will have 0 bins."
[1] "equal_freq_splits: data_set can't provide 10 equal freq bins; instead you will have 2 bins."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will discretize 1 numeric columns using, bins."
[1] "fast_discretization: it took me: 0s to transform 1 numeric columns into, binary columns."
[1] "un_factor: I will identify variable that are factor but shouldn't be."
[1] "un_factor: I un-factor false_factor."
[1] "un_factor: It took me 0s to un-factor 1 column(s)."
[1] "un_factor: I will identify variable that are factor but shouldn't be."
[1] "un_factor: I un-factor true_factor."
[1] "un_factor: I un-factor false_factor."
[1] "un_factor: It took me 0s to un-factor 2 column(s)."
[1] "fast_filter_variables: I check for constant columns."
[1] "fast_filter_variables: I delete 1 constant column(s) in data_set."
[1] "fast_filter_variables: I check for columns in double."
[1] "fast_filter_variables: I delete 1 column(s) that are in double in data_set."
[1] "fast_filter_variables: I check for columns that are bijections of another column."
[1] "fast_filter_variables: I delete 3 column(s) that are bijections of another column in data_set."
[1] "fast_filter_variables: I check for columns that are included in another column."
[1] "fast_filter_variables: I delete 1 column(s) that are bijections of another column in data_set."
[1] "string_column"
[1] "fast_round: string_column aren't columns of types numeric or integer i do nothing for those variables."
[1] "string_column"
[1] "fast_round: string_column aren't columns of types numeric or integer i do nothing for those variables."
Saving _problems/test_generate_from_character-13.R
Saving _problems/test_generate_from_character-26.R
Saving _problems/test_generate_from_character-40.R
[1] "generate_factor_from_date: I will create a factor column from each date column."
[1] "generate_factor_from_date: It took me 0s to transform 1 column(s)."
[1] "ID"
[1] "generate_date_diffs: ID aren't columns of types date i do nothing for those variables."
[1] "generate_date_diffs: I will generate difference between dates."
[1] "generate_date_diffs: It took me 0s to create 3 column(s)."
[1] "date1" "date2" "date3" "date4"
[5] "num1" "num2" "constant" "num3"
[9] "age" "fnlwgt" "education_num" "capital_gain"
[13] "capital_loss" "hr_per_week"
[1] "generate_from_factor: c(\"date1\", \"date2\", \"date3\", \"date4\", \"num1\", \"num2\", \"constant\", \"num3\", \"age\", \"fnlwgt\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor i do nothing for those variables."
Saving _problems/test_generate_from_factor-14.R
Saving _problems/test_generate_from_factor-27.R
[1] "one_hot_encoder: Since you didn't provide encoding, I compute them with build_encoding."
[1] "build_encoding: I will compute encoding on 1 character and factor columns."
[1] "build_encoding: it took me: 0s to compute encoding for 1 character and factor columns."
[1] "one_hot_encoder: I will one hot encode some columns."
[1] "one_hot_encoder: I am doing column: character_col"
[1] "one_hot_encoder: It took me 0s to transform 1 column(s)."
[1] "build_encoding: I will compute encoding on 1 character and factor columns."
[1] "build_encoding: it took me: 0s to compute encoding for 1 character and factor columns."
[1] "build_encoding: I will compute encoding on 1 character and factor columns."
Saving _problems/test_generate_from_factor-80.R
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: grades."
[1] "target_encode: Start to encode columns according to target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: grades."
[1] "target_encode: Start to encode columns according to target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: target."
[1] "real_cols: col_2 aren't columns of the table, i do nothing for those variables"
[1] "col_2"
[1] "real_cols: col_2 aren't columns of types numeric i do nothing for those variables."
[1] "find_and_transform_numerics: It took me 0s to identify 2 numerics column(s), i will set them as numerics"
[1] "find_and_transform_numerics: It took me 0s to transform 2 column(s) to a numeric format."
[1] "find_and_transform_numerics: It took me 0s to identify 0 numerics column(s), i will set them as numerics"
[1] "find_and_transform_numerics: There are no numerics to transform.(If i missed something consider using set_col_as_numeric to transform it)"
[1] "prepare_set: step one: correcting mistakes."
[1] "fast_filter_variables: I check for constant columns."
[1] "fast_filter_variables: I check for columns in double."
[1] "fast_filter_variables: I check for columns that are bijections of another column."
[1] "fast_filter_variables: I delete 1 column(s) that are bijections of another column in data_set."
[1] "age" "fnlwgt" "capital_gain" "capital_loss" "hr_per_week"
[1] "un_factor: c(\"age\", \"fnlwgt\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor i do nothing for those variables."
[1] "un_factor: I will identify variable that are factor but shouldn't be."
[1] "un_factor: I un-factor education."
[1] "un_factor: I un-factor occupation."
[1] "un_factor: I un-factor country."
[1] "un_factor: It took me 0s to un-factor 3 column(s)."
[1] "find_and_transform_numerics: It took me 0s to identify 0 numerics column(s), i will set them as numerics"
[1] "find_and_transform_numerics: There are no numerics to transform.(If i missed something consider using set_col_as_numeric to transform it)"
[1] "find_and_transform_dates: It took me 1.17s to identify formats"
[1] "find_and_transform_dates: There are no dates to transform.\n (If i missed something please provide the date format in inputs or\n consider using set_col_as_date to transform it)."
[1] "prepare_set: step two: transforming data_set."
[1] "age" "type_employer" "fnlwgt" "education"
[5] "marital" "occupation" "relationship" "race"
[9] "sex" "capital_gain" "capital_loss" "hr_per_week"
[13] "country" "income"
[1] "prepare_set: c(\"age\", \"type_employer\", \"fnlwgt\", \"education\", \"marital\", \"occupation\", \"relationship\", \"race\", \"sex\", \"capital_gain\", \"capital_loss\", \"hr_per_week\", \"country\", \"income\") aren't columns of types date i do nothing for those variables."
[1] "generate_date_diffs: I will generate difference between dates."
[1] "generate_date_diffs: It took me 0s to create 0 column(s)."
[1] "generate_factor_from_date: I will create a factor column from each date column."
[1] "generate_factor_from_date: It took me 0s to transform 0 column(s)."
[1] "age" "type_employer" "fnlwgt" "marital"
[5] "relationship" "race" "sex" "capital_gain"
[9] "capital_loss" "hr_per_week" "income"
[1] "prepare_set: c(\"age\", \"type_employer\", \"fnlwgt\", \"marital\", \"relationship\", \"race\", \"sex\", \"capital_gain\", \"capital_loss\", \"hr_per_week\", \"income\") aren't columns of types character i do nothing for those variables."
Saving _problems/test_prepare_set-15.R
[1] "remove_sd_outlier: I start to filter categorical rare events"
[1] "remove_sd_outlier: dropped 1 row(s) that are rare event on num_col."
[1] "remove_sd_outlier: 1 have been dropped. It took 0.02 seconds. "
[1] "remove_sd_outlier: I start to filter categorical rare events"
[1] "remove_sd_outlier: dropped 0 row(s) that are rare event on num_col."
[1] "remove_sd_outlier: 0 have been dropped. It took 0 seconds. "
[1] "remove_rare_categorical: I start to filter categorical rare events"
[1] "remove_rare_categorical: dropped 1 row(s) that are rare event on cat_col."
[1] "remove_rare_categorical: 1 have been dropped. It took 0 seconds. "
[1] "remove_percentile_outlier: I start to filter categorical rare events"
[1] "remove_percentile_outlier: dropped 2 row(s) that are rare event on num_col."
[1] "remove_percentile_outlier: 2 have been dropped. It took 0 seconds. "
[1] "remove_percentile_outlier: I start to filter categorical rare events"
[1] "remove_percentile_outlier: dropped 2 row(s) that are rare event on num_col."
[1] "remove_percentile_outlier: 2 have been dropped. It took 0 seconds. "
[1] "same_shape: verify that every column is present."
[1] "same_shape: columns col_2 are missing, I create them."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_2 class was logical i set it to numeric."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: the following columns are in data_set but not in reference_set: I drop them: "
[1] "col_2"
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was character i set it to numeric."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was character i set it to c(\"POSIXct\", \"POSIXt\")."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: col_1 class had different levels than in reference_set I change it."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: col_1 class had different levels than in reference_set I change it."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was numeric i set it to weird_class."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was numeric i set it to weird_class."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: columns type_employer?, type_employerFederal-gov, type_employerLocal-gov, type_employerNever-worked, type_employerPrivate, type_employerSelf-emp-inc, type_employerSelf-emp-not-inc, type_employerState-gov, type_employerWithout-pay, education11th, education12th, education1st-4th, education5th-6th, education7th-8th, education9th, educationAssoc-acdm, educationAssoc-voc, educationBachelors, educationDoctorate, educationHS-grad, educationMasters, educationPreschool, educationProf-school, educationSome-college, maritalMarried-AF-spouse, maritalMarried-civ-spouse, maritalMarried-spouse-absent, maritalNever-married, maritalSeparated, maritalWidowed, occupationAdm-clerical, occupationArmed-Forces, occupationCraft-repair, occupationExec-managerial, occupationFarming-fishing, occupationHandlers-cleaners, occupationMachine-op-inspct, occupationOther-service, occupationPriv-house-serv, occupationProf-specialty, occupationProtective-serv, occupationSales, occupationTech-support, occupationTransport-moving, relationshipNot-in-family, relationshipOther-relative, relationshipOwn-child, relationshipUnmarried, relationshipWife, raceAsian-Pac-Islander, raceBlack, raceOther, raceWhite, sexMale, capital_loss1408, capital_loss1564, capital_loss1573, capital_loss1719, capital_loss1762, capital_loss1887, capital_loss1902, capital_loss2042, capital_loss2179, countryCambodia, countryCanada, countryChina, countryColumbia, countryCuba, countryDominican-Republic, countryEcuador, countryEl-Salvador, countryEngland, countryFrance, countryGermany, countryGreece, countryGuatemala, countryHaiti, countryHoland-Netherlands, countryHonduras, countryHong, countryHungary, countryIndia, countryIran, countryIreland, countryItaly, countryJamaica, countryJapan, countryLaos, countryMexico, countryNicaragua, countryOutlying-US(Guam-USVI-etc), countryPeru, countryPhilippines, countryPoland, countryPortugal, countryPuerto-Rico, countryScotland, countrySouth, countryTaiwan, countryThailand, 
countryTrinadad&Tobago, countryUnited-States, countryVietnam, countryYugoslavia, income>50K are missing, I create them."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: the following columns are in data_set but not in reference_set: I drop them: "
[1] "type_employer" "education" "marital" "occupation"
[5] "relationship" "race" "sex" "capital_loss"
[9] "country" "income"
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: age class was integer i set it to numeric."
[1] "same_shape: fnlwgt class was integer i set it to numeric."
[1] "same_shape: education_num class was integer i set it to numeric."
[1] "same_shape: capital_gain class was integer i set it to numeric."
[1] "same_shape: hr_per_week class was integer i set it to numeric."
[1] "same_shape: type_employer? class was logical i set it to numeric."
[1] "same_shape: type_employerFederal-gov class was logical i set it to numeric."
[1] "same_shape: type_employerLocal-gov class was logical i set it to numeric."
[1] "same_shape: type_employerNever-worked class was logical i set it to numeric."
[1] "same_shape: type_employerPrivate class was logical i set it to numeric."
[1] "same_shape: type_employerSelf-emp-inc class was logical i set it to numeric."
[1] "same_shape: type_employerSelf-emp-not-inc class was logical i set it to numeric."
[1] "same_shape: type_employerState-gov class was logical i set it to numeric."
[1] "same_shape: type_employerWithout-pay class was logical i set it to numeric."
[1] "same_shape: education11th class was logical i set it to numeric."
[1] "same_shape: education12th class was logical i set it to numeric."
[1] "same_shape: education1st-4th class was logical i set it to numeric."
[1] "same_shape: education5th-6th class was logical i set it to numeric."
[1] "same_shape: education7th-8th class was logical i set it to numeric."
[1] "same_shape: education9th class was logical i set it to numeric."
[1] "same_shape: educationAssoc-acdm class was logical i set it to numeric."
[1] "same_shape: educationAssoc-voc class was logical i set it to numeric."
[1] "same_shape: educationBachelors class was logical i set it to numeric."
[1] "same_shape: educationDoctorate class was logical i set it to numeric."
[1] "same_shape: educationHS-grad class was logical i set it to numeric."
[1] "same_shape: educationMasters class was logical i set it to numeric."
[1] "same_shape: educationPreschool class was logical i set it to numeric."
[1] "same_shape: educationProf-school class was logical i set it to numeric."
[1] "same_shape: educationSome-college class was logical i set it to numeric."
[1] "same_shape: maritalMarried-AF-spouse class was logical i set it to numeric."
[1] "same_shape: maritalMarried-civ-spouse class was logical i set it to numeric."
[1] "same_shape: maritalMarried-spouse-absent class was logical i set it to numeric."
[1] "same_shape: maritalNever-married class was logical i set it to numeric."
[1] "same_shape: maritalSeparated class was logical i set it to numeric."
[1] "same_shape: maritalWidowed class was logical i set it to numeric."
[1] "same_shape: occupationAdm-clerical class was logical i set it to numeric."
[1] "same_shape: occupationArmed-Forces class was logical i set it to numeric."
[1] "same_shape: occupationCraft-repair class was logical i set it to numeric."
[1] "same_shape: occupationExec-managerial class was logical i set it to numeric."
[1] "same_shape: occupationFarming-fishing class was logical i set it to numeric."
[1] "same_shape: occupationHandlers-cleaners class was logical i set it to numeric."
[1] "same_shape: occupationMachine-op-inspct class was logical i set it to numeric."
[1] "same_shape: occupationOther-service class was logical i set it to numeric."
[1] "same_shape: occupationPriv-house-serv class was logical i set it to numeric."
[1] "same_shape: occupationProf-specialty class was logical i set it to numeric."
[1] "same_shape: occupationProtective-serv class was logical i set it to numeric."
[1] "same_shape: occupationSales class was logical i set it to numeric."
[1] "same_shape: occupationTech-support class was logical i set it to numeric."
[1] "same_shape: occupationTransport-moving class was logical i set it to numeric."
[1] "same_shape: relationshipNot-in-family class was logical i set it to numeric."
[1] "same_shape: relationshipOther-relative class was logical i set it to numeric."
[1] "same_shape: relationshipOwn-child class was logical i set it to numeric."
[1] "same_shape: relationshipUnmarried class was logical i set it to numeric."
[1] "same_shape: relationshipWife class was logical i set it to numeric."
[1] "same_shape: raceAsian-Pac-Islander class was logical i set it to numeric."
[1] "same_shape: raceBlack class was logical i set it to numeric."
[1] "same_shape: raceOther class was logical i set it to numeric."
[1] "same_shape: raceWhite class was logical i set it to numeric."
[1] "same_shape: sexMale class was logical i set it to numeric."
[1] "same_shape: capital_loss1408 class was logical i set it to numeric."
[1] "same_shape: capital_loss1564 class was logical i set it to numeric."
[1] "same_shape: capital_loss1573 class was logical i set it to numeric."
[1] "same_shape: capital_loss1719 class was logical i set it to numeric."
[1] "same_shape: capital_loss1762 class was logical i set it to numeric."
[1] "same_shape: capital_loss1887 class was logical i set it to numeric."
[1] "same_shape: capital_loss1902 class was logical i set it to numeric."
[1] "same_shape: capital_loss2042 class was logical i set it to numeric."
[1] "same_shape: capital_loss2179 class was logical i set it to numeric."
[1] "same_shape: countryCambodia class was logical i set it to numeric."
[1] "same_shape: countryCanada class was logical i set it to numeric."
[1] "same_shape: countryChina class was logical i set it to numeric."
[1] "same_shape: countryColumbia class was logical i set it to numeric."
[1] "same_shape: countryCuba class was logical i set it to numeric."
[1] "same_shape: countryDominican-Republic class was logical i set it to numeric."
[1] "same_shape: countryEcuador class was logical i set it to numeric."
[1] "same_shape: countryEl-Salvador class was logical i set it to numeric."
[1] "same_shape: countryEngland class was logical i set it to numeric."
[1] "same_shape: countryFrance class was logical i set it to numeric."
[1] "same_shape: countryGermany class was logical i set it to numeric."
[1] "same_shape: countryGreece class was logical i set it to numeric."
[1] "same_shape: countryGuatemala class was logical i set it to numeric."
[1] "same_shape: countryHaiti class was logical i set it to numeric."
[1] "same_shape: countryHoland-Netherlands class was logical i set it to numeric."
[1] "same_shape: countryHonduras class was logical i set it to numeric."
[1] "same_shape: countryHong class was logical i set it to numeric."
[1] "same_shape: countryHungary class was logical i set it to numeric."
[1] "same_shape: countryIndia class was logical i set it to numeric."
[1] "same_shape: countryIran class was logical i set it to numeric."
[1] "same_shape: countryIreland class was logical i set it to numeric."
[1] "same_shape: countryItaly class was logical i set it to numeric."
[1] "same_shape: countryJamaica class was logical i set it to numeric."
[1] "same_shape: countryJapan class was logical i set it to numeric."
[1] "same_shape: countryLaos class was logical i set it to numeric."
[1] "same_shape: countryMexico class was logical i set it to numeric."
[1] "same_shape: countryNicaragua class was logical i set it to numeric."
[1] "same_shape: countryOutlying-US(Guam-USVI-etc) class was logical i set it to numeric."
[1] "same_shape: countryPeru class was logical i set it to numeric."
[1] "same_shape: countryPhilippines class was logical i set it to numeric."
[1] "same_shape: countryPoland class was logical i set it to numeric."
[1] "same_shape: countryPortugal class was logical i set it to numeric."
[1] "same_shape: countryPuerto-Rico class was logical i set it to numeric."
[1] "same_shape: countryScotland class was logical i set it to numeric."
[1] "same_shape: countrySouth class was logical i set it to numeric."
[1] "same_shape: countryTaiwan class was logical i set it to numeric."
[1] "same_shape: countryThailand class was logical i set it to numeric."
[1] "same_shape: countryTrinadad&Tobago class was logical i set it to numeric."
[1] "same_shape: countryUnited-States class was logical i set it to numeric."
[1] "same_shape: countryVietnam class was logical i set it to numeric."
[1] "same_shape: countryYugoslavia class was logical i set it to numeric."
[1] "same_shape: income>50K class was logical i set it to numeric."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "fast_scale: I will scale 1 numeric columns."
[1] "fast_scale: it took me: 0s to scale 1 numeric columns."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "fast_scale: I will scale 1 numeric columns."
[1] "fast_scale: it took me: 0s to scale 1 numeric columns."
[1] "fast_scale: I will scale 1 numeric columns."
[1] "fast_scale: it took me: 0s to unscale 1 numeric columns."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "set_col_as_numeric: I will set some columns as numeric"
[1] "set_col_as_numeric: I am doing the column char_col_1."
[1] "set_col_as_numeric: 0 NA have been created due to transformation to numeric."
[1] "set_col_as_numeric: I am doing the column char_col_2."
[1] "set_col_as_numeric: 0 NA have been created due to transformation to numeric."
[1] "set_col_as_character: I will set some columns as character"
[1] "set_col_as_character: I am doing the column numCol."
[1] "set_col_as_character: I am doing the column factorCol."
[1] "set_col_as_character: I am doing the column charcol."
[1] "set_col_as_character: charcol is a character, i do nothing."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column date1."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: I am doing the column date2."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: it took me: 0s to transform 2 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column date2."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column date1."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: it took me: 0.02s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column ID."
[1] "set_col_as_date: it took me: 0s to transform 0 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column ID."
[1] "set_col_as_date: Since i generated only NAs i set ID as it was before."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column ID."
[1] "set_col_as_date: ID doesn't seem to be a date, if it really is please provide format."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column time."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column time_stamp_s."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column time_stamp_ms."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: I am doing the column col."
[1] "set_col_as_factor: it took me: 0s to transform 1 column(s) to factor."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: I am doing the column col."
[1] "set_col_as_factor: it took me: 0s to transform 1 column(s) to factor."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: I am doing the column col."
[1] "set_col_as_factor: col has more than 2 values, i don't transform it."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming numerical variables into factors when length(unique(col)) <= 10."
[1] "shape_set: Previous distribution of column types:"
col_class_init
factor integer
9 6
[1] "shape_set: Current distribution of column types:"
col_class_end
factor integer
9 6
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming numerical variables into factors when length(unique(col)) <= 10."
[1] "shape_set: Previous distribution of column types:"
col_class_init
factor integer
9 6
[1] "shape_set: Current distribution of column types:"
col_class_end
factor integer
9 6
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming numerical variables into factors when length(unique(col)) <= 10."
[1] "shape_set: Previous distribution of column types:"
col_class_init
factor integer
9 6
[1] "shape_set: Current distribution of column types:"
col_class_end
factor integer
9 6
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming logical into binaries.\n"
[1] "shape_set: Previous distribution of column types:"
col_class_init
logical
1
[1] "shape_set: Current distribution of column types:"
col_class_end
integer
1
[1] "which_are_constant: constantCol is constant."
[1] "which_are_constant: it took me 0s to identify 1 constant column(s)"
[1] "which_are_in_double: it took me 0s to identify 2 column(s) to drop."
[1] "which_are_in_double: it took me 0s to identify 1 column(s) to drop."
[1] "which_are_in_double: it took me 0s to identify 1 column(s) to drop."
[1] "which_are_in_double: it took me 0s to identify 0 column(s) to drop."
[1] "which_are_bijection: it took me 0.02s to identify 1 column(s) to drop."
[1] "which_are_bijection: education is a bijection of education_num. I put it in drop list."
[1] "which_are_bijection: it took me 0.02s to identify 1 column(s) to drop."
[1] "which_are_bijection: it took me 0s to identify 0 column(s) to drop."
[1] "which_are_included: education is included in column education_num."
[1] "which_are_included: education_num is included in column education."
[1] "which_are_included: are_50_or_more is included in column age."
[1] "which_are_included: constant is included in column sex."
[1] "which_are_included: sex is included in column fnlwgt."
[1] "which_are_included: income is included in column id."
[1] "which_are_included: race is included in column fnlwgt."
[1] "which_are_included: relationship is included in column id."
[1] "which_are_included: type_employer is included in column fnlwgt."
[1] "which_are_included: marital is included in column id."
[1] "which_are_included: occupation is included in column id."
[1] "which_are_included: education is included in column education_num."
[1] "which_are_included: education_num is included in column id."
[1] "which_are_included: capital_gain is included in column fnlwgt."
[1] "which_are_included: capital_loss is included in column fnlwgt."
[1] "which_are_included: country is included in column fnlwgt."
[1] "which_are_included: hr_per_week is included in column id."
[1] "which_are_included: age is included in column id."
[1] "which_are_included: mail is included in column id."
[1] "which_are_included: date2 is included in column id."
[1] "which_are_included: date1 is included in column id."
[1] "which_are_included: date3 is included in column date4."
[1] "which_are_included: date4 is included in column id."
[1] "which_are_included: num1 is included in column num3."
[1] "which_are_included: num3 is included in column id."
[1] "which_are_included: num2 is included in column id."
[1] "which_are_included: fnlwgt is included in column id."
[1] "which_are_included: constant is included in column sex."
[1] "which_are_included: sex is included in column fnlwgt."
[1] "which_are_included: income is included in column id."
[1] "which_are_included: race is included in column fnlwgt."
[1] "which_are_included: relationship is included in column id."
[1] "which_are_included: type_employer is included in column fnlwgt."
[1] "which_are_included: marital is included in column id."
[1] "which_are_included: occupation is included in column id."
[1] "which_are_included: education is included in column education_num."
[1] "which_are_included: education_num is included in column id."
[1] "which_are_included: capital_gain is included in column fnlwgt."
[1] "which_are_included: capital_loss is included in column fnlwgt."
[1] "which_are_included: country is included in column fnlwgt."
[1] "which_are_included: hr_per_week is included in column id."
[1] "which_are_included: age is included in column id."
[1] "which_are_included: mail is included in column id."
[1] "which_are_included: date2 is included in column id."
[1] "which_are_included: date1 is included in column id."
[1] "which_are_included: date3 is included in column date4."
[1] "which_are_included: date4 is included in column id."
[1] "which_are_included: num1 is included in column num3."
[1] "which_are_included: num3 is included in column id."
[1] "which_are_included: num2 is included in column id."
[1] "which_are_included: fnlwgt is included in column id."
[ FAIL 7 | WARN 0 | SKIP 1 | PASS 322 ]
══ Skipped tests (1) ═══════════════════════════════════════════════════════════
• empty test (1):
══ Failed tests ════════════════════════════════════════════════════════════════
── Error ('test_generate_from_character.R:13:5'): generate_from_character: don't drop so generate 3 new cols ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 3/3 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_character(data_set, cols = "character_col") at test_generate_from_character.R:13:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_character.R:26:5'): generate_from_character: drop generate 3 col and suppress one ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 2/2 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_character(data_set, drop = TRUE) at test_generate_from_character.R:26:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_character.R:40:5'): generate_from_character: don't reduce number of rows even with NA ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 2/2 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_character(data_set, cols = "character_col") at test_generate_from_character.R:40:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_factor.R:14:5'): generate_from_factor: drop: functionnal test on reference set ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 25/25 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_factor(...) at test_generate_from_factor.R:14:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_factor.R:27:5'): generate_from_factor: test don't drop => keep original col ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 2/2 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_factor(...) at test_generate_from_factor.R:27:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_factor.R:80:5'): build_encoding: min_frequency allows to drop rare values ──
Error in ``[.data.table`(data_set, , `:=`(c("freq"), (.N/nrow(data_set))), by = col)`: attempt access index 1/1 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::build_encoding(...) at test_generate_from_factor.R:80:5
2. ├─data_set[, `:=`(c("freq"), (.N/nrow(data_set))), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_prepare_set.R:14:5'): prepare_set: functionnal test: test full pipeline. Should give result with as many rows as unique key. ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 15/15 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::prepare_set(...) at test_prepare_set.R:14:5
2. └─dataPreparation::generate_from_character(...)
3. ├─data_set[, `:=`(c(new_col), .N), by = col]
4. └─data.table:::`[.data.table`(...)
[ FAIL 7 | WARN 0 | SKIP 1 | PASS 322 ]
Error:
! Test failures.
Execution halted
Flavor: r-devel-windows-x86_64