Last updated on 2025-12-28 05:50:24 CET.
| Package | ERROR | OK |
|---|---|---|
| dataPreparation | 1 | 12 |
Current CRAN status: ERROR: 1, OK: 12
Version: 1.1.2
Check: examples
Result: ERROR
Running examples in 'dataPreparation-Ex.R' failed
The error most likely occurred in:
> ### Name: build_encoding
> ### Title: Compute encoding
> ### Aliases: build_encoding
>
> ### ** Examples
>
> # Get a data set
> data(adult)
> encoding <- build_encoding(adult, cols = "auto", verbose = TRUE)
[1] "age" "fnlwgt" "education_num" "capital_gain"
[5] "capital_loss" "hr_per_week"
[1] "build_encoding: c(\"age\", \"fnlwgt\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor or character i do nothing for those variables."
[1] "build_encoding: I will compute encoding on 9 character and factor columns."
[1] "build_encoding: it took me: 0.01s to compute encoding for 9 character and factor columns."
>
> print(encoding)
$type_employer
$type_employer$new_cols
type_employer.? type_employer.Federal-gov
"type.employer.." "type.employer.Federal.gov"
type_employer.Local-gov type_employer.Never-worked
"type.employer.Local.gov" "type.employer.Never.worked"
type_employer.Private type_employer.Self-emp-inc
"type.employer.Private" "type.employer.Self.emp.inc"
type_employer.Self-emp-not-inc type_employer.State-gov
"type.employer.Self.emp.not.inc" "type.employer.State.gov"
type_employer.Without-pay
"type.employer.Without.pay"
$type_employer$values
[1] "?" "Federal-gov" "Local-gov" "Never-worked"
[5] "Private" "Self-emp-inc" "Self-emp-not-inc" "State-gov"
[9] "Without-pay"
$education
$education$new_cols
education.10th education.11th education.12th
"education.10th" "education.11th" "education.12th"
education.1st-4th education.5th-6th education.7th-8th
"education.1st.4th" "education.5th.6th" "education.7th.8th"
education.9th education.Assoc-acdm education.Assoc-voc
"education.9th" "education.Assoc.acdm" "education.Assoc.voc"
education.Bachelors education.Doctorate education.HS-grad
"education.Bachelors" "education.Doctorate" "education.HS.grad"
education.Masters education.Preschool education.Prof-school
"education.Masters" "education.Preschool" "education.Prof.school"
education.Some-college
"education.Some.college"
$education$values
[1] "10th" "11th" "12th" "1st-4th" "5th-6th"
[6] "7th-8th" "9th" "Assoc-acdm" "Assoc-voc" "Bachelors"
[11] "Doctorate" "HS-grad" "Masters" "Preschool" "Prof-school"
[16] "Some-college"
$marital
$marital$new_cols
marital.Divorced marital.Married-AF-spouse
"marital.Divorced" "marital.Married.AF.spouse"
marital.Married-civ-spouse marital.Married-spouse-absent
"marital.Married.civ.spouse" "marital.Married.spouse.absent"
marital.Never-married marital.Separated
"marital.Never.married" "marital.Separated"
marital.Widowed
"marital.Widowed"
$marital$values
[1] "Divorced" "Married-AF-spouse" "Married-civ-spouse"
[4] "Married-spouse-absent" "Never-married" "Separated"
[7] "Widowed"
$occupation
$occupation$new_cols
occupation.? occupation.Adm-clerical
"occupation.." "occupation.Adm.clerical"
occupation.Armed-Forces occupation.Craft-repair
"occupation.Armed.Forces" "occupation.Craft.repair"
occupation.Exec-managerial occupation.Farming-fishing
"occupation.Exec.managerial" "occupation.Farming.fishing"
occupation.Handlers-cleaners occupation.Machine-op-inspct
"occupation.Handlers.cleaners" "occupation.Machine.op.inspct"
occupation.Other-service occupation.Priv-house-serv
"occupation.Other.service" "occupation.Priv.house.serv"
occupation.Prof-specialty occupation.Protective-serv
"occupation.Prof.specialty" "occupation.Protective.serv"
occupation.Sales occupation.Tech-support
"occupation.Sales" "occupation.Tech.support"
occupation.Transport-moving
"occupation.Transport.moving"
$occupation$values
[1] "?" "Adm-clerical" "Armed-Forces"
[4] "Craft-repair" "Exec-managerial" "Farming-fishing"
[7] "Handlers-cleaners" "Machine-op-inspct" "Other-service"
[10] "Priv-house-serv" "Prof-specialty" "Protective-serv"
[13] "Sales" "Tech-support" "Transport-moving"
$relationship
$relationship$new_cols
relationship.Husband relationship.Not-in-family
"relationship.Husband" "relationship.Not.in.family"
relationship.Other-relative relationship.Own-child
"relationship.Other.relative" "relationship.Own.child"
relationship.Unmarried relationship.Wife
"relationship.Unmarried" "relationship.Wife"
$relationship$values
[1] "Husband" "Not-in-family" "Other-relative" "Own-child"
[5] "Unmarried" "Wife"
$race
$race$new_cols
race.Amer-Indian-Eskimo race.Asian-Pac-Islander race.Black
"race.Amer.Indian.Eskimo" "race.Asian.Pac.Islander" "race.Black"
race.Other race.White
"race.Other" "race.White"
$race$values
[1] "Amer-Indian-Eskimo" "Asian-Pac-Islander" "Black"
[4] "Other" "White"
$sex
$sex$new_cols
sex.Female sex.Male
"sex.Female" "sex.Male"
$sex$values
[1] "Female" "Male"
$country
$country$new_cols
country.? country.Cambodia
"country.." "country.Cambodia"
country.Canada country.China
"country.Canada" "country.China"
country.Columbia country.Cuba
"country.Columbia" "country.Cuba"
country.Dominican-Republic country.Ecuador
"country.Dominican.Republic" "country.Ecuador"
country.El-Salvador country.England
"country.El.Salvador" "country.England"
country.France country.Germany
"country.France" "country.Germany"
country.Greece country.Guatemala
"country.Greece" "country.Guatemala"
country.Haiti country.Holand-Netherlands
"country.Haiti" "country.Holand.Netherlands"
country.Honduras country.Hong
"country.Honduras" "country.Hong"
country.Hungary country.India
"country.Hungary" "country.India"
country.Iran country.Ireland
"country.Iran" "country.Ireland"
country.Italy country.Jamaica
"country.Italy" "country.Jamaica"
country.Japan country.Laos
"country.Japan" "country.Laos"
country.Mexico country.Nicaragua
"country.Mexico" "country.Nicaragua"
country.Outlying-US(Guam-USVI-etc) country.Peru
"country.Outlying.US.Guam.USVI.etc." "country.Peru"
country.Philippines country.Poland
"country.Philippines" "country.Poland"
country.Portugal country.Puerto-Rico
"country.Portugal" "country.Puerto.Rico"
country.Scotland country.South
"country.Scotland" "country.South"
country.Taiwan country.Thailand
"country.Taiwan" "country.Thailand"
country.Trinadad&Tobago country.United-States
"country.Trinadad.Tobago" "country.United.States"
country.Vietnam country.Yugoslavia
"country.Vietnam" "country.Yugoslavia"
$country$values
[1] "?" "Cambodia"
[3] "Canada" "China"
[5] "Columbia" "Cuba"
[7] "Dominican-Republic" "Ecuador"
[9] "El-Salvador" "England"
[11] "France" "Germany"
[13] "Greece" "Guatemala"
[15] "Haiti" "Holand-Netherlands"
[17] "Honduras" "Hong"
[19] "Hungary" "India"
[21] "Iran" "Ireland"
[23] "Italy" "Jamaica"
[25] "Japan" "Laos"
[27] "Mexico" "Nicaragua"
[29] "Outlying-US(Guam-USVI-etc)" "Peru"
[31] "Philippines" "Poland"
[33] "Portugal" "Puerto-Rico"
[35] "Scotland" "South"
[37] "Taiwan" "Thailand"
[39] "Trinadad&Tobago" "United-States"
[41] "Vietnam" "Yugoslavia"
$income
$income$new_cols
income.<=50K income.>50K
"income...50K" "income..50K"
$income$values
[1] "<=50K" ">50K"
>
> # To limit the number of generated columns, one can use min_frequency parameter:
> build_encoding(adult, cols = "auto", verbose = TRUE, min_frequency = 0.1)
[1] "age" "fnlwgt" "education_num" "capital_gain"
[5] "capital_loss" "hr_per_week"
[1] "build_encoding: c(\"age\", \"fnlwgt\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor or character i do nothing for those variables."
[1] "build_encoding: I will compute encoding on 9 character and factor columns."
Error in `[.data.table`(data_set, , `:=`(c("freq"), (.N/nrow(data_set))), :
attempt access index 15/15 in VECTOR_ELT
Calls: build_encoding -> [ -> [.data.table
Execution halted
Flavor: r-devel-windows-x86_64
Version: 1.1.2
Check: tests
Result: ERROR
Running 'testthat.R' [14s]
Running the tests in 'tests/testthat.R' failed.
Complete output:
> if (requireNamespace("testthat", quietly = TRUE)) {
+ library(testthat)
+ library(dataPreparation)
+ test_check("dataPreparation")
+ }
dataPreparation 1.1.2
Type data_preparation_news() to see new features/changes/bug fixes.
[1] "aggregate_by_key: I start to aggregate"
[1] "aggregate_by_key: 6 columns have been constructed. It took 0.02 seconds. "
[1] "find_and_transform_dates: It took me 1.92s to identify formats"
[1] "find_and_transform_dates: It took me 0.16s to transform 4 columns to a Date format."
[1] "find_and_transform_dates: It took me 0s to identify formats"
[1] "find_and_transform_dates: There are no dates to transform.\n (If i missed something please provide the date format in inputs or\n consider using set_col_as_date to transform it)."
[1] "identify_dates: column date_col seems to have an ambiguity, I try to solve it."
[1] "V2"
[1] "fast_discretization: V2 aren't columns of types numeric i do nothing for those variables."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_freq method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will build splits for 0 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 0 numeric columns."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "equal_width_splits: constant_col can't provide 10 equal width bins; instead you will have 0 bins."
[1] "fast_discretization: column constant_col seems to be constant, I do nothing."
[1] "fast_discretization: it took me: 0s to build splits for 0 numeric columns."
[1] "equal_width_splits: data_set can't provide 10 equal width bins; instead you will have 0 bins."
[1] "equal_freq_splits: data_set can't provide 10 equal freq bins; instead you will have 2 bins."
[1] "fast_discretization: I will build splits for 1 numeric columns using, equal_width method."
[1] "fast_discretization: it took me: 0s to build splits for 1 numeric columns."
[1] "fast_discretization: I will discretize 1 numeric columns using, bins."
[1] "fast_discretization: it took me: 0s to transform 1 numeric columns into, binary columns."
[1] "un_factor: I will identify variable that are factor but shouldn't be."
[1] "un_factor: I un-factor false_factor."
[1] "un_factor: It took me 0s to un-factor 1 column(s)."
[1] "un_factor: I will identify variable that are factor but shouldn't be."
[1] "un_factor: I un-factor true_factor."
[1] "un_factor: I un-factor false_factor."
[1] "un_factor: It took me 0s to un-factor 2 column(s)."
[1] "fast_filter_variables: I check for constant columns."
[1] "fast_filter_variables: I delete 1 constant column(s) in data_set."
[1] "fast_filter_variables: I check for columns in double."
[1] "fast_filter_variables: I delete 1 column(s) that are in double in data_set."
[1] "fast_filter_variables: I check for columns that are bijections of another column."
[1] "fast_filter_variables: I delete 3 column(s) that are bijections of another column in data_set."
[1] "fast_filter_variables: I check for columns that are included in another column."
[1] "fast_filter_variables: I delete 1 column(s) that are bijections of another column in data_set."
[1] "string_column"
[1] "fast_round: string_column aren't columns of types numeric or integer i do nothing for those variables."
[1] "string_column"
[1] "fast_round: string_column aren't columns of types numeric or integer i do nothing for those variables."
Saving _problems/test_generate_from_character-13.R
Saving _problems/test_generate_from_character-26.R
Saving _problems/test_generate_from_character-40.R
[1] "generate_factor_from_date: I will create a factor column from each date column."
[1] "generate_factor_from_date: It took me 0s to transform 1 column(s)."
[1] "ID"
[1] "generate_date_diffs: ID aren't columns of types date i do nothing for those variables."
[1] "generate_date_diffs: I will generate difference between dates."
[1] "generate_date_diffs: It took me 0s to create 3 column(s)."
[1] "date1" "date2" "date3" "date4"
[5] "num1" "num2" "constant" "num3"
[9] "age" "fnlwgt" "education_num" "capital_gain"
[13] "capital_loss" "hr_per_week"
[1] "generate_from_factor: c(\"date1\", \"date2\", \"date3\", \"date4\", \"num1\", \"num2\", \"constant\", \"num3\", \"age\", \"fnlwgt\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor i do nothing for those variables."
Saving _problems/test_generate_from_factor-14.R
Saving _problems/test_generate_from_factor-27.R
[1] "one_hot_encoder: Since you didn't provide encoding, I compute them with build_encoding."
[1] "build_encoding: I will compute encoding on 1 character and factor columns."
[1] "build_encoding: it took me: 0s to compute encoding for 1 character and factor columns."
[1] "one_hot_encoder: I will one hot encode some columns."
[1] "one_hot_encoder: I am doing column: character_col"
[1] "one_hot_encoder: It took me 0s to transform 1 column(s)."
[1] "build_encoding: I will compute encoding on 1 character and factor columns."
[1] "build_encoding: it took me: 0s to compute encoding for 1 character and factor columns."
[1] "build_encoding: I will compute encoding on 1 character and factor columns."
Saving _problems/test_generate_from_factor-80.R
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: grades."
[1] "target_encode: Start to encode columns according to target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: grades."
[1] "target_encode: Start to encode columns according to target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: target."
[1] "build_target_encoding: Start to compute encoding for target_encoding according to col: target."
[1] "real_cols: col_2 aren't columns of the table, i do nothing for those variables"
[1] "col_2"
[1] "real_cols: col_2 aren't columns of types numeric i do nothing for those variables."
[1] "find_and_transform_numerics: It took me 0s to identify 2 numerics column(s), i will set them as numerics"
[1] "find_and_transform_numerics: It took me 0s to transform 2 column(s) to a numeric format."
[1] "find_and_transform_numerics: It took me 0s to identify 0 numerics column(s), i will set them as numerics"
[1] "find_and_transform_numerics: There are no numerics to transform.(If i missed something consider using set_col_as_numeric to transform it)"
[1] "prepare_set: step one: correcting mistakes."
[1] "fast_filter_variables: I check for constant columns."
[1] "fast_filter_variables: I check for columns in double."
[1] "fast_filter_variables: I check for columns that are bijections of another column."
[1] "fast_filter_variables: I delete 1 column(s) that are bijections of another column in data_set."
[1] "age" "fnlwgt" "capital_gain" "capital_loss" "hr_per_week"
[1] "un_factor: c(\"age\", \"fnlwgt\", \"capital_gain\", \"capital_loss\", \"hr_per_week\") aren't columns of types factor i do nothing for those variables."
[1] "un_factor: I will identify variable that are factor but shouldn't be."
[1] "un_factor: I un-factor education."
[1] "un_factor: I un-factor occupation."
[1] "un_factor: I un-factor country."
[1] "un_factor: It took me 0s to un-factor 3 column(s)."
[1] "find_and_transform_numerics: It took me 0s to identify 0 numerics column(s), i will set them as numerics"
[1] "find_and_transform_numerics: There are no numerics to transform.(If i missed something consider using set_col_as_numeric to transform it)"
[1] "find_and_transform_dates: It took me 1.17s to identify formats"
[1] "find_and_transform_dates: There are no dates to transform.\n (If i missed something please provide the date format in inputs or\n consider using set_col_as_date to transform it)."
[1] "prepare_set: step two: transforming data_set."
[1] "age" "type_employer" "fnlwgt" "education"
[5] "marital" "occupation" "relationship" "race"
[9] "sex" "capital_gain" "capital_loss" "hr_per_week"
[13] "country" "income"
[1] "prepare_set: c(\"age\", \"type_employer\", \"fnlwgt\", \"education\", \"marital\", \"occupation\", \"relationship\", \"race\", \"sex\", \"capital_gain\", \"capital_loss\", \"hr_per_week\", \"country\", \"income\") aren't columns of types date i do nothing for those variables."
[1] "generate_date_diffs: I will generate difference between dates."
[1] "generate_date_diffs: It took me 0s to create 0 column(s)."
[1] "generate_factor_from_date: I will create a factor column from each date column."
[1] "generate_factor_from_date: It took me 0s to transform 0 column(s)."
[1] "age" "type_employer" "fnlwgt" "marital"
[5] "relationship" "race" "sex" "capital_gain"
[9] "capital_loss" "hr_per_week" "income"
[1] "prepare_set: c(\"age\", \"type_employer\", \"fnlwgt\", \"marital\", \"relationship\", \"race\", \"sex\", \"capital_gain\", \"capital_loss\", \"hr_per_week\", \"income\") aren't columns of types character i do nothing for those variables."
Saving _problems/test_prepare_set-15.R
[1] "remove_sd_outlier: I start to filter categorical rare events"
[1] "remove_sd_outlier: dropped 1 row(s) that are rare event on num_col."
[1] "remove_sd_outlier: 1 have been dropped. It took 0.02 seconds. "
[1] "remove_sd_outlier: I start to filter categorical rare events"
[1] "remove_sd_outlier: dropped 0 row(s) that are rare event on num_col."
[1] "remove_sd_outlier: 0 have been dropped. It took 0 seconds. "
[1] "remove_rare_categorical: I start to filter categorical rare events"
[1] "remove_rare_categorical: dropped 1 row(s) that are rare event on cat_col."
[1] "remove_rare_categorical: 1 have been dropped. It took 0 seconds. "
[1] "remove_percentile_outlier: I start to filter categorical rare events"
[1] "remove_percentile_outlier: dropped 2 row(s) that are rare event on num_col."
[1] "remove_percentile_outlier: 2 have been dropped. It took 0 seconds. "
[1] "remove_percentile_outlier: I start to filter categorical rare events"
[1] "remove_percentile_outlier: dropped 2 row(s) that are rare event on num_col."
[1] "remove_percentile_outlier: 2 have been dropped. It took 0 seconds. "
[1] "same_shape: verify that every column is present."
[1] "same_shape: columns col_2 are missing, I create them."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_2 class was logical i set it to numeric."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: the following columns are in data_set but not in reference_set: I drop them: "
[1] "col_2"
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was character i set it to numeric."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was character i set it to c(\"POSIXct\", \"POSIXt\")."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: col_1 class had different levels than in reference_set I change it."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: col_1 class had different levels than in reference_set I change it."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was numeric i set it to weird_class."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: col_1 class was numeric i set it to weird_class."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: columns type_employer?, type_employerFederal-gov, type_employerLocal-gov, type_employerNever-worked, type_employerPrivate, type_employerSelf-emp-inc, type_employerSelf-emp-not-inc, type_employerState-gov, type_employerWithout-pay, education11th, education12th, education1st-4th, education5th-6th, education7th-8th, education9th, educationAssoc-acdm, educationAssoc-voc, educationBachelors, educationDoctorate, educationHS-grad, educationMasters, educationPreschool, educationProf-school, educationSome-college, maritalMarried-AF-spouse, maritalMarried-civ-spouse, maritalMarried-spouse-absent, maritalNever-married, maritalSeparated, maritalWidowed, occupationAdm-clerical, occupationArmed-Forces, occupationCraft-repair, occupationExec-managerial, occupationFarming-fishing, occupationHandlers-cleaners, occupationMachine-op-inspct, occupationOther-service, occupationPriv-house-serv, occupationProf-specialty, occupationProtective-serv, occupationSales, occupationTech-support, occupationTransport-moving, relationshipNot-in-family, relationshipOther-relative, relationshipOwn-child, relationshipUnmarried, relationshipWife, raceAsian-Pac-Islander, raceBlack, raceOther, raceWhite, sexMale, capital_loss1408, capital_loss1564, capital_loss1573, capital_loss1719, capital_loss1762, capital_loss1887, capital_loss1902, capital_loss2042, capital_loss2179, countryCambodia, countryCanada, countryChina, countryColumbia, countryCuba, countryDominican-Republic, countryEcuador, countryEl-Salvador, countryEngland, countryFrance, countryGermany, countryGreece, countryGuatemala, countryHaiti, countryHoland-Netherlands, countryHonduras, countryHong, countryHungary, countryIndia, countryIran, countryIreland, countryItaly, countryJamaica, countryJapan, countryLaos, countryMexico, countryNicaragua, countryOutlying-US(Guam-USVI-etc), countryPeru, countryPhilippines, countryPoland, countryPortugal, countryPuerto-Rico, countryScotland, countrySouth, countryTaiwan, countryThailand, countryTrinadad&Tobago, countryUnited-States, countryVietnam, countryYugoslavia, income>50K are missing, I create them."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: the following columns are in data_set but not in reference_set: I drop them: "
[1] "type_employer" "education" "marital" "occupation"
[5] "relationship" "race" "sex" "capital_loss"
[9] "country" "income"
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: age class was integer i set it to numeric."
[1] "same_shape: fnlwgt class was integer i set it to numeric."
[1] "same_shape: education_num class was integer i set it to numeric."
[1] "same_shape: capital_gain class was integer i set it to numeric."
[1] "same_shape: hr_per_week class was integer i set it to numeric."
[1] "same_shape: type_employer? class was logical i set it to numeric."
[1] "same_shape: type_employerFederal-gov class was logical i set it to numeric."
[1] "same_shape: type_employerLocal-gov class was logical i set it to numeric."
[1] "same_shape: type_employerNever-worked class was logical i set it to numeric."
[1] "same_shape: type_employerPrivate class was logical i set it to numeric."
[1] "same_shape: type_employerSelf-emp-inc class was logical i set it to numeric."
[1] "same_shape: type_employerSelf-emp-not-inc class was logical i set it to numeric."
[1] "same_shape: type_employerState-gov class was logical i set it to numeric."
[1] "same_shape: type_employerWithout-pay class was logical i set it to numeric."
[1] "same_shape: education11th class was logical i set it to numeric."
[1] "same_shape: education12th class was logical i set it to numeric."
[1] "same_shape: education1st-4th class was logical i set it to numeric."
[1] "same_shape: education5th-6th class was logical i set it to numeric."
[1] "same_shape: education7th-8th class was logical i set it to numeric."
[1] "same_shape: education9th class was logical i set it to numeric."
[1] "same_shape: educationAssoc-acdm class was logical i set it to numeric."
[1] "same_shape: educationAssoc-voc class was logical i set it to numeric."
[1] "same_shape: educationBachelors class was logical i set it to numeric."
[1] "same_shape: educationDoctorate class was logical i set it to numeric."
[1] "same_shape: educationHS-grad class was logical i set it to numeric."
[1] "same_shape: educationMasters class was logical i set it to numeric."
[1] "same_shape: educationPreschool class was logical i set it to numeric."
[1] "same_shape: educationProf-school class was logical i set it to numeric."
[1] "same_shape: educationSome-college class was logical i set it to numeric."
[1] "same_shape: maritalMarried-AF-spouse class was logical i set it to numeric."
[1] "same_shape: maritalMarried-civ-spouse class was logical i set it to numeric."
[1] "same_shape: maritalMarried-spouse-absent class was logical i set it to numeric."
[1] "same_shape: maritalNever-married class was logical i set it to numeric."
[1] "same_shape: maritalSeparated class was logical i set it to numeric."
[1] "same_shape: maritalWidowed class was logical i set it to numeric."
[1] "same_shape: occupationAdm-clerical class was logical i set it to numeric."
[1] "same_shape: occupationArmed-Forces class was logical i set it to numeric."
[1] "same_shape: occupationCraft-repair class was logical i set it to numeric."
[1] "same_shape: occupationExec-managerial class was logical i set it to numeric."
[1] "same_shape: occupationFarming-fishing class was logical i set it to numeric."
[1] "same_shape: occupationHandlers-cleaners class was logical i set it to numeric."
[1] "same_shape: occupationMachine-op-inspct class was logical i set it to numeric."
[1] "same_shape: occupationOther-service class was logical i set it to numeric."
[1] "same_shape: occupationPriv-house-serv class was logical i set it to numeric."
[1] "same_shape: occupationProf-specialty class was logical i set it to numeric."
[1] "same_shape: occupationProtective-serv class was logical i set it to numeric."
[1] "same_shape: occupationSales class was logical i set it to numeric."
[1] "same_shape: occupationTech-support class was logical i set it to numeric."
[1] "same_shape: occupationTransport-moving class was logical i set it to numeric."
[1] "same_shape: relationshipNot-in-family class was logical i set it to numeric."
[1] "same_shape: relationshipOther-relative class was logical i set it to numeric."
[1] "same_shape: relationshipOwn-child class was logical i set it to numeric."
[1] "same_shape: relationshipUnmarried class was logical i set it to numeric."
[1] "same_shape: relationshipWife class was logical i set it to numeric."
[1] "same_shape: raceAsian-Pac-Islander class was logical i set it to numeric."
[1] "same_shape: raceBlack class was logical i set it to numeric."
[1] "same_shape: raceOther class was logical i set it to numeric."
[1] "same_shape: raceWhite class was logical i set it to numeric."
[1] "same_shape: sexMale class was logical i set it to numeric."
[1] "same_shape: capital_loss1408 class was logical i set it to numeric."
[1] "same_shape: capital_loss1564 class was logical i set it to numeric."
[1] "same_shape: capital_loss1573 class was logical i set it to numeric."
[1] "same_shape: capital_loss1719 class was logical i set it to numeric."
[1] "same_shape: capital_loss1762 class was logical i set it to numeric."
[1] "same_shape: capital_loss1887 class was logical i set it to numeric."
[1] "same_shape: capital_loss1902 class was logical i set it to numeric."
[1] "same_shape: capital_loss2042 class was logical i set it to numeric."
[1] "same_shape: capital_loss2179 class was logical i set it to numeric."
[1] "same_shape: countryCambodia class was logical i set it to numeric."
[1] "same_shape: countryCanada class was logical i set it to numeric."
[1] "same_shape: countryChina class was logical i set it to numeric."
[1] "same_shape: countryColumbia class was logical i set it to numeric."
[1] "same_shape: countryCuba class was logical i set it to numeric."
[1] "same_shape: countryDominican-Republic class was logical i set it to numeric."
[1] "same_shape: countryEcuador class was logical i set it to numeric."
[1] "same_shape: countryEl-Salvador class was logical i set it to numeric."
[1] "same_shape: countryEngland class was logical i set it to numeric."
[1] "same_shape: countryFrance class was logical i set it to numeric."
[1] "same_shape: countryGermany class was logical i set it to numeric."
[1] "same_shape: countryGreece class was logical i set it to numeric."
[1] "same_shape: countryGuatemala class was logical i set it to numeric."
[1] "same_shape: countryHaiti class was logical i set it to numeric."
[1] "same_shape: countryHoland-Netherlands class was logical i set it to numeric."
[1] "same_shape: countryHonduras class was logical i set it to numeric."
[1] "same_shape: countryHong class was logical i set it to numeric."
[1] "same_shape: countryHungary class was logical i set it to numeric."
[1] "same_shape: countryIndia class was logical i set it to numeric."
[1] "same_shape: countryIran class was logical i set it to numeric."
[1] "same_shape: countryIreland class was logical i set it to numeric."
[1] "same_shape: countryItaly class was logical i set it to numeric."
[1] "same_shape: countryJamaica class was logical i set it to numeric."
[1] "same_shape: countryJapan class was logical i set it to numeric."
[1] "same_shape: countryLaos class was logical i set it to numeric."
[1] "same_shape: countryMexico class was logical i set it to numeric."
[1] "same_shape: countryNicaragua class was logical i set it to numeric."
[1] "same_shape: countryOutlying-US(Guam-USVI-etc) class was logical i set it to numeric."
[1] "same_shape: countryPeru class was logical i set it to numeric."
[1] "same_shape: countryPhilippines class was logical i set it to numeric."
[1] "same_shape: countryPoland class was logical i set it to numeric."
[1] "same_shape: countryPortugal class was logical i set it to numeric."
[1] "same_shape: countryPuerto-Rico class was logical i set it to numeric."
[1] "same_shape: countryScotland class was logical i set it to numeric."
[1] "same_shape: countrySouth class was logical i set it to numeric."
[1] "same_shape: countryTaiwan class was logical i set it to numeric."
[1] "same_shape: countryThailand class was logical i set it to numeric."
[1] "same_shape: countryTrinadad&Tobago class was logical i set it to numeric."
[1] "same_shape: countryUnited-States class was logical i set it to numeric."
[1] "same_shape: countryVietnam class was logical i set it to numeric."
[1] "same_shape: countryYugoslavia class was logical i set it to numeric."
[1] "same_shape: income>50K class was logical i set it to numeric."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "same_shape: verify that every column is present."
[1] "same_shape: drop unwanted columns."
[1] "same_shape: verify that every column is in the right type."
[1] "same_shape: verify that every factor as the right number of levels."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "fast_scale: I will scale 1 numeric columns."
[1] "fast_scale: it took me: 0s to scale 1 numeric columns."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "fast_scale: I will scale 1 numeric columns."
[1] "fast_scale: it took me: 0s to scale 1 numeric columns."
[1] "fast_scale: I will scale 1 numeric columns."
[1] "fast_scale: it took me: 0s to unscale 1 numeric columns."
[1] "build_scales: I will compute scale on 1 numeric columns."
[1] "build_scales: it took me: 0s to compute scale for 1 numeric columns."
[1] "set_col_as_numeric: I will set some columns as numeric"
[1] "set_col_as_numeric: I am doing the column char_col_1."
[1] "set_col_as_numeric: 0 NA have been created due to transformation to numeric."
[1] "set_col_as_numeric: I am doing the column char_col_2."
[1] "set_col_as_numeric: 0 NA have been created due to transformation to numeric."
[1] "set_col_as_character: I will set some columns as character"
[1] "set_col_as_character: I am doing the column numCol."
[1] "set_col_as_character: I am doing the column factorCol."
[1] "set_col_as_character: I am doing the column charcol."
[1] "set_col_as_character: charcol is a character, i do nothing."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column date1."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: I am doing the column date2."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: it took me: 0s to transform 2 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column date2."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column date1."
[1] "set_col_as_date:1 NA have been created due to transformation to Date."
[1] "set_col_as_date: it took me: 0.02s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column ID."
[1] "set_col_as_date: it took me: 0s to transform 0 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column ID."
[1] "set_col_as_date: Since i generated only NAs i set ID as it was before."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column ID."
[1] "set_col_as_date: ID doesn't seem to be a date, if it really is please provide format."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column time."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column time_stamp_s."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_date: I will set some columns as Date."
[1] "set_col_as_date: I am doing the column time_stamp_ms."
[1] "set_col_as_date: it took me: 0s to transform 1 column(s) to Dates."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: I am doing the column col."
[1] "set_col_as_factor: it took me: 0s to transform 1 column(s) to factor."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: I am doing the column col."
[1] "set_col_as_factor: it took me: 0s to transform 1 column(s) to factor."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: I am doing the column col."
[1] "set_col_as_factor: col has more than 2 values, i don't transform it."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming numerical variables into factors when length(unique(col)) <= 10."
[1] "shape_set: Previous distribution of column types:"
col_class_init
factor integer
9 6
[1] "shape_set: Current distribution of column types:"
col_class_end
factor integer
9 6
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming numerical variables into factors when length(unique(col)) <= 10."
[1] "shape_set: Previous distribution of column types:"
col_class_init
factor integer
9 6
[1] "shape_set: Current distribution of column types:"
col_class_end
factor integer
9 6
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming numerical variables into factors when length(unique(col)) <= 10."
[1] "shape_set: Previous distribution of column types:"
col_class_init
factor integer
9 6
[1] "shape_set: Current distribution of column types:"
col_class_end
factor integer
9 6
[1] "set_col_as_factor: I will set some columns to factor."
[1] "set_col_as_factor: it took me: 0s to transform 0 column(s) to factor."
[1] "shape_set: Transforming logical into binaries.\n"
[1] "shape_set: Previous distribution of column types:"
col_class_init
logical
1
[1] "shape_set: Current distribution of column types:"
col_class_end
integer
1
[1] "which_are_constant: constantCol is constant."
[1] "which_are_constant: it took me 0s to identify 1 constant column(s)"
[1] "which_are_in_double: it took me 0s to identify 2 column(s) to drop."
[1] "which_are_in_double: it took me 0s to identify 1 column(s) to drop."
[1] "which_are_in_double: it took me 0s to identify 1 column(s) to drop."
[1] "which_are_in_double: it took me 0s to identify 0 column(s) to drop."
[1] "which_are_bijection: it took me 0.02s to identify 1 column(s) to drop."
[1] "which_are_bijection: education is a bijection of education_num. I put it in drop list."
[1] "which_are_bijection: it took me 0.02s to identify 1 column(s) to drop."
[1] "which_are_bijection: it took me 0s to identify 0 column(s) to drop."
[1] "which_are_included: education is included in column education_num."
[1] "which_are_included: education_num is included in column education."
[1] "which_are_included: are_50_or_more is included in column age."
[1] "which_are_included: constant is included in column sex."
[1] "which_are_included: sex is included in column fnlwgt."
[1] "which_are_included: income is included in column id."
[1] "which_are_included: race is included in column fnlwgt."
[1] "which_are_included: relationship is included in column id."
[1] "which_are_included: type_employer is included in column fnlwgt."
[1] "which_are_included: marital is included in column id."
[1] "which_are_included: occupation is included in column id."
[1] "which_are_included: education is included in column education_num."
[1] "which_are_included: education_num is included in column id."
[1] "which_are_included: capital_gain is included in column fnlwgt."
[1] "which_are_included: capital_loss is included in column fnlwgt."
[1] "which_are_included: country is included in column fnlwgt."
[1] "which_are_included: hr_per_week is included in column id."
[1] "which_are_included: age is included in column id."
[1] "which_are_included: mail is included in column id."
[1] "which_are_included: date2 is included in column id."
[1] "which_are_included: date1 is included in column id."
[1] "which_are_included: date3 is included in column date4."
[1] "which_are_included: date4 is included in column id."
[1] "which_are_included: num1 is included in column num3."
[1] "which_are_included: num3 is included in column id."
[1] "which_are_included: num2 is included in column id."
[1] "which_are_included: fnlwgt is included in column id."
[1] "which_are_included: constant is included in column sex."
[1] "which_are_included: sex is included in column fnlwgt."
[1] "which_are_included: income is included in column id."
[1] "which_are_included: race is included in column fnlwgt."
[1] "which_are_included: relationship is included in column id."
[1] "which_are_included: type_employer is included in column fnlwgt."
[1] "which_are_included: marital is included in column id."
[1] "which_are_included: occupation is included in column id."
[1] "which_are_included: education is included in column education_num."
[1] "which_are_included: education_num is included in column id."
[1] "which_are_included: capital_gain is included in column fnlwgt."
[1] "which_are_included: capital_loss is included in column fnlwgt."
[1] "which_are_included: country is included in column fnlwgt."
[1] "which_are_included: hr_per_week is included in column id."
[1] "which_are_included: age is included in column id."
[1] "which_are_included: mail is included in column id."
[1] "which_are_included: date2 is included in column id."
[1] "which_are_included: date1 is included in column id."
[1] "which_are_included: date3 is included in column date4."
[1] "which_are_included: date4 is included in column id."
[1] "which_are_included: num1 is included in column num3."
[1] "which_are_included: num3 is included in column id."
[1] "which_are_included: num2 is included in column id."
[1] "which_are_included: fnlwgt is included in column id."
[ FAIL 7 | WARN 0 | SKIP 1 | PASS 322 ]
══ Skipped tests (1) ═══════════════════════════════════════════════════════════
• empty test (1):
══ Failed tests ════════════════════════════════════════════════════════════════
── Error ('test_generate_from_character.R:13:5'): generate_from_character: don't drop so generate 3 new cols ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 3/3 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_character(data_set, cols = "character_col") at test_generate_from_character.R:13:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_character.R:26:5'): generate_from_character: drop generate 3 col and suppress one ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 2/2 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_character(data_set, drop = TRUE) at test_generate_from_character.R:26:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_character.R:40:5'): generate_from_character: don't reduce number of rows even with NA ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 2/2 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_character(data_set, cols = "character_col") at test_generate_from_character.R:40:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_factor.R:14:5'): generate_from_factor: drop: functionnal test on reference set ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 25/25 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_factor(...) at test_generate_from_factor.R:14:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_factor.R:27:5'): generate_from_factor: test don't drop => keep original col ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 2/2 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::generate_from_factor(...) at test_generate_from_factor.R:27:5
2. ├─data_set[, `:=`(c(new_col), .N), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_generate_from_factor.R:80:5'): build_encoding: min_frequency allows to drop rare values ──
Error in ``[.data.table`(data_set, , `:=`(c("freq"), (.N/nrow(data_set))), by = col)`: attempt access index 1/1 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::build_encoding(...) at test_generate_from_factor.R:80:5
2. ├─data_set[, `:=`(c("freq"), (.N/nrow(data_set))), by = col]
3. └─data.table:::`[.data.table`(...)
── Error ('test_prepare_set.R:14:5'): prepare_set: functionnal test: test full pipeline. Should give result with as many rows as unique key. ──
Error in ``[.data.table`(data_set, , `:=`(c(new_col), .N), by = col)`: attempt access index 15/15 in VECTOR_ELT
Backtrace:
▆
1. └─dataPreparation::prepare_set(...) at test_prepare_set.R:14:5
2. └─dataPreparation::generate_from_character(...)
3. ├─data_set[, `:=`(c(new_col), .N), by = col]
4. └─data.table:::`[.data.table`(...)
[ FAIL 7 | WARN 0 | SKIP 1 | PASS 322 ]
Error:
! Test failures.
Execution halted
Flavor: r-devel-windows-x86_64
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.