# nolint start
library(mlexperiments)
See https://github.com/kapsner/mlexperiments/blob/main/R/learner_knn.R for implementation details.
library(mlbench)
data("DNA")
dataset <- DNA |>
  data.table::as.data.table() |>
  na.omit()

feature_cols <- colnames(dataset)[1:180]
target_col <- "Class"
seed <- 123

if (isTRUE(as.logical(Sys.getenv("_R_CHECK_LIMIT_CORES_")))) {
  # on cran
  ncores <- 2L
} else {
  ncores <- ifelse(
    test = parallel::detectCores() > 4,
    yes = 4L,
    no = ifelse(
      test = parallel::detectCores() < 2L,
      yes = 1L,
      no = parallel::detectCores()
    )
  )
}

options("mlexperiments.bayesian.max_init" = 10L)
data_split <- splitTools::partition(
  y = dataset[, get(target_col)],
  p = c(train = 0.7, test = 0.3),
  type = "stratified",
  seed = seed
)
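As a quick sanity check (not part of the original listing), stratified partitioning should leave the class distribution nearly identical in both subsets:

# compare class proportions between the two partitions
prop.table(table(dataset[data_split$train, get(target_col)]))
prop.table(table(dataset[data_split$test, get(target_col)]))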
train_x <- model.matrix(
  ~ -1 + .,
  dataset[data_split$train, .SD, .SDcols = feature_cols]
)
train_y <- dataset[data_split$train, get(target_col)]

test_x <- model.matrix(
  ~ -1 + .,
  dataset[data_split$test, .SD, .SDcols = feature_cols]
)
test_y <- dataset[data_split$test, get(target_col)]
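Nearest-neighbour models compute distances on numeric matrices, so the factor-coded DNA features are expanded into indicator columns; the -1 in the formula drops the intercept. A toy illustration of the encoding (the toy data frame is made up for this example):

# without an intercept, the first factor expands to one indicator column per
# level; subsequent factors keep the default treatment contrasts (one level
# dropped), so this yields the columns ax, ay, and bv
toy <- data.frame(a = factor(c("x", "y")), b = factor(c("u", "v")))
model.matrix(~ -1 + ., toy)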
fold_list <- splitTools::create_folds(
  y = train_y,
  k = 3,
  type = "stratified",
  seed = seed
)
# required learner arguments, not optimized
learner_args <- list(
  l = 2,
  # `test` is stored as an unevaluated expression and is evaluated inside
  # each resampling iteration, where `fold_test$x` holds that fold's
  # held-out observations
  test = parse(text = "fold_test$x"),
  use.all = FALSE
)
# set arguments for predict function and performance metric,
# required for mlexperiments::MLCrossValidation and
# mlexperiments::MLNestedCV
predict_args <- list(type = "response")
performance_metric <- metric("bacc")
performance_metric_args <- NULL
return_models <- FALSE
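Balanced accuracy ("bacc") is the unweighted mean of the per-class recalls, which makes it robust to the uneven class sizes in DNA. A minimal sketch of the computation, with made-up truth/response vectors for illustration:

# balanced accuracy by hand: average the recall of each class
truth <- factor(c("ei", "ei", "ie", "n", "n", "n"))
response <- factor(c("ei", "ie", "ie", "n", "n", "ei"), levels = levels(truth))
conf <- table(truth, response)
mean(diag(conf) / rowSums(conf))  # recalls: 0.5 for ei, 1 for ie, 2/3 for n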
# required for grid search and initialization of bayesian optimization
parameter_grid <- expand.grid(
  k = seq(4, 68, 6)
)

# reduce to a maximum of 10 rows
if (nrow(parameter_grid) > 10) {
  set.seed(123)
  sample_rows <- sample(seq_len(nrow(parameter_grid)), 10, FALSE)
  parameter_grid <- kdry::mlh_subset(parameter_grid, sample_rows)
}
# required for bayesian optimization
parameter_bounds <- list(k = c(2L, 80L))
optim_args <- list(
  iters.n = ncores,
  kappa = 3.5,
  acq = "ucb"  # upper confidence bound acquisition function
)
tuner <- mlexperiments::MLTuneParameters$new(
  learner = LearnerKnn$new(),
  strategy = "grid",
  ncores = ncores,
  seed = seed
)

tuner$parameter_grid <- parameter_grid
tuner$learner_args <- learner_args
tuner$split_type <- "stratified"

tuner$set_data(
  x = train_x,
  y = train_y
)

tuner_results_grid <- tuner$execute(k = 3)
#>
#> Parameter settings [===================>------------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>---------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [======================================>-----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [================================================>-------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [==========================================================>---------------------------------------] 6/10 ( 60%)
#> Parameter settings [====================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [=============================================================================>--------------------] 8/10 ( 80%)
#> Parameter settings [=======================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [=================================================================================================] 10/10 (100%)
head(tuner_results_grid)
#> setting_id metric_optim_mean k l use.all
#> 1: 1 0.1669134 16 2 FALSE
#> 2: 2 0.1256584 64 2 FALSE
#> 3: 3 0.1870928 10 2 FALSE
#> 4: 4 0.1364111 34 2 FALSE
#> 5: 5 0.1243125 58 2 FALSE
#> 6: 6 0.1462841 28 2 FALSE
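After execution, the winning configuration is available from the tuner object and is reused below to configure the cross-validation; its leading element (apparently the setting id) is the reason it is subset with `[-1]` there:

tuner$results$best.setting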
tuner <- mlexperiments::MLTuneParameters$new(
  learner = LearnerKnn$new(),
  strategy = "bayesian",
  ncores = ncores,
  seed = seed
)

tuner$parameter_grid <- parameter_grid
tuner$parameter_bounds <- parameter_bounds
tuner$learner_args <- learner_args
tuner$optim_args <- optim_args
tuner$split_type <- "stratified"

tuner$set_data(
  x = train_x,
  y = train_y
)

tuner_results_bayesian <- tuner$execute(k = 3)
#>
#> Registering parallel backend using 4 cores.
head(tuner_results_bayesian)
#> Epoch setting_id k gpUtility acqOptimum inBounds Elapsed Score metric_optim_mean errorMessage l use.all
#> 1: 0 1 16 NA FALSE TRUE 1.061 -0.1651140 0.1651140 NA 2 FALSE
#> 2: 0 2 64 NA FALSE TRUE 1.131 -0.1261065 0.1261065 NA 2 FALSE
#> 3: 0 3 10 NA FALSE TRUE 1.060 -0.1835086 0.1835086 NA 2 FALSE
#> 4: 0 4 34 NA FALSE TRUE 1.074 -0.1377516 0.1377516 NA 2 FALSE
#> 5: 0 5 58 NA FALSE TRUE 1.101 -0.1247624 0.1247624 NA 2 FALSE
#> 6: 0 6 28 NA FALSE TRUE 1.046 -0.1462823 0.1462823 NA 2 FALSE
validator <- mlexperiments::MLCrossValidation$new(
  learner = LearnerKnn$new(),
  fold_list = fold_list,
  ncores = ncores,
  seed = seed
)

validator$learner_args <- tuner$results$best.setting[-1]

validator$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- return_models

validator$set_data(
  x = train_x,
  y = train_y
)

validator_results <- validator$execute()
#>
#> CV fold: Fold1
#>
#> CV fold: Fold2
#> CV progress [======================================================================>-----------------------------------] 2/3 ( 67%)
#>
#> CV fold: Fold3
#> CV progress [==========================================================================================================] 3/3 (100%)
head(validator_results)
#> fold performance k l use.all
#> 1: Fold1 0.8931022 58 2 FALSE
#> 2: Fold2 0.8445084 58 2 FALSE
#> 3: Fold3 0.9010913 58 2 FALSE
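Averaging the per-fold metrics gives a single cross-validated estimate of the balanced accuracy (a small addition, not in the original listing):

# mean balanced accuracy across the three folds
mean(validator_results$performance)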
validator <- mlexperiments::MLNestedCV$new(
  learner = LearnerKnn$new(),
  strategy = "grid",
  fold_list = fold_list,
  k_tuning = 3L,
  ncores = ncores,
  seed = seed
)

validator$parameter_grid <- parameter_grid
validator$learner_args <- learner_args
validator$split_type <- "stratified"

validator$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- return_models

validator$set_data(
  x = train_x,
  y = train_y
)

validator_results <- validator$execute()
#>
#> CV fold: Fold1
#>
#> Parameter settings [===================>------------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>---------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [======================================>-----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [================================================>-------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [==========================================================>---------------------------------------] 6/10 ( 60%)
#> Parameter settings [====================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [=============================================================================>--------------------] 8/10 ( 80%)
#> Parameter settings [=======================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [=================================================================================================] 10/10 (100%)
#> CV fold: Fold2
#> CV progress [======================================================================>-----------------------------------] 2/3 ( 67%)
#>
#> Parameter settings [===================>------------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>---------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [======================================>-----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [================================================>-------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [==========================================================>---------------------------------------] 6/10 ( 60%)
#> Parameter settings [====================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [=============================================================================>--------------------] 8/10 ( 80%)
#> Parameter settings [=======================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [=================================================================================================] 10/10 (100%)
#> CV fold: Fold3
#> CV progress [==========================================================================================================] 3/3 (100%)
#>
#> Parameter settings [===================>------------------------------------------------------------------------------] 2/10 ( 20%)
#> Parameter settings [============================>---------------------------------------------------------------------] 3/10 ( 30%)
#> Parameter settings [======================================>-----------------------------------------------------------] 4/10 ( 40%)
#> Parameter settings [================================================>-------------------------------------------------] 5/10 ( 50%)
#> Parameter settings [==========================================================>---------------------------------------] 6/10 ( 60%)
#> Parameter settings [====================================================================>-----------------------------] 7/10 ( 70%)
#> Parameter settings [=============================================================================>--------------------] 8/10 ( 80%)
#> Parameter settings [=======================================================================================>----------] 9/10 ( 90%)
#> Parameter settings [=================================================================================================] 10/10 (100%)
head(validator_results)
#> fold performance k l use.all
#> 1: Fold1 0.8863818 64 2 FALSE
#> 2: Fold2 0.8396360 64 2 FALSE
#> 3: Fold3 0.9000926 64 2 FALSE
validator <- mlexperiments::MLNestedCV$new(
  learner = LearnerKnn$new(),
  strategy = "bayesian",
  fold_list = fold_list,
  k_tuning = 3L,
  ncores = ncores,
  seed = seed
)

validator$parameter_grid <- parameter_grid
validator$learner_args <- learner_args
validator$split_type <- "stratified"

validator$parameter_bounds <- parameter_bounds
validator$optim_args <- optim_args

validator$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- return_models

validator$set_data(
  x = train_x,
  y = train_y
)

validator_results <- validator$execute()
#>
#> CV fold: Fold1
#>
#> Registering parallel backend using 4 cores.
#>
#> CV fold: Fold2
#> CV progress [======================================================================>-----------------------------------] 2/3 ( 67%)
#>
#> Registering parallel backend using 4 cores.
#>
#> CV fold: Fold3
#> CV progress [==========================================================================================================] 3/3 (100%)
#> Registering parallel backend using 4 cores.
head(validator_results)
#> fold performance k l use.all
#> 1: Fold1 0.8702444 28 2 FALSE
#> 2: Fold2 0.8396360 64 2 FALSE
#> 3: Fold3 0.9010913 58 2 FALSE
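As a final step, one might refit the tuned model on the full training split and score it on the held-out test data. The sketch below is not part of the original vignette: it calls class::knn directly, which appears to be the backend behind LearnerKnn given the l/use.all arguments above (see the linked learner_knn.R for the actual implementation), and uses the grid-search winner k = 58, which may vary between runs:

# refit on all training data and predict the held-out test split
final_preds <- class::knn(
  train = train_x,
  test = test_x,
  cl = train_y,
  k = 58,  # grid-search winner above
  l = 2,
  use.all = FALSE
)

# balanced accuracy on the test set: mean of per-class recalls
conf <- table(truth = test_y, response = final_preds)
mean(diag(conf) / rowSums(conf))
# nolint end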