The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
SelectBoost.FDA now includes a validation layer for
repeated simulations, method benchmarks, plain-SelectBoost baselines,
and direct advantage summaries for FDA-aware
SelectBoost.
library(SelectBoost.FDA)
sim_grid <- simulate_fda_scenario(
n = 60,
grid_length = 30,
scenario = "localized_dense",
representation = "grid",
seed = 1
)
sim_grid
#> FDA simulation data
#> observations: 60
#> features: 62
#> active features: 13
#> scenario: localized_dense
#> confounding strength: 0
#> active region scale: 1
#> local correlation: 0
#> active predictors: signal, age, treatment
head(selection_map(sim_grid$design))
#> feature predictor block position argval representation
#> signal.1 signal_1 signal signal 1 0 grid
#> signal.2 signal_2 signal signal 2 0.0344827586206897 grid
#> signal.3 signal_3 signal signal 3 0.0689655172413793 grid
#> signal.4 signal_4 signal signal 4 0.103448275862069 grid
#> signal.5 signal_5 signal signal 5 0.137931034482759 grid
#> signal.6 signal_6 signal signal 6 0.172413793103448 grid
#> basis_type transform source_predictor source_representation
#> signal.1 <NA> identity signal grid
#> signal.2 <NA> identity signal grid
#> signal.3 <NA> identity signal grid
#> signal.4 <NA> identity signal grid
#> signal.5 <NA> identity signal grid
#> signal.6 <NA> identity signal grid
#> source_position_start source_position_end source_argval_start
#> signal.1 1 1 0
#> signal.2 2 2 0.0344827586206897
#> signal.3 3 3 0.0689655172413793
#> signal.4 4 4 0.103448275862069
#> signal.5 5 5 0.137931034482759
#> signal.6 6 6 0.172413793103448
#> source_argval_end domain_start domain_end component
#> signal.1 0 0 0 <NA>
#> signal.2 0.0344827586206897 0.0344827586206897 0.0344827586206897 <NA>
#> signal.3 0.0689655172413793 0.0689655172413793 0.0689655172413793 <NA>
#> signal.4 0.103448275862069 0.103448275862069 0.103448275862069 <NA>
#> signal.5 0.137931034482759 0.137931034482759 0.137931034482759 <NA>
#> signal.6 0.172413793103448 0.172413793103448 0.172413793103448 <NA>
#> unit feature_index basis_component domain_label
#> signal.1 <NA> 1 <NA> 0
#> signal.2 <NA> 2 <NA> 0.0344827586206897
#> signal.3 <NA> 3 <NA> 0.0689655172413793
#> signal.4 <NA> 4 <NA> 0.103448275862069
#> signal.5 <NA> 5 <NA> 0.137931034482759
#> signal.6 <NA> 6 <NA> 0.172413793103448
sim_grid$truth$active_predictors
#> [1] "signal" "age" "treatment"
The returned object keeps both the fitted fda_design and
the mapped truth for the transformed feature space.
study_dense <- run_simulation_study(
n_rep = 2,
simulate_args = list(
n = 50,
grid_length = 28,
scenario = "localized_dense",
representation = "basis"
),
benchmark_args = list(
methods = c("stability", "selectboost", "plain_selectboost"),
levels = c("feature", "group", "basis"),
stability_args = list(selector = "lasso", B = 6, cutoff = 0.5, seed = 4),
selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE),
plain_selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
),
seed = 10
)
study_smooth <- run_simulation_study(
n_rep = 2,
simulate_args = list(
n = 50,
grid_length = 28,
scenario = "distributed_smooth",
representation = "basis"
),
benchmark_args = list(
methods = c("stability", "selectboost", "plain_selectboost"),
levels = c("feature", "group", "basis"),
stability_args = list(selector = "lasso", B = 6, cutoff = 0.5, seed = 14),
selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE),
plain_selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
),
seed = 20
)
summarise_benchmark_advantage(
study_dense,
target = "selectboost",
reference = c("plain_selectboost", "stability"),
level = "feature",
metric = "f1"
)
#> scenario
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 localized_dense
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 localized_dense
#> representation
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 basis
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 basis
#> family
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 gaussian
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 gaussian
#> level
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 feature
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 feature
#> target
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 selectboost
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 selectboost
#> reference
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 stability
#> metric
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 f1
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 f1
#> n_rep
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 2
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 2
#> target_value_mean
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.7638889
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.7638889
#> reference_value_mean
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.7500000
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.6923077
#> delta_mean
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.01388889
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.07158120
#> delta_sd
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.09035253
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.12842751
#> win_rate
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.5
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.5
summarise_benchmark_advantage(
study_smooth,
target = "selectboost",
reference = c("plain_selectboost", "stability"),
level = "feature",
metric = "f1"
)
#> scenario
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 distributed_smooth
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 distributed_smooth
#> representation
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 basis
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 basis
#> family
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 gaussian
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 gaussian
#> level
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 feature
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 feature
#> target
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 selectboost
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 selectboost
#> reference
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 stability
#> metric
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 f1
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 f1
#> n_rep
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 2
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 2
#> target_value_mean
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.8117647
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.8117647
#> reference_value_mean
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.8444444
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.5000000
#> delta_mean
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 -0.03267974
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.31176471
#> delta_sd
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.07949174
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.01663781
#> win_rate
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.5
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 1.0
The repeated-study summary reports the mean and standard deviation of
recovery metrics by method, evaluation level, scenario, and
c0 when applicable. In practice, the
localized_dense setting is the most direct stress test for
the FDA-aware grouping built into selectboost_fda().
sensitivity <- run_selectboost_sensitivity_study(
n_rep = 1,
simulate_grid = data.frame(
scenario = c("localized_dense", "confounded_blocks"),
confounding_strength = c(0.4, 0.9),
active_region_scale = c(0.8, 0.7),
local_correlation = c(1, 2),
stringsAsFactors = FALSE
),
selectboost_grid = data.frame(
association_method = c("correlation", "hybrid", "interval"),
bandwidth = c(NA, 4, 4),
stringsAsFactors = FALSE
),
simulate_args = list(n = 50, grid_length = 28, representation = "grid"),
benchmark_args = list(
methods = c("stability", "selectboost", "plain_selectboost"),
levels = c("feature", "group"),
stability_args = list(selector = "lasso", B = 6, cutoff = 0.5, seed = 40),
selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE),
plain_selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
),
seed = 50
)
summarise_benchmark_advantage(
sensitivity,
target = "selectboost",
reference = "plain_selectboost",
level = "feature",
metric = "f1"
)
#> scenario
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 confounded_blocks
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 confounded_blocks
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 localized_dense
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 localized_dense
#> representation
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 grid
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 grid
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 grid
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 grid
#> family
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 gaussian
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 gaussian
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 gaussian
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 gaussian
#> association_method
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 hybrid
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 interval
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 hybrid
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 interval
#> bandwidth
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 4
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 4
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 4
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 4
#> confounding_strength
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.9
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.9
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.4
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.4
#> active_region_scale
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.7
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.7
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.8
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.8
#> local_correlation
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 2
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 2
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> level
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 feature
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 feature
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 feature
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 feature
#> target
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 selectboost
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 selectboost
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 selectboost
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 selectboost
#> reference
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> metric
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 f1
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 f1
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 f1
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 f1
#> n_rep
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> target_value_mean
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.6206897
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.5454545
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.5517241
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.4444444
#> reference_value_mean
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.5294118
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.4736842
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.5517241
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.5625000
#> delta_mean
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.09127789
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.07177033
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.00000000
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 -0.11805556
#> delta_sd
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0
#> win_rate
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0
This is the intended benchmark workflow when the goal is to show when
FDA-aware grouping matters. The summary table keeps
association_method, bandwidth,
confounding_strength, active_region_scale, and
local_correlation as explicit columns, so it is
straightforward to isolate the settings where
selectboost_fda() gains over the plain baseline.
The repository also ships a larger saved sensitivity study generated
by tools/run_selectboost_sensitivity_study.R. That script
runs a broader sweep and writes reusable benchmark summaries to
inst/extdata/benchmarks/.
benchmark_dir <- system.file("extdata", "benchmarks", package = "SelectBoost.FDA")
top_feature_settings <- utils::read.csv(
file.path(benchmark_dir, "selectboost_sensitivity_top_settings.csv"),
stringsAsFactors = FALSE
)
utils::head(
top_feature_settings[
,
c(
"scenario",
"confounding_strength",
"active_region_scale",
"local_correlation",
"association_method",
"bandwidth",
"selectboost_f1_mean",
"plain_selectboost_f1_mean",
"delta_mean",
"win_rate"
)
],
10
)
#> scenario confounding_strength active_region_scale local_correlation
#> 1 confounded_blocks 0.6 0.5 2
#> 2 confounded_blocks 1.0 0.8 2
#> 3 confounded_blocks 0.6 0.8 2
#> 4 localized_dense 0.6 0.5 2
#> 5 confounded_blocks 0.6 0.5 2
#> 6 confounded_blocks 0.6 0.5 2
#> 7 confounded_blocks 1.0 0.5 0
#> 8 localized_dense 1.0 0.8 2
#> 9 confounded_blocks 1.0 0.5 2
#> 10 localized_dense 0.6 0.8 2
#> association_method bandwidth selectboost_f1_mean plain_selectboost_f1_mean
#> 1 interval 8 0.5362319 0.4087266
#> 2 hybrid 4 0.5885135 0.4826750
#> 3 hybrid 4 0.5833671 0.4944862
#> 4 neighborhood 4 0.4972542 0.4144859
#> 5 hybrid 4 0.5429293 0.4657088
#> 6 neighborhood 4 0.5072823 0.4322990
#> 7 interval 8 0.5323457 0.4575499
#> 8 neighborhood 4 0.5635386 0.4924953
#> 9 neighborhood 4 0.4655172 0.3983586
#> 10 interval 8 0.5392157 0.4769314
#> delta_mean win_rate
#> 1 0.12750533 1.0000000
#> 2 0.10583853 1.0000000
#> 3 0.08888092 1.0000000
#> 4 0.08276831 0.6666667
#> 5 0.07722048 0.6666667
#> 6 0.07498337 1.0000000
#> 7 0.07479582 1.0000000
#> 8 0.07104330 0.6666667
#> 9 0.06715866 1.0000000
#> 10 0.06228427 0.6666667
The key comparison columns are selectboost_f1_mean,
plain_selectboost_f1_mean, and delta_mean.
This makes the algorithm comparison explicit at the feature-selection
level while keeping the FDA-specific settings attached to each row.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.