Methods, Calibration, and Formula Workflows

The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

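SelectBoost.FDA now exposes a broader modeling layer on top of the FDA-native design object: building a design from a formula, calibrating modeling choices, comparing selection methods on one design, and switching selector backends.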

Build a design from a formula

# Attach the package and load the `spectra_example` dataset it ships.
library(SelectBoost.FDA)
data("spectra_example", package = "SelectBoost.FDA")

# Named list used as `data` for the formula interface: the response, two
# functional predictors wrapped by fda_grid() over the same argvals
# (`spectra_example$grid`, unit "nm"), and two scalar covariates.
formula_data <- list(
  y = spectra_example$response,
  signal = fda_grid(
    spectra_example$predictors$signal,
    argvals = spectra_example$grid,
    name = "signal",
    unit = "nm"
  ),
  nuisance = fda_grid(
    spectra_example$predictors$nuisance,
    argvals = spectra_example$grid,
    name = "nuisance",
    unit = "nm"
  ),
  age = spectra_example$scalar_covariates$age,
  # Coerced to a factor before it enters the design.
  treatment = factor(spectra_example$scalar_covariates$treatment)
)

# Build the design from a formula whose terms name entries of `formula_data`.
# `transforms` assigns a representation per functional predictor: fda_fpca()
# with 3 components for `signal`, fda_bspline() with df = 5 for `nuisance`.
# NOTE(review): `scalar_transform = fda_standardize()` presumably applies to
# the scalar terms (age, treatment) -- confirm in the package documentation.
design <- fda_design_formula(
  y ~ signal + nuisance + age + treatment,
  data = formula_data,
  transforms = list(
    signal = fda_fpca(n_components = 3),
    nuisance = fda_bspline(df = 5)
  ),
  scalar_transform = fda_standardize(),
  family = "gaussian"
)

# Auto-print the design object to show its summary.
design
#> FDA design
#>   observations: 80 
#>   features: 11 
#>   functional predictors: 2 
#>   scalar covariates: 3 
#>   family: gaussian 
#>   response available: TRUE
# Tabulate the design at the "basis" level; the recorded output has one row
# per transformed functional predictor (nuisance.spline, signal.fpca).
selection_map(design, level = "basis")
#>                 predictor representation basis_type source_representation
#> nuisance.spline  nuisance          basis     spline                  grid
#> signal.fpca        signal          basis       fpca                  grid
#>                 n_components first_component last_component         components
#> nuisance.spline            5              B1             B5 B1, B2, B3, B4, B5
#> signal.fpca                3             PC1            PC3      PC1, PC2, PC3
#>                 domain_start domain_end
#> nuisance.spline         1100       2500
#> signal.fpca             1100       2500

Calibrate modeling choices

The calibration helpers run actual fits over user-defined grids and summarize the results in the `grid` element of the returned object.

# Calibrate stability selection with the lasso selector over two sample
# fractions and two cutoffs; the recorded `cal_stability$grid` has one row per
# (sample_fraction, cutoff) pair, i.e. 4 rows. A fixed `seed` is supplied.
cal_stability <- calibrate_stability_selection(
  design,
  selector = "lasso",
  sample_fraction_grid = c(0.5, 0.7),
  cutoff_grid = c(0.5, 0.7),
  B = 8,
  seed = 1
)

# Compare interval widths 4 and 6 with the lasso selector at a single cutoff
# of 0.5; the recorded `cal_width$grid` has one row per width. A fixed `seed`
# is supplied.
cal_width <- calibrate_interval_width(
  design,
  widths = c(4, 6),
  selector = "lasso",
  B = 8,
  cutoff = 0.5,
  seed = 2
)

# Run the SelectBoost calibration with the lasso selector for the c0 values
# 0.7 and 0.4; the recorded `cal_selectboost$grid` has one row per c0 value.
# NOTE(review): no `seed` is passed here, unlike the other calibrate_*()
# calls in this document -- confirm how this result is made reproducible.
cal_selectboost <- calibrate_selectboost(
  design,
  selector = "lasso",
  c0_grid = c(0.7, 0.4),
  B = 4
)

# Auto-print the calibration object (type and number of grid rows).
cal_stability
#> FDA calibration grid
#>   type: stability_selection 
#>   rows: 4
# Show the per-combination summary table stored in the `grid` element.
cal_stability$grid
#>   sample_fraction cutoff n_selected_features n_selected_groups
#> 1             0.5    0.5                   5                 4
#> 2             0.7    0.5                   6                 4
#> 3             0.5    0.7                   4                 3
#> 4             0.7    0.7                   5                 4
#>   mean_feature_frequency max_feature_frequency mean_group_frequency
#> 1              0.5227273                     1                0.750
#> 2              0.6136364                     1                0.800
#> 3              0.4886364                     1                0.725
#> 4              0.5795455                     1                0.825
#>   max_group_frequency
#> 1                   1
#> 2                   1
#> 3                   1
#> 4                   1
# Show the summary table of the interval-width calibration (one row per width).
cal_width$grid
#>   width step overlap n_selected_features n_selected_groups
#> 1     4    4   FALSE                   6                 4
#> 2     6    6   FALSE                   5                 4
#>   mean_feature_frequency max_feature_frequency mean_group_frequency
#> 1              0.5340909                     1            0.6666667
#> 2              0.4886364                     1            0.7250000
#>   max_group_frequency
#> 1                   1
#> 2                   1
# Show the summary table of the SelectBoost calibration (one row per c0 value).
cal_selectboost$grid
#>                c0 n_selected_features n_selected_groups mean_feature_selection
#> c0 = 0.4 c0 = 0.4                  11                 5              0.6818182
#> c0 = 0.7 c0 = 0.7                   9                 5              0.6818182
#>          max_feature_selection mean_group_selection max_group_selection
#> c0 = 0.4                     1                 0.74                   1
#> c0 = 0.7                     1                 0.86                   1

Compare methods on one design

# Run the three listed methods on the same design; each `*_args` list holds
# the arguments for the method of the same name.
# NOTE(review): `interval_args` requests width = 5, but the recorded summary
# reports width 3 for the interval row -- confirm whether the width is capped
# (e.g. by the 3 FPCA components of `signal`).
# NOTE(review): `selectboost_args` carries no `seed`, unlike the other two
# argument lists -- confirm how this part is made reproducible.
comparison <- compare_selection_methods(
  design,
  methods = c("stability", "interval", "selectboost"),
  stability_args = list(selector = "lasso", B = 8, cutoff = 0.5, seed = 3),
  interval_args = list(selector = "lasso", width = 5, B = 8, cutoff = 0.5, seed = 4),
  selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
)

# Auto-print the comparison object (methods and number of rows).
comparison
#> FDA method comparison
#>   methods: stability, interval, selectboost 
#>   rows: 4
# Per-method summary; in the recorded output selectboost contributes one row
# per c0 value, and columns that do not apply to a method are NA.
summary(comparison)
#> FDA method comparison summary
#>   methods: stability, interval, selectboost 
#>       method n_selected_features n_selected_groups mean_feature_frequency
#>    stability                   5                 4              0.4886364
#>     interval                   5                 4              0.5227273
#>  selectboost                  10                 4                     NA
#>  selectboost                   9                 5                     NA
#>  max_feature_frequency mean_group_frequency max_group_frequency width       c0
#>                      1                0.725                   1    NA     <NA>
#>                      1                0.725                   1     3     <NA>
#>                     NA                   NA                  NA    NA c0 = 0.4
#>                     NA                   NA                  NA    NA c0 = 0.7
#>  mean_feature_selection max_feature_selection mean_group_selection
#>                      NA                    NA                   NA
#>                      NA                    NA                   NA
#>               0.6590909                     1                 0.69
#>               0.6590909                     1                 0.81
#>  max_group_selection
#>                   NA
#>                   NA
#>                    1
#>                    1
# First six rows of the group-level map; in the recorded output, rows from
# the different methods are stacked and identified by the `method` column.
head(selection_map(comparison, level = "group"))
#>    predictor group_id       group representation basis_type
#> 1     signal        1      signal          basis       fpca
#> 2   nuisance        2    nuisance          basis     spline
#> 3        age        3         age         scalar           
#> 4 treatment0        4  treatment0         scalar           
#> 5 treatment1        5  treatment1         scalar           
#> 6     signal        1 signal[1:3]          basis       fpca
#>   source_representation n_features start_position end_position start_argval
#> 1                  grid          3              1            3          PC1
#> 2                  grid          5              1            5           B1
#> 3                scalar          1              1            1          age
#> 4                scalar          1              1            1   treatment0
#> 5                scalar          1              1            1   treatment1
#> 6                  grid          3              1            3          PC1
#>   end_argval domain_start domain_end mean_feature_frequency
#> 1        PC3         1100       2500                  0.875
#> 2         B5         1100       2500                  0.150
#> 3        age          age        age                  0.875
#> 4 treatment0   treatment0 treatment0                  1.000
#> 5 treatment1   treatment1 treatment1                  0.125
#> 6        PC3         1100       2500                  0.875
#>   max_feature_frequency selected_features group_frequency group_selected
#> 1                 1.000                 3           1.000           TRUE
#> 2                 0.375                 0           0.625           TRUE
#> 3                 0.875                 1           0.875           TRUE
#> 4                 1.000                 1           1.000           TRUE
#> 5                 0.125                 0           0.125          FALSE
#> 6                 1.000                 3           1.000           TRUE
#>      method interval_start interval_end interval_label   c0 mean_selection
#> 1 stability             NA           NA           <NA> <NA>             NA
#> 2 stability             NA           NA           <NA> <NA>             NA
#> 3 stability             NA           NA           <NA> <NA>             NA
#> 4 stability             NA           NA           <NA> <NA>             NA
#> 5 stability             NA           NA           <NA> <NA>             NA
#> 6  interval              1            3    signal[1:3] <NA>             NA
#>   max_selection
#> 1            NA
#> 2            NA
#> 3            NA
#> 4            NA
#> 5            NA
#> 6            NA

Switch selector backends

The selector argument now accepts common aliases such as "lasso", "group_lasso", and "sparse_group_lasso".

# Fit stability selection using the "sparse_group_lasso" selector alias.
# The result is not assigned, so it is auto-printed; the recorded output
# reports 8 replicates for B = 8.
fit_stability(
  design,
  selector = "sparse_group_lasso",
  B = 8,
  cutoff = 0.5,
  seed = 5
)
#> FDA stability selection
#>   family: gaussian 
#>   features: 11 
#>   groups: 5 
#>   replicates: 8 
#>   cutoff: 0.5

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.