The hardware and bandwidth for this mirror are donated by dogado GmbH, a webhosting and full-service cloud provider. Check out our WordPress tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

LightGBM models

Function Works
tidypredict_fit(), tidypredict_sql(), parse_model()
tidypredict_to_column()
tidypredict_test()
tidypredict_interval(), tidypredict_sql_interval()
parsnip

tidypredict_ functions

library(lightgbm)

# Prepare data: lgb.train() requires a numeric matrix, so select three
# numeric predictors from mtcars and use horsepower as the target
X <- data.matrix(mtcars[, c("mpg", "cyl", "disp")])
y <- mtcars$hp

# Wrap the matrix in a lgb.Dataset; colnames are passed explicitly so the
# parsed model refers to features by name rather than position
dtrain <- lgb.Dataset(X, label = y, colnames = c("mpg", "cyl", "disp"))

# Fit a small regression booster. num_leaves and min_data_in_leaf are kept
# low because mtcars has only 32 rows; verbose = -1L silences training logs.
# This `model` object is reused by parse_model() later in the article.
model <- lgb.train(
  params = list(
    num_leaves = 4L,
    learning_rate = 0.5,
    objective = "regression",
    min_data_in_leaf = 1L
  ),
  data = dtrain,
  nrounds = 10L,
  verbose = -1L
)

Supported objectives

LightGBM supports many objective functions. The following objectives are supported by tidypredict:

Regression objectives (identity transform)

Regression objectives (exp transform)

Binary classification (sigmoid transform)

Multiclass classification

Binary classification example

# Same predictors as the regression example, but the binary am column
# (automatic vs. manual transmission, coded 0/1) is the label
X_bin <- data.matrix(mtcars[, c("mpg", "cyl", "disp")])
y_bin <- mtcars$am

dtrain_bin <- lgb.Dataset(X_bin, label = y_bin, colnames = c("mpg", "cyl", "disp"))

# objective = "binary" means tidypredict applies the sigmoid transform
# when reconstructing predictions from the trees
model_bin <- lgb.train(
  params = list(
    num_leaves = 4L,
    learning_rate = 0.5,
    objective = "binary",
    min_data_in_leaf = 1L
  ),
  data = dtrain_bin,
  nrounds = 10L,
  verbose = -1L
)

# Compare tidypredict's formula-based predictions against LightGBM's own
# predict() on the training matrix; differences must stay below the threshold
tidypredict_test(model_bin, xg_df = X_bin)
#> tidypredict test results
#> Difference threshold: 1e-12
#> 
#>  All results are within the difference threshold

Multiclass classification

For multiclass models, tidypredict_fit() returns a named list of formulas, one for each class:

# Multiclass example: all four iris measurements as predictors
X_iris <- data.matrix(iris[, 1:4])
colnames(X_iris) <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
# LightGBM expects zero-based integer class labels (0, 1, 2)
y_iris <- as.integer(iris$Species) - 1L

dtrain_iris <- lgb.Dataset(X_iris, label = y_iris, colnames = colnames(X_iris))

# num_class must match the number of distinct labels for the
# "multiclass" objective
model_multi <- lgb.train(
  params = list(
    num_leaves = 4L,
    learning_rate = 0.5,
    objective = "multiclass",
    num_class = 3L,
    min_data_in_leaf = 1L
  ),
  data = dtrain_iris,
  nrounds = 5L,
  verbose = -1L
)

# For multiclass models tidypredict_fit() returns one formula per class,
# named class_0, class_1, ... in label order
fit_formulas <- tidypredict_fit(model_multi)
names(fit_formulas)
#> [1] "class_0" "class_1" "class_2"

Each formula produces the predicted probability for that class:

# Inject each class formula into mutate() with `!!` so the per-class
# probabilities are computed directly on the data frame columns.
# NOTE(review): this chunk assumes dplyr (or the tidyverse) is attached
# for %>%, mutate(), select(), and starts_with() — confirm in the article setup.
iris %>%
  mutate(
    prob_setosa = !!fit_formulas$class_0,
    prob_versicolor = !!fit_formulas$class_1,
    prob_virginica = !!fit_formulas$class_2
  ) %>%
  select(Species, starts_with("prob_")) %>%
  head()
#>   Species prob_setosa prob_versicolor prob_virginica
#> 1  setosa   0.9786973      0.01046491      0.0108378
#> 2  setosa   0.9786973      0.01046491      0.0108378
#> 3  setosa   0.9786973      0.01046491      0.0108378
#> 4  setosa   0.9786973      0.01046491      0.0108378
#> 5  setosa   0.9786973      0.01046491      0.0108378
#> 6  setosa   0.9786973      0.01046491      0.0108378

Note: tidypredict_test() does not support multiclass models. Use tidypredict_fit() directly.

Categorical features

LightGBM supports native categorical features. When a feature is marked as categorical, tidypredict generates appropriate %in% conditions:

# Build a synthetic dataset where the outcome depends on a 4-level
# categorical feature: levels 0 and 1 push y toward +10, levels 2 and 3
# toward -10, plus Gaussian noise
set.seed(123)
n <- 200
cat_data <- data.frame(
  cat_feat = sample(0:3, n, replace = TRUE),
  y = NA
)
cat_data$y <- ifelse(cat_data$cat_feat %in% c(0, 1), 10, -10) + rnorm(n, sd = 2)

X_cat <- matrix(cat_data$cat_feat, ncol = 1)
colnames(X_cat) <- "cat_feat"

# categorical_feature tells LightGBM to treat cat_feat as a native
# categorical column (set-membership splits) rather than a numeric one
dtrain_cat <- lgb.Dataset(
  X_cat,
  label = cat_data$y,
  categorical_feature = "cat_feat"
)

model_cat <- lgb.train(
  params = list(
    num_leaves = 4L,
    learning_rate = 1.0,
    objective = "regression",
    min_data_in_leaf = 1L
  ),
  data = dtrain_cat,
  nrounds = 2L,
  verbose = -1L
)

# Categorical splits surface as %in% conditions in the generated formula,
# one case_when() per boosting round (nrounds = 2L above)
tidypredict_fit(model_cat)
#> case_when(cat_feat %in% 0:1 ~ 9.22111156962135, .default = -9.19527530561794) + 
#>     case_when(cat_feat %in% 0:1 ~ 0.837108638881579, .default = -0.837108347632668)

parsnip

parsnip fitted models (via the bonsai package) are also supported by tidypredict:

library(parsnip)
# bonsai provides the "lightgbm" engine for parsnip's boost_tree()
library(bonsai)

# Fit the same regression task through the tidymodels interface
p_model <- boost_tree(
  trees = 10,
  tree_depth = 3,
  min_n = 1
) %>%
  set_engine("lightgbm") %>%
  set_mode("regression") %>%
  fit(hp ~ mpg + cyl + disp, data = mtcars)

# Extract the underlying lgb.Booster; tidypredict works on the raw
# booster stored in the parsnip fit object
lgb_model <- p_model$fit

# X is the predictor matrix built in the first example above
tidypredict_test(lgb_model, xg_df = X)
#> tidypredict test results
#> Difference threshold: 1e-12
#> 
#>  All results are within the difference threshold

Parse model spec

Here is an example of the model spec:

# parse_model() converts the booster (`model` from the regression example)
# into a plain list: $general holds model metadata, $trees holds one entry
# per boosting round. str(pm, 2) limits the dump to two levels of nesting.
pm <- parse_model(model)
str(pm, 2)
#> List of 2
#>  $ general:List of 9
#>   ..$ model                 : chr "lgb.Booster"
#>   ..$ type                  : chr "lgb"
#>   ..$ version               : num 3
#>   ..$ params                :List of 8
#>   ..$ feature_names         : chr [1:3] "mpg" "cyl" "disp"
#>   ..$ nfeatures             : int 3
#>   ..$ num_class             : int 1
#>   ..$ num_tree_per_iteration: int 1
#>   ..$ niter                 : int 10
#>  $ trees  :List of 10
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>   ..$ :List of 4
#>  - attr(*, "class")= chr [1:3] "parsed_model" "pm_lgb" "list"
# Each tree is a list of leaves; every leaf records its prediction and the
# full path of split conditions (column, threshold, operator, and whether
# missing values follow that branch) from the root down to it.
str(pm$trees[1])
#> List of 1
#>  $ :List of 4
#>   ..$ :List of 3
#>   .. ..$ prediction: num 122
#>   .. ..$ linear    : NULL
#>   .. ..$ path      :List of 1
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "cyl"
#>   .. .. .. ..$ val    : num 7
#>   .. .. .. ..$ op     : chr "less-equal"
#>   .. .. .. ..$ missing: logi TRUE
#>   ..$ :List of 3
#>   .. ..$ prediction: num 241
#>   .. ..$ linear    : NULL
#>   .. ..$ path      :List of 3
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "cyl"
#>   .. .. .. ..$ val    : num 7
#>   .. .. .. ..$ op     : chr "more"
#>   .. .. .. ..$ missing: logi FALSE
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "mpg"
#>   .. .. .. ..$ val    : num 15.1
#>   .. .. .. ..$ op     : chr "less-equal"
#>   .. .. .. ..$ missing: logi TRUE
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "disp"
#>   .. .. .. ..$ val    : num 334
#>   .. .. .. ..$ op     : chr "less-equal"
#>   .. .. .. ..$ missing: logi TRUE
#>   ..$ :List of 3
#>   .. ..$ prediction: num 187
#>   .. ..$ linear    : NULL
#>   .. ..$ path      :List of 3
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "cyl"
#>   .. .. .. ..$ val    : num 7
#>   .. .. .. ..$ op     : chr "more"
#>   .. .. .. ..$ missing: logi FALSE
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "mpg"
#>   .. .. .. ..$ val    : num 15.1
#>   .. .. .. ..$ op     : chr "less-equal"
#>   .. .. .. ..$ missing: logi TRUE
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "disp"
#>   .. .. .. ..$ val    : num 334
#>   .. .. .. ..$ op     : chr "more"
#>   .. .. .. ..$ missing: logi FALSE
#>   ..$ :List of 3
#>   .. ..$ prediction: num 164
#>   .. ..$ linear    : NULL
#>   .. ..$ path      :List of 2
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "cyl"
#>   .. .. .. ..$ val    : num 7
#>   .. .. .. ..$ op     : chr "more"
#>   .. .. .. ..$ missing: logi FALSE
#>   .. .. ..$ :List of 5
#>   .. .. .. ..$ type   : chr "conditional"
#>   .. .. .. ..$ col    : chr "mpg"
#>   .. .. .. ..$ val    : num 15.1
#>   .. .. .. ..$ op     : chr "more"
#>   .. .. .. ..$ missing: logi FALSE

Limitations

These binaries (installable software) and packages are under active development.
They may not be fully stable and should be used with caution. We make no guarantees about them.
Health statistics for this mirror are available on the Monitor page.