The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Binary classification with leaf

# Attach the leaf package; backend_available() below is called unqualified
# (presumably exported by leaf -- confirm against the package index).
library(leaf)
# If the backend is reported as unavailable, only emit a hint pointing at
# leaf::install_leaf(); execution is not stopped here.
if (!backend_available()) {
  message("Install backend with leaf::install_leaf()")
}  
# Fix the RNG state so the synthetic data set is reproducible
set.seed(42)

# Number of observations in the synthetic training set
N <- 50L

# Draw the two predictors; x1 is sampled before x2 so the RNG stream
# is consumed in a fixed order
x1 <- runif(n = N, min = 1, max = 40)
x2 <- runif(n = N, min = 0, max = 2)

# Latent score log(x1) * x2 - 3; the binary target y is 1 where the
# score is strictly positive and 0 otherwise
score <- x2 * log(x1) - 3
y <- as.integer(score > 0)

# Assemble predictors and target into one data frame and preview it
train_data <- data.frame(x1, x2, y)
head(train_data, n = 6L)
#>         x1         x2 y
#> 1 36.67744 0.66685442 0
#> 2 37.54594 0.69349650 0
#> 3 12.15944 0.79697082 0
#> 4 33.38746 1.56938555 1
#> 5 26.02808 0.07787298 0
#> 6 21.24474 1.49759077 1

Stage 1: Initialize the symbolic regressor

# Build the symbolic regressor used by all later stages. The loss is
# binary cross-entropy because the target y is a 0/1 label. The meaning of
# the remaining arguments (search engine name, iteration count, parameter
# cap, verbosity) is inferred from their names -- confirm against the leaf
# documentation.
regressor <- leaf::SymbolicRegressor$new(
  engine = "rsrm",
  num_iterations = 4L,
  loss = "BinaryCrossEntropy",
  max_params = 2L,
  base = list(verbose = FALSE)
)

Stage 2: Discover equation skeletons

# Search for candidate equation skeletons describing y as a function of
# x1 and x2. The call prints the engine's progress log; the table of
# discovered skeletons is kept in search_results for inspection.
search_results <- regressor$search_equations(
  data = train_data,
  formula = "y ~ f(x1, x2)",
  normalization = "divide_by_gmd"
)
#> 1. Processing data for equation search based on formula...
#> 2. Running engine 'rsrm' over 1 folds using up to 1 processes...
#> -- FINAL RESULTS --
#> Episode: 1/4
#> time: 2.35s
#> loss: 1.1102230246251565e-16
#> form: F
#> HOF:
#>                            equation  complexity                                                                                                   loss
#> 0                                 0           0 999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1                           -0.3228           1                                                                                                   0.68
#> 2                         0.3432*X2           2                                                                                                   0.66
#> 3                3.5531*X2 - 5.8267           3                                                                                                   0.29
#> 4            4.0450*X1*X2 - 11.8264           4                                                                                                   0.14
#> 5  2206.4145*X1*log(X2) - 1482.6623           5                                                                                                   0.00
#> ---
#> 
task:dataset_d4bf7f82-d8e4-4933-a520-12d671bd251c expr:776.389030930181*X1*X2 + -2939.9324468385626/X2 Loss_BinaryCrossEntropy:0.00 Test 0/1.
#> final result:
#> success rate : 100%
#> average discovery time is 2.357 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]:  [[248, 0, 247]]
#> 3. Found 6 raw skeletons. Deduplicating...

# Print a banner line, then the table of discovered equation skeletons
print("=== Search results ===")
#> [1] "=== Search results ==="
print(search_results)
#>                Equation Complexity
#> 0                 -1⋅β1          1
#> 1                    β1          1
#> 2                 β1⋅x2          2
#> 3         β1⋅x2 + -1⋅β2          3
#> 4      β1⋅x1⋅x2 + -1⋅β2          4
#> 5 β1⋅x1⋅log(x2) + -1⋅β2          5

Stage 3: Fit parameters and compute loss

# Fit the parameters of the discovered equations on train_data; the
# console output of this call is a table that includes a Loss column
regressor$fit(data = train_data)
#> Fitting parameters for 6 equations...
#> Parameter fitting complete.
#>                Equation Complexity         Loss
#> 0                 -1⋅β1          1 6.802920e-01
#> 1                    β1          1 6.802920e-01
#> 2                 β1⋅x2          2 6.562670e-01
#> 3         β1⋅x2 + -1⋅β2          3 2.870362e-01
#> 4      β1⋅x1⋅x2 + -1⋅β2          4 1.448630e-01
#> 5 β1⋅x1⋅log(x2) + -1⋅β2          5 1.110223e-16

Stage 4: Evaluate additional metrics

# Request the additional TSS and Elbow metrics for the fitted equations
regressor$evaluate(metrics = c("TSS", "Elbow"))
#>                Equation Complexity         Loss       TSS      Elbow
#> 1                    β1          1 6.802920e-01 0.0000000        NaN
#> 2                 β1⋅x2          2 6.562670e-01 0.0000000        NaN
#> 3         β1⋅x2 + -1⋅β2          3 2.870362e-01 0.7536946 0.02398125
#> 4      β1⋅x1⋅x2 + -1⋅β2          4 1.448630e-01 0.8834154 0.01048082
#> 5 β1⋅x1⋅log(x2) + -1⋅β2          5 1.110223e-16 1.0000000 0.02632977

# Show the Pareto front held by the regressor: one row per equation with
# its complexity, loss and the metric columns requested earlier
print(regressor$get_pareto_front())
#>                Equation Complexity         Loss       TSS      Elbow
#> 1                    β1          1 6.802920e-01 0.0000000        NaN
#> 2                 β1⋅x2          2 6.562670e-01 0.0000000        NaN
#> 3         β1⋅x2 + -1⋅β2          3 2.870362e-01 0.7536946 0.02398125
#> 4      β1⋅x1⋅x2 + -1⋅β2          4 1.448630e-01 0.8834154 0.01048082
#> 5 β1⋅x1⋅log(x2) + -1⋅β2          5 1.110223e-16 1.0000000 0.02632977

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.