The hardware and bandwidth for this mirror are donated by dogado GmbH, the webhosting and full-service cloud provider. Check out our WordPress tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Multi-View Symbolic Regression with leaf

# Load package
library(leaf)

# Install the Python backend only if it is not already available
# (the install only needs to be done once per machine, so avoid
# re-running it unconditionally on every execution of this script).
if (!backend_available()) {
  message("Backend not found; installing with leaf::install_leaf()...")
  leaf::install_leaf()
}
set.seed(42)

# Group sizes
n_a <- 50L
n_b <- 30L

# Simulate one group of observations:
#   y = coef_x1sq * x1^2 + coef_x2 * x2 + small Gaussian noise.
# The draw order (x1, then x2, then the noise inside y) is kept the same
# as the per-group order above, so the RNG stream — and therefore every
# simulated value — is reproduced exactly.
simulate_group <- function(n, x1_max, x2_max, coef_x1sq, coef_x2, label) {
  x1 <- runif(n, min = 0, max = x1_max)
  x2 <- runif(n, min = 0, max = x2_max)
  y  <- coef_x1sq * x1^2 + coef_x2 * x2 + rnorm(n, mean = 0, sd = 1e-4)
  data.frame(x1 = x1, x2 = x2, y = y, group = label)
}

# Group A: y = x1^2 + noise;  Group B: y = 2*x1^2 - 3*x2 + noise
train_data <- rbind(
  simulate_group(n_a, 2, 3, 1, 0, "A"),
  simulate_group(n_b, 5, 1, 2, -3, "B")
)
head(train_data)
#>          x1        x2         y group
#> 1 1.8296121 1.0002816 3.3475126     A
#> 2 1.8741508 1.0402447 3.5123629     A
#> 3 0.5722791 1.1954562 0.3276609     A
#> 4 1.6608953 2.3540783 2.7586373     A
#> 5 1.2834910 0.1168095 1.6473582     A
#> 6 1.0381919 2.2463862 1.0778701     A
# Initialize the symbolic regressor.
# Uses `<-` for assignment, double quotes, and consistent `name = value`
# spacing per the tidyverse style guide.
regressor <- leaf::SymbolicRegressor$new(
  engine = "rsrm",                 # search engine backend
  loss = "MSE",                    # loss minimized during the search
  max_params = 3L,                 # max free parameters per skeleton
  num_iterations = 3L,             # number of search episodes
  threshold = 1e-10,               # stop once loss falls below this
  base = list(verbose = FALSE),
  mcts = list(times = 8L),         # presumably per-episode MCTS budget — confirm in leaf docs
  gp = list(times = 8L)            # presumably per-episode GP budget — confirm in leaf docs
)
# Stage 1: Discover equation skeletons.
# The formula syntax "y ~ f(x1, x2 | group)" asks for a shared functional
# form of x1 and x2 whose parameters may differ across the `group` levels.
search_results <- regressor$search_equations(
  data = train_data,
  formula = "y ~ f(x1, x2 | group)"
)
#> 1. Processing data for equation search based on formula...
#> 2. Running engine 'rsrm' over 1 folds using up to 1 processes...
#> -- FINAL RESULTS --
#> Episode: 3/3
#> time: 20.96s
#> loss: 7.798843366570674e-09
#> form: X2+F
#> HOF:
#>                                   equation  complexity                                                                                                   loss
#> 0                                        0           0 999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1                                   1.7959           1                                                                                                 208.56
#> 2                                1.1097*X1           2                                                                                                  42.00
#> 3                             0.4656*X1**2           3                                                                                                   1.31
#> 4                    0.5621*X1**2 - 0.3723           4                                                                                                   0.70
#> 5                 0.4656*X1**2 + 0.0000*X2           5                                                                                                   0.00
#> 6        0.4656*X1**2 + 0.0000*X2 - 0.0000           6                                                                                                   0.00
#> 7     0.4656*X1**2 + 0.0000*X2 - 0.0000/X1           8                                                                                                   0.00
#> 8  0.4656*X1**2 + 0.0000*X2 - 0.0000/X1**2           9                                                                                                   0.00
#> 9  0.4656*X1**2 + 0.0000*X2 - 0.0000/X1**2          12                                                                                                   0.00
#> ---
#> 
#> task:dataset_aa19bbdd-5194-4694-a36e-e06f64ac155a expr:X1**2+X2+(-0.534387536908588*X1**2 + -0.9999837943962612*X2 + -2.0685022306810226e-08/X1**2) Loss_MSE:0.00 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 20.971 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]:  [[1270, 0, 1270]]
#> 3. Found 10 raw skeletons. Deduplicating...
# Display the deduplicated skeleton table from Stage 1.
# u1, u2, ... are free parameters — presumably fitted per group in Stage 2
# (the per-group expressions shown later replace them with numbers).
print("=== Search results ===")
#> [1] "=== Search results ==="
print(search_results)
#>                          Equation Complexity
#> 0                              u1          1
#> 1                           u1⋅x1          2
#> 2                         u1⋅x1^2          3
#> 3                 u1⋅x1^2 + -1⋅u2          4
#> 4                 u1⋅x1^2 + u2⋅x2          5
#> 5         u1⋅x1^2 + u2⋅x2 + -1⋅u3          6
#> 6   u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^-1          8
#> 7 u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1          9
# Stage 2: Fit parameters and compute loss (with CV)
fit_results <- regressor$fit(
  data = train_data
)
#> Fitting parameters for 8 equations...
#> Parameter fitting complete.
# print() shows escape sequences literally, so the original "\n" header
# printed as the characters backslash-n; drop it to match the other
# headers in this script (e.g. "=== Search results ===").
print("=== Fit results ===")
#> [1] "=== Fit results ==="
print(fit_results)
#>                          Equation Complexity         Loss
#> 0                              u1          1 7.930425e+01
#> 1                           u1⋅x1          2 1.588952e+01
#> 2                         u1⋅x1^2          3 4.898860e-01
#> 3                 u1⋅x1^2 + -1⋅u2          4 2.643394e-01
#> 4                 u1⋅x1^2 + u2⋅x2          5 7.573504e-09
#> 5         u1⋅x1^2 + u2⋅x2 + -1⋅u3          6 7.556009e-09
#> 6   u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^-1          8 7.284390e-09
#> 7 u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1          9 7.245014e-09

# Inspect the last equation in the results
last_id <- rownames(fit_results)[nrow(fit_results)]
print("=== Last equation ===")
#> [1] "=== Last equation ==="
regressor$show_equation(last_id)
#> Details for Equation ID: 7
#> ----------------------------------------
#> Equation: u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1
#> ----------------------------------------
#> Full Expression per Group:
#> 
#> Group 'A':
#>                     2                            9.63124936535987e-9
#> 0.999995292847239⋅x₁  + 1.67023510898235e-5⋅x₂ - ───────────────────
#>                                                            2        
#>                                                          x₁         
#> 
#> Group 'B':
#>                    2                         2.75633799071188e-6
#> 1.99999904083213⋅x₁  - 2.99998954617359⋅x₂ - ───────────────────
#>                                                        2        
#>                                                      x₁
# Stage 3: Evaluate additional metrics
eval_table <- regressor$evaluate(
  metrics = c("RMSE", "AIC", "Elbow")
)
# As with the fit header above, print() renders "\n" literally, so the
# escape is removed to keep the header style consistent across the script.
print("=== Additional metrics ===")
#> [1] "=== Additional metrics ==="
print(eval_table)
#>                          Equation Complexity         Loss       AIC
#> 0                              u1          1 7.930425e+01  578.8935
#> 1                           u1⋅x1          2 1.588952e+01  450.2830
#> 2                         u1⋅x1^2          3 4.898860e-01  171.9436
#> 3                 u1⋅x1^2 + -1⋅u2          4 2.643394e-01  124.5885
#> 4                 u1⋅x1^2 + u2⋅x2          5 7.573504e-09 -206.0486
#> 5         u1⋅x1^2 + u2⋅x2 + -1⋅u3          6 7.556009e-09 -204.0486
#> 6   u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^-1          8 7.284390e-09 -204.0486
#> 7 u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1          9 7.245014e-09 -204.0486
#>           AIC_w         RMSE      Elbow
#> 0 1.694368e-171 8.905294e+00 0.01615145
#> 1 1.433616e-143 3.986166e+00 0.07100753
#> 2  3.954274e-83 6.999186e-01 0.84262426
#> 3  7.587431e-73 5.141395e-01 0.83529735
#> 4  4.753667e-01 8.702588e-05 0.16452063
#> 5  1.748776e-01 8.692530e-05 0.08365697
#> 6  1.748778e-01 8.534864e-05 0.00000000
#> 7  1.748778e-01 8.511765e-05 0.00000000

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no guarantees about their reliability or fitness for any purpose.
Health stats visible at Monitor.