The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Programming environment

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(iml)
library(rmlnomogram)

Binary outcome (or class-wise multinomial outcome)

1 - Categorical predictors and binary outcome without probability

# Load dataset
data("mtcars")

# Preprocess for training a classifier
mtcars$am <- factor(mtcars$am, levels = c(0, 1), labels = c("Auto", "Manual"))

# Convert some numerical features to categorical for demonstration
mtcars$cyl <- factor(mtcars$cyl)
mtcars$vs <- factor(mtcars$vs)
mtcars$qsec <- factor(as.integer(mtcars$qsec >= 18))

# Using tidyverse to filter only factor columns
mtcars <- select(mtcars, where(is.factor))

# Create dummy variables for all factor variables, excluding the target variable
dummy_vars <- dummyVars(am ~ ., data = mtcars) |> 
  predict(newdata = mtcars)

# Combine binarized predictors with the target variable
mtcars_binarized <- as.data.frame(dummy_vars) |>
  mutate_all(as.factor) |>
  mutate(am = mtcars$am) |>
  select(-vs.0, -cyl.4, -qsec.0)

# Train a random forest model using the full dataset
set.seed(1)
model <- train(
  am ~ ., 
  data = mtcars_binarized, 
  method = "rf", 
  trControl = trainControl(method = "none", classProbs = TRUE)
)
# Extract feature names used in the trained model from caret model object
caret_features <- str_remove_all(model$finalModel$xNames, "1$")

# Modify the training dataset to include only the model features
mtcars_selected <- mtcars_binarized[, caret_features]

# Generate all possible feature combinations for nomogram
nomogram_features <- expand.grid(lapply(mtcars_selected, unique))
##   cyl.6 cyl.8 qsec.1 vs.1
## 1     1     0      0    0
## 2     0     0      0    0
## 3     1     1      0    0
## 4     0     1      0    0
## 5     1     0      1    0
## 6     0     0      1    0
nomogram_outputs <- predict(model, nomogram_features, type = "prob") |>
  select(output = levels(mtcars_binarized$am)[2])
##   output
## 1  0.884
## 2  0.818
## 3  0.246
## 4  0.002
## 5  0.498
## 6  0.700
nomogram <- create_nomogram(nomogram_features, nomogram_outputs)

# Prepare data and model for SHAP value calculation using iml
X <- mtcars_binarized[, -which(names(mtcars_binarized) == "am")]

# Create a predictor object
predictor <- Predictor$new(model = model, data = X)

# Calculate SHAP values
nomogram_shaps <- list()

for(i in seq(nrow(nomogram_features))){
  shapley <-  Shapley$new(predictor, x.interest = nomogram_features[i, ])
  nomogram_shaps[[i]] <- shapley$results
}

names(nomogram_shaps) <- seq(nrow(nomogram_features))

nomogram_shaps <- reduce(imap(nomogram_shaps, ~ mutate(.x, i = .y)), rbind) |>
  filter(class == levels(mtcars_binarized$am)[2]) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
##   cyl.6 cyl.8 qsec.1 vs.1
## 1 -0.09  0.53   0.19    0
## 2  0.03  0.50   0.04    0
## 3 -0.05 -0.42   0.06    0
## 4  0.06 -0.50   0.09    0
## 5 -0.43  0.33  -0.25    0
## 6  0.13  0.38  -0.05    0

2 - Categorical predictors and binary outcome with probability

nomogram2 <- create_nomogram(nomogram_features, nomogram_outputs, prob = TRUE)

nomogram2_with_shap <-
  create_nomogram(
    nomogram_features, nomogram_outputs, nomogram_shaps
    , prob = TRUE
  )

3 - Categorical with single numerical predictors and binary outcome with probability

# Reload original dataset
data("mtcars")

# Round to 0 decimal to reduce possible combinations later
mtcars <- mutate(mtcars, qsec = round(qsec, 0))

# Add single numerical predictor to binarized predictors with the target variable
mtcars_mixed <- cbind(mtcars["qsec"], select(mtcars_binarized, -qsec.1))

# Train a random forest model using the full dataset
set.seed(1)
model2 <- train(
  am ~ ., 
  data = mtcars_mixed, 
  method = "rf", 
  trControl = trainControl(method = "none", classProbs = TRUE)
)
# Extract feature names used in the trained model from caret model object
caret_features2 <- str_remove_all(model2$finalModel$xNames, "1$")

# Modify the training dataset to include only the model features
mtcars_selected2 <- mtcars_mixed[, caret_features2]

# Generate all possible feature combinations for nomogram
nomogram_features2 <-
  select_if(mtcars_selected2, is.numeric) |>
  lapply(\(x) seq(min(x), max(x))) |>
  c(lapply(select_if(mtcars_selected2, is.factor), unique)) |>
  expand.grid()
##   qsec cyl.6 cyl.8 vs.1
## 1   14     1     0    0
## 2   15     1     0    0
## 3   16     1     0    0
## 4   17     1     0    0
## 5   18     1     0    0
## 6   19     1     0    0
nomogram_outputs2 <- predict(model2, nomogram_features2, type = "prob") |>
  select(output = levels(mtcars_mixed$am)[2])
nomogram3 <- create_nomogram(nomogram_features2, nomogram_outputs2, prob = TRUE)

# Prepare data and model for SHAP value calculation using iml
X2 <- mtcars_mixed[, -which(names(mtcars_mixed) == "am")]

# Create a predictor object
predictor2 <- Predictor$new(model = model2, data = X2)

# Calculate SHAP values
nomogram_shaps2 <- list()

for(i in seq(nrow(nomogram_features2))){
  shapley2 <-  Shapley$new(predictor2, x.interest = nomogram_features2[i, ])
  nomogram_shaps2[[i]] <- shapley2$results
}

names(nomogram_shaps2) <- seq(nrow(nomogram_features2))

nomogram_shaps2 <- reduce(imap(nomogram_shaps2, ~ mutate(.x, i = .y)), rbind) |>
  filter(class == levels(mtcars_mixed$am)[2]) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
nomogram3_with_shap <-
  create_nomogram(
    nomogram_features2, nomogram_outputs2, nomogram_shaps2
    , prob = TRUE
  )

Continuous outcome

4 - Categorical predictors and continuous outcome

# Load dataset
data("mtcars")

# Preprocess for training a regressor
outcome <- mtcars$wt

# Convert some numerical features to categorical for demonstration
mtcars$cyl <- factor(mtcars$cyl)
mtcars$vs <- factor(mtcars$vs)
mtcars$qsec <- factor(as.integer(mtcars$qsec >= 18))

# Using tidyverse to filter only factor columns
mtcars <- cbind(select(mtcars, where(is.factor)), select(mtcars, wt))

# Create dummy variables for all factor variables, excluding the target variable
dummy_vars2 <- dummyVars(wt ~ ., data = mtcars) |> 
  predict(newdata = mtcars)

# Combine binarized predictors with the target variable
mtcars_binarized2 <- as.data.frame(dummy_vars2) |>
  mutate_all(as.factor) |>
  mutate(wt = outcome) |>
  select(-vs.0, -cyl.4, -qsec.0)

# Train a random forest model using the full dataset
set.seed(1)
model3 <- train(
  wt ~ ., 
  data = mtcars_binarized2, 
  method = "rf", 
  trControl = trainControl(method = "none")
)
# Extract feature names used in the trained model from caret model object
caret_features3 <- str_remove_all(model3$finalModel$xNames, "1$")

# Modify the training dataset to include only the model features
mtcars_selected3 <- mtcars_binarized2[, caret_features3]

# Generate all possible feature combinations for nomogram
nomogram_features3 <- expand.grid(lapply(mtcars_selected3, unique))
nomogram_outputs3 <- data.frame(output = predict(model3, nomogram_features3))
##     output
## 1 2.958282
## 2 3.010966
## 3 3.537469
## 4 3.878820
## 5 3.037775
## 6 2.971140
nomogram4 <- create_nomogram(nomogram_features3, nomogram_outputs3, est = TRUE)

# Prepare data and model for SHAP value calculation using iml
X3 <- mtcars_binarized2[, -which(names(mtcars_binarized2) == "wt")]

# Create a predictor object
predictor3 <- Predictor$new(model = model3, data = X3)

# Calculate SHAP values
nomogram_shaps3 <- list()

for(i in seq(nrow(nomogram_features3))){
  shapley3 <-  Shapley$new(predictor3, x.interest = nomogram_features3[i, ])
  nomogram_shaps3[[i]] <- shapley3$results
}

names(nomogram_shaps3) <- seq(nrow(nomogram_features3))

nomogram_shaps3 <- reduce(imap(nomogram_shaps3, ~ mutate(.x, i = .y)), rbind) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
nomogram4_with_shap <-
  create_nomogram(
    nomogram_features3, nomogram_outputs3, nomogram_shaps3
    , est = TRUE
  )

5 - Categorical with single numerical predictors and continuous outcome

# Reload original dataset
data("mtcars")

# Round to 0 decimal to reduce possible combinations later
mtcars <- mutate(mtcars, qsec = round(qsec, 0))

# Add single numerical predictor to binarized predictors with the target variable
mtcars_mixed2 <- cbind(mtcars["qsec"], select(mtcars_binarized2, -qsec.1))

# Train a random forest model using the full dataset
set.seed(1)
model4 <- train(
  wt ~ ., 
  data = mtcars_mixed2, 
  method = "rf", 
  trControl = trainControl(method = "none")
)
# Extract feature names used in the trained model from caret model object
caret_features4 <- str_remove_all(model4$finalModel$xNames, "1$")

# Modify the training dataset to include only the model features
mtcars_selected4 <- mtcars_mixed2[, caret_features4]

# Generate all possible feature combinations for nomogram
nomogram_features4 <-
  select_if(mtcars_selected4, is.numeric) |>
  lapply(\(x) seq(min(x), max(x))) |>
  c(lapply(select_if(mtcars_selected4, is.factor), unique)) |>
  expand.grid()
nomogram_outputs4 <- data.frame(output = predict(model4, nomogram_features4))
nomogram5 <- create_nomogram(nomogram_features4, nomogram_outputs4, est = TRUE)

# Prepare data and model for SHAP value calculation using iml
X4 <- mtcars_mixed2[, -which(names(mtcars_mixed2) == "wt")]

# Create a predictor object
predictor4 <- Predictor$new(model = model4, data = X4)

# Calculate SHAP values
nomogram_shaps4 <- list()

for(i in seq(nrow(nomogram_features4))){
  shapley4 <-  Shapley$new(predictor4, x.interest = nomogram_features4[i, ])
  nomogram_shaps4[[i]] <- shapley4$results
}

names(nomogram_shaps4) <- seq(nrow(nomogram_features4))

nomogram_shaps4 <- reduce(imap(nomogram_shaps4, ~ mutate(.x, i = .y)), rbind) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
nomogram4_with_shap <-
  create_nomogram(
    nomogram_features4, nomogram_outputs4, nomogram_shaps4
    , est = TRUE
  )

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-unknown-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/aarch64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/aarch64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rmlnomogram_0.1.2 iml_0.11.3        caret_6.0-94      lattice_0.22-6   
##  [5] lubridate_1.9.3   forcats_1.0.0     stringr_1.5.1     dplyr_1.1.4      
##  [9] purrr_1.0.2       readr_2.1.5       tidyr_1.3.1       tibble_3.2.1     
## [13] ggplot2_3.5.1     tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1     timeDate_4041.110    farver_2.1.2        
##  [4] fastmap_1.2.0        pROC_1.18.5          digest_0.6.37       
##  [7] rpart_4.1.23         timechange_0.3.0     lifecycle_1.0.4     
## [10] survival_3.6-4       magrittr_2.0.3       compiler_4.4.1      
## [13] rlang_1.1.4          sass_0.4.9           tools_4.4.1         
## [16] utf8_1.2.4           yaml_2.3.10          data.table_1.16.2   
## [19] ggsignif_0.6.4       knitr_1.48           labeling_0.4.3      
## [22] plyr_1.8.9           abind_1.4-8          withr_3.0.2         
## [25] Metrics_0.1.4        nnet_7.3-19          grid_4.4.1          
## [28] stats4_4.4.1         fansi_1.0.6          ggpubr_0.6.0        
## [31] colorspace_2.1-1     future_1.34.0        globals_0.16.3      
## [34] scales_1.3.0         iterators_1.0.14     MASS_7.3-60.2       
## [37] cli_3.6.3            rmarkdown_2.28       generics_0.1.3      
## [40] rstudioapi_0.17.1    future.apply_1.11.3  reshape2_1.4.4      
## [43] tzdb_0.4.0           cachem_1.1.0         splines_4.4.1       
## [46] parallel_4.4.1       vctrs_0.6.5          hardhat_1.4.0       
## [49] Matrix_1.7-0         carData_3.0-5        jsonlite_1.8.9      
## [52] car_3.1-3            hms_1.1.3            rstatix_0.7.2       
## [55] Formula_1.2-5        listenv_0.9.1        foreach_1.5.2       
## [58] gower_1.0.1          jquerylib_0.1.4      recipes_1.1.0       
## [61] glue_1.8.0           parallelly_1.38.0    codetools_0.2-20    
## [64] cowplot_1.1.3        stringi_1.8.4        gtable_0.3.6        
## [67] munsell_0.5.1        pillar_1.9.0         htmltools_0.5.8.1   
## [70] ipred_0.9-15         lava_1.8.0           R6_2.5.1            
## [73] evaluate_1.0.1       highr_0.11           backports_1.5.0     
## [76] broom_1.0.7          bslib_0.8.0          class_7.3-22        
## [79] Rcpp_1.0.13          nlme_3.1-164         prodlim_2024.06.25  
## [82] checkmate_2.3.2      xfun_0.48            pkgconfig_2.0.3     
## [85] ModelMetrics_1.2.2.2

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.