library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
##     lift
library(iml)
library(rmlnomogram)
# Load dataset
data("mtcars")
# Preprocess for training a classifier
mtcars$am <- factor(mtcars$am, levels = c(0, 1), labels = c("Auto", "Manual"))
# Convert some numerical features to categorical for demonstration
mtcars$cyl <- factor(mtcars$cyl)
mtcars$vs <- factor(mtcars$vs)
mtcars$qsec <- factor(as.integer(mtcars$qsec >= 18))
# Use tidyverse to keep only the factor columns
mtcars <- select(mtcars, where(is.factor))
# Create dummy variables for all factor variables, excluding the target variable
dummy_vars <- dummyVars(am ~ ., data = mtcars) |>
  predict(newdata = mtcars)
# Combine binarized predictors with the target variable
mtcars_binarized <- as.data.frame(dummy_vars) |>
  mutate_all(as.factor) |>
  mutate(am = mtcars$am) |>
  select(-vs.0, -cyl.4, -qsec.0)
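# The reference-level dummies (vs.0, cyl.4, qsec.0) are dropped above
# because each is fully determined by its sibling columns, so keeping
# them would only add redundant predictors. A quick look at the result:
head(mtcars_binarized)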
# Train a random forest model using the full dataset
set.seed(1)
model <- train(
  am ~ .,
  data = mtcars_binarized,
  method = "rf",
  trControl = trainControl(method = "none", classProbs = TRUE)
)
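# Optional sanity check (not required for the nomogram): with
# trControl = trainControl(method = "none"), caret fits a single forest
# without resampling, so this in-sample confusion matrix is optimistic.
confusionMatrix(predict(model, mtcars_binarized), mtcars_binarized$am)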
# Extract feature names used in the trained model from caret model object
caret_features <- str_remove_all(model$finalModel$xNames, "1$")
# Modify the training dataset to include only the model features
mtcars_selected <- mtcars_binarized[, caret_features]
# Generate all possible feature combinations for nomogram
nomogram_features <- expand.grid(lapply(mtcars_selected, unique))
head(nomogram_features)
##   cyl.6 cyl.8 qsec.1 vs.1
## 1     1     0      0    0
## 2     0     0      0    0
## 3     1     1      0    0
## 4     0     1      0    0
## 5     1     0      1    0
## 6     0     0      1    0
nomogram_outputs <- predict(model, nomogram_features, type = "prob") |>
  select(output = all_of(levels(mtcars_binarized$am)[2]))
head(nomogram_outputs)
##   output
## 1  0.884
## 2  0.818
## 3  0.246
## 4  0.002
## 5  0.498
## 6  0.700
nomogram <- create_nomogram(nomogram_features, nomogram_outputs)
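# Assuming create_nomogram() returns a ggplot object (rmlnomogram draws
# its nomograms with ggplot2), the plot can be displayed by printing it
# and saved with the standard ggplot tooling, e.g.:
# ggsave("nomogram_classifier.png", nomogram, width = 8, height = 6)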
# Prepare data and model for SHAP value calculation using iml
X <- mtcars_binarized[, -which(names(mtcars_binarized) == "am")]
# Create a predictor object
predictor <- Predictor$new(model = model, data = X)
# Calculate SHAP values
nomogram_shaps <- list()
for (i in seq(nrow(nomogram_features))) {
  shapley <- Shapley$new(predictor, x.interest = nomogram_features[i, ])
  nomogram_shaps[[i]] <- shapley$results
}
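# Note: Shapley$new() estimates SHAP values by Monte Carlo sampling
# (sample.size defaults to 100 in iml), so the phi values vary slightly
# between runs; setting a seed or raising sample.size stabilizes them:
# shapley <- Shapley$new(predictor, x.interest = nomogram_features[1, ], sample.size = 500)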
names(nomogram_shaps) <- seq(nrow(nomogram_features))
nomogram_shaps <- reduce(imap(nomogram_shaps, ~ mutate(.x, i = .y)), rbind) |>
  filter(class == levels(mtcars_binarized$am)[2]) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
head(nomogram_shaps)
##   cyl.6 cyl.8 qsec.1 vs.1
## 1 -0.09  0.53   0.19    0
## 2  0.03  0.50   0.04    0
## 3 -0.05 -0.42   0.06    0
## 4  0.06 -0.50   0.09    0
## 5 -0.43  0.33  -0.25    0
## 6  0.13  0.38  -0.05    0
nomogram2 <- create_nomogram(nomogram_features, nomogram_outputs, prob = TRUE)
nomogram2_with_shap <- create_nomogram(
  nomogram_features, nomogram_outputs, nomogram_shaps,
  prob = TRUE
)
# Reload original dataset
data("mtcars")
# Round qsec to 0 decimals to reduce the number of possible feature combinations later
mtcars <- mutate(mtcars, qsec = round(qsec, 0))
# Combine the single numerical predictor with the binarized predictors and the target variable
mtcars_mixed <- cbind(mtcars["qsec"], select(mtcars_binarized, -qsec.1))
# Train a random forest model using the full dataset
set.seed(1)
model2 <- train(
  am ~ .,
  data = mtcars_mixed,
  method = "rf",
  trControl = trainControl(method = "none", classProbs = TRUE)
)
# Extract feature names used in the trained model from caret model object
caret_features2 <- str_remove_all(model2$finalModel$xNames, "1$")
# Modify the training dataset to include only the model features
mtcars_selected2 <- mtcars_mixed[, caret_features2]
# Generate all possible feature combinations for nomogram
nomogram_features2 <-
  select_if(mtcars_selected2, is.numeric) |>
  lapply(\(x) seq(min(x), max(x))) |>
  c(lapply(select_if(mtcars_selected2, is.factor), unique)) |>
  expand.grid()
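# The grid sweeps the numeric feature over every integer from min(qsec)
# to max(qsec), so its size is the product of the value counts across
# features; this is why qsec was rounded to whole seconds above.
nrow(nomogram_features2)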
head(nomogram_features2)
##   qsec cyl.6 cyl.8 vs.1
## 1   14     1     0    0
## 2   15     1     0    0
## 3   16     1     0    0
## 4   17     1     0    0
## 5   18     1     0    0
## 6   19     1     0    0
nomogram_outputs2 <- predict(model2, nomogram_features2, type = "prob") |>
  select(output = all_of(levels(mtcars_mixed$am)[2]))
nomogram3 <- create_nomogram(nomogram_features2, nomogram_outputs2, prob = TRUE)
# Prepare data and model for SHAP value calculation using iml
X2 <- mtcars_mixed[, -which(names(mtcars_mixed) == "am")]
# Create a predictor object
predictor2 <- Predictor$new(model = model2, data = X2)
# Calculate SHAP values
nomogram_shaps2 <- list()
for (i in seq(nrow(nomogram_features2))) {
  shapley2 <- Shapley$new(predictor2, x.interest = nomogram_features2[i, ])
  nomogram_shaps2[[i]] <- shapley2$results
}
names(nomogram_shaps2) <- seq(nrow(nomogram_features2))
nomogram_shaps2 <- reduce(imap(nomogram_shaps2, ~ mutate(.x, i = .y)), rbind) |>
  filter(class == levels(mtcars_mixed$am)[2]) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
nomogram3_with_shap <- create_nomogram(
  nomogram_features2, nomogram_outputs2, nomogram_shaps2,
  prob = TRUE
)
# Load dataset
data("mtcars")
# Preprocess for training a regressor
outcome <- mtcars$wt
# Convert some numerical features to categorical for demonstration
mtcars$cyl <- factor(mtcars$cyl)
mtcars$vs <- factor(mtcars$vs)
mtcars$qsec <- factor(as.integer(mtcars$qsec >= 18))
# Keep the factor columns plus the numerical outcome wt
mtcars <- cbind(select(mtcars, where(is.factor)), select(mtcars, wt))
# Create dummy variables for all factor variables, excluding the target variable
dummy_vars2 <- dummyVars(wt ~ ., data = mtcars) |>
  predict(newdata = mtcars)
# Combine binarized predictors with the target variable
mtcars_binarized2 <- as.data.frame(dummy_vars2) |>
  mutate_all(as.factor) |>
  mutate(wt = outcome) |>
  select(-vs.0, -cyl.4, -qsec.0)
# Train a random forest model using the full dataset
set.seed(1)
model3 <- train(
  wt ~ .,
  data = mtcars_binarized2,
  method = "rf",
  trControl = trainControl(method = "none")
)
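# Optional in-sample fit check for the regressor (optimistic for the
# same reason as above); postResample() reports RMSE, R-squared, and MAE.
postResample(predict(model3, mtcars_binarized2), mtcars_binarized2$wt)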
# Extract feature names used in the trained model from caret model object
caret_features3 <- str_remove_all(model3$finalModel$xNames, "1$")
# Modify the training dataset to include only the model features
mtcars_selected3 <- mtcars_binarized2[, caret_features3]
# Generate all possible feature combinations for nomogram
nomogram_features3 <- expand.grid(lapply(mtcars_selected3, unique))
nomogram_outputs3 <- data.frame(output = predict(model3, nomogram_features3))
head(nomogram_outputs3)
##     output
## 1 2.958282
## 2 3.010966
## 3 3.537469
## 4 3.878820
## 5 3.037775
## 6 2.971140
nomogram4 <- create_nomogram(nomogram_features3, nomogram_outputs3, est = TRUE)
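# Here est = TRUE marks the outputs as numeric point estimates
# (regression) rather than class probabilities, which were flagged
# with prob = TRUE in the classifier examples above.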
# Prepare data and model for SHAP value calculation using iml
X3 <- mtcars_binarized2[, -which(names(mtcars_binarized2) == "wt")]
# Create a predictor object
predictor3 <- Predictor$new(model = model3, data = X3)
# Calculate SHAP values
nomogram_shaps3 <- list()
for (i in seq(nrow(nomogram_features3))) {
  shapley3 <- Shapley$new(predictor3, x.interest = nomogram_features3[i, ])
  nomogram_shaps3[[i]] <- shapley3$results
}
names(nomogram_shaps3) <- seq(nrow(nomogram_features3))
nomogram_shaps3 <- reduce(imap(nomogram_shaps3, ~ mutate(.x, i = .y)), rbind) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
nomogram4_with_shap <- create_nomogram(
  nomogram_features3, nomogram_outputs3, nomogram_shaps3,
  est = TRUE
)
# Reload original dataset
data("mtcars")
# Round qsec to 0 decimals to reduce the number of possible feature combinations later
mtcars <- mutate(mtcars, qsec = round(qsec, 0))
# Combine the single numerical predictor with the binarized predictors and the target variable
mtcars_mixed2 <- cbind(mtcars["qsec"], select(mtcars_binarized2, -qsec.1))
# Train a random forest model using the full dataset
set.seed(1)
model4 <- train(
  wt ~ .,
  data = mtcars_mixed2,
  method = "rf",
  trControl = trainControl(method = "none")
)
# Extract feature names used in the trained model from caret model object
caret_features4 <- str_remove_all(model4$finalModel$xNames, "1$")
# Modify the training dataset to include only the model features
mtcars_selected4 <- mtcars_mixed2[, caret_features4]
# Generate all possible feature combinations for nomogram
nomogram_features4 <-
  select_if(mtcars_selected4, is.numeric) |>
  lapply(\(x) seq(min(x), max(x))) |>
  c(lapply(select_if(mtcars_selected4, is.factor), unique)) |>
  expand.grid()
nomogram_outputs4 <- data.frame(output = predict(model4, nomogram_features4))
nomogram5 <- create_nomogram(nomogram_features4, nomogram_outputs4, est = TRUE)
# Prepare data and model for SHAP value calculation using iml
X4 <- mtcars_mixed2[, -which(names(mtcars_mixed2) == "wt")]
# Create a predictor object
predictor4 <- Predictor$new(model = model4, data = X4)
# Calculate SHAP values
nomogram_shaps4 <- list()
for(i in seq(nrow(nomogram_features4))){
shapley4 <- Shapley$new(predictor4, x.interest = nomogram_features4[i, ])
nomogram_shaps4[[i]] <- shapley4$results
}
names(nomogram_shaps4) <- seq(nrow(nomogram_features4))
nomogram_shaps4 <- reduce(imap(nomogram_shaps4, ~ mutate(.x, i = .y)), rbind) |>
  select(i, feature, phi) |>
  spread(feature, phi) |>
  arrange(as.numeric(i)) |>
  column_to_rownames(var = "i")
nomogram5_with_shap <- create_nomogram(
  nomogram_features4, nomogram_outputs4, nomogram_shaps4,
  est = TRUE
)
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-unknown-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/aarch64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/aarch64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rmlnomogram_0.1.2 iml_0.11.3 caret_6.0-94 lattice_0.22-6
## [5] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [9] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [13] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 timeDate_4041.110 farver_2.1.2
## [4] fastmap_1.2.0 pROC_1.18.5 digest_0.6.37
## [7] rpart_4.1.23 timechange_0.3.0 lifecycle_1.0.4
## [10] survival_3.6-4 magrittr_2.0.3 compiler_4.4.1
## [13] rlang_1.1.4 sass_0.4.9 tools_4.4.1
## [16] utf8_1.2.4 yaml_2.3.10 data.table_1.16.2
## [19] ggsignif_0.6.4 knitr_1.48 labeling_0.4.3
## [22] plyr_1.8.9 abind_1.4-8 withr_3.0.2
## [25] Metrics_0.1.4 nnet_7.3-19 grid_4.4.1
## [28] stats4_4.4.1 fansi_1.0.6 ggpubr_0.6.0
## [31] colorspace_2.1-1 future_1.34.0 globals_0.16.3
## [34] scales_1.3.0 iterators_1.0.14 MASS_7.3-60.2
## [37] cli_3.6.3 rmarkdown_2.28 generics_0.1.3
## [40] rstudioapi_0.17.1 future.apply_1.11.3 reshape2_1.4.4
## [43] tzdb_0.4.0 cachem_1.1.0 splines_4.4.1
## [46] parallel_4.4.1 vctrs_0.6.5 hardhat_1.4.0
## [49] Matrix_1.7-0 carData_3.0-5 jsonlite_1.8.9
## [52] car_3.1-3 hms_1.1.3 rstatix_0.7.2
## [55] Formula_1.2-5 listenv_0.9.1 foreach_1.5.2
## [58] gower_1.0.1 jquerylib_0.1.4 recipes_1.1.0
## [61] glue_1.8.0 parallelly_1.38.0 codetools_0.2-20
## [64] cowplot_1.1.3 stringi_1.8.4 gtable_0.3.6
## [67] munsell_0.5.1 pillar_1.9.0 htmltools_0.5.8.1
## [70] ipred_0.9-15 lava_1.8.0 R6_2.5.1
## [73] evaluate_1.0.1 highr_0.11 backports_1.5.0
## [76] broom_1.0.7 bslib_0.8.0 class_7.3-22
## [79] Rcpp_1.0.13 nlme_3.1-164 prodlim_2024.06.25
## [82] checkmate_2.3.2 xfun_0.48 pkgconfig_2.0.3
## [85] ModelMetrics_1.2.2.2