The modelStudio() function uses DALEX explainers created with DALEX::explain() or DALEXtra::explain_*().
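Before the framework-specific examples below, here is a minimal sketch of the general pattern, using only a plain lm model on the built-in apartments data as a stand-in: wrap any fitted model with DALEX::explain() and pass the explainer straight to modelStudio().
# a minimal sketch: any fitted model wrapped in a DALEX explainer works
library(DALEX)
library(modelStudio)
model <- lm(m2.price ~ ., data = apartments)
explainer <- explain(model,
                     data = apartments,
                     y = apartments$m2.price,
                     label = "lm")
modelStudio(explainer)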
# packages for the explainer objects
install.packages("DALEX")
install.packages("DALEXtra")In this example, we make a studio for the ranger model
on the apartments data.
# load packages and data
library(mlr)
library(DALEXtra)
library(modelStudio)
data <- DALEX::apartments
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# fit a model
task <- makeRegrTask(id = "apartments", data = train, target = "m2.price")
learner <- makeLearner("regr.ranger", predict.type = "response")
model <- train(learner, task)
# create an explainer for the model
explainer <- explain_mlr(model,
                         data = test,
                         y = test$m2.price,
                         label = "mlr")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer, new_observation)
In this example, we make a studio for the ranger model on the titanic data.
# load packages and data
library(mlr3)
library(mlr3learners)
library(DALEXtra)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# mlr3 TaskClassif takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
task <- TaskClassif$new(id = "titanic", backend = train, target = "survived")
learner <- lrn("classif.ranger", predict_type = "prob")
learner$train(task)
# create an explainer for the model
explainer <- explain_mlr3(learner,
                          data = test,
                          y = test$survived,
                          label = "mlr3")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer, new_observation)
In this example, we make a studio for the xgboost model on the titanic data.
# load packages and data
library(xgboost)
library(DALEX)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# xgboost requires a numeric matrix, so one-hot encode the features
train_matrix <- model.matrix(survived ~ . - 1, train)
test_matrix <- model.matrix(survived ~ . - 1, test)
# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(max_depth = 3, objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 500)
# create an explainer for the model
explainer <- explain(model,
                     data = test_matrix,
                     y = test$survived,
                     type = "classification",
                     label = "xgboost")
# pick observations
new_observation <- test_matrix[1:2, , drop=FALSE]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer, new_observation)
In this example, we make a studio for the gbm model on the titanic data.
# load packages and data
library(caret)
library(DALEX)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# caret train takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
cv <- trainControl(method = "repeatedcv", number = 3, repeats = 3)
model <- train(survived ~ ., data = train, method = "gbm", trControl = cv, verbose = FALSE)
# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$survived,
                     label = "caret")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer, new_observation)
In this example, we make a studio for the h2o.automl model on the titanic data.
# load packages and data
library(h2o)
library(DALEXtra)
library(modelStudio)
data <- DALEX::titanic_imputed
# init h2o
h2o.init()
h2o.no_progress()
# split the data
h2o_split <- h2o.splitFrame(as.h2o(data))
train <- h2o_split[[1]]
# convert the test split back to a data.frame for the explainer
test <- as.data.frame(h2o_split[[2]])
# h2o automl takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
automl <- h2o.automl(y = "survived", training_frame = train, max_runtime_secs = 30)
model <- automl@leader
# create an explainer for the model
explainer <- explain_h2o(model,
                         data = test,
                         y = test$survived,
                         label = "h2o")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
# lower B (permutation rounds for SHAP and feature importance) to speed up computation
modelStudio(explainer, new_observation,
            B = 5)
# shutdown h2o
h2o.shutdown(prompt = FALSE)
In this example, we make a studio for the ranger model on the apartments data.
# load packages and data
library(parsnip)
library(DALEX)
library(modelStudio)
data <- DALEX::apartments
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# fit a model
model <- rand_forest() %>%
  set_engine("ranger", importance = "impurity") %>%
  set_mode("regression") %>%
  fit(m2.price ~ ., data = train)
# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$m2.price,
                     label = "parsnip")
# make a studio for the model
modelStudio(explainer)
In this example, we make a studio for the ranger model on the titanic data.
# load packages and data
library(tidymodels)
library(DALEXtra)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# tidymodels fit takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
rec <- recipe(survived ~ ., data = train) %>%
  step_normalize(fare)
clf <- rand_forest(mtry = 2) %>%
  set_engine("ranger") %>%
  set_mode("classification")
wflow <- workflow() %>%
  add_recipe(rec) %>%
  add_model(clf)
model <- wflow %>% fit(data = train)
# create an explainer for the model
explainer <- explain_tidymodels(model,
                                data = test,
                                y = test$survived,
                                label = "tidymodels")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer, new_observation)
The modelStudio() function also uses dalex explainers created in Python with dalex.Explainer().
# package for the Explainer object
pip install dalex -U
Use the pickle Python module and the reticulate R package to easily make a studio for such a model.
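The bridge itself is only a few lines; as a sketch (the file name explainer.pickle is just a placeholder), dump the explainer to a pickle file in Python, then load it in R and pass it to modelStudio().
# Python: pack the dalex explainer into a pickle file
explainer.dump(open('explainer.pickle', 'wb'))
# R: load the pickled explainer and make a studio
library(reticulate)
library(modelStudio)
explainer <- py_load_object("explainer.pickle", pickle = "pickle")
modelStudio(explainer)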
# package for pickle load
install.packages("reticulate")In this example, we make a studio for the Pipeline SVR
model on the fifa data.
First, use dalex in Python:
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from numpy import log
data = dx.datasets.load_fifa()
X = data.drop(columns=['overall', 'potential', 'value_eur', 'wage_eur', 'nationality'])
y = log(data.value_eur)
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)
# fit a pipeline model
model = Pipeline([('scale', StandardScaler()), ('svm', SVR())])
model.fit(X_train, y_train)
# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')
# pack the explainer into a pickle file
explainer.dump(open('explainer_scikitlearn.pickle', 'wb'))
Then, use modelStudio in R:
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer, B = 5)
In this example, we make a studio for the Pipeline LGBMClassifier model on the titanic data.
First, use dalex in Python:
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from lightgbm import LGBMClassifier
data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)
# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)
classifier = LGBMClassifier(n_estimators=300)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)
# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='lightgbm')
# pack the explainer into a pickle file
explainer.dump(open('explainer_lightgbm.pickle', 'wb'))
Then, use modelStudio in R:
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_lightgbm.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer)
In this example, we make a studio for the Pipeline KerasClassifier model on the titanic data.
First, use dalex in Python:
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.models import Sequential
data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)
# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)
def create_architecture():
    model = Sequential()
    # there are 17 inputs after the pipeline
    model.add(Dense(60, input_dim=17, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
classifier = KerasClassifier(build_fn=create_architecture,
                             epochs=100, batch_size=32, verbose=False)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)
# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='keras')
# pack the explainer into a pickle file
explainer.dump(open('explainer_keras.pickle', 'wb'))
Then, use modelStudio in R:
# load the explainer from the pickle file
library(reticulate)
#! define a stub create_architecture function before loading,
#! because unpickling the KerasClassifier needs the name to exist
py_run_string('
def create_architecture():
    return True
')
explainer <- py_load_object("explainer_keras.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer)
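The returned studio is an r2d3 object (an htmlwidget), so it can be saved as a standalone HTML file and shared; a minimal sketch, with an arbitrary file name:
# save a studio as a standalone HTML file
ms <- modelStudio(explainer)
r2d3::save_d3_html(ms, file = "modelStudio.html")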