XGeoRTR provides a platform layer for explanation
geometry in R. It standardizes generic explanation tables into a
normalized xgeo_state, computes embeddings, diagnostics,
and multiscale level-of-detail summaries, and exposes backend-neutral
tables for downstream packages.
This vignette uses common R workflows and built-in datasets. It does not render graphics. Rendering and viewport orchestration are delegated to downstream frontends.
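All of the examples below follow the same flow, so the calls used in this vignette map onto the description above roughly as follows (a reading guide only; every step is demonstrated on real data further down):

# as_xgeo_state()            normalize a long explanation table (or a matrix) into an xgeo_state
# compute_xgeo_embedding()   compute an embedding of the explanations or points (PCA, 2 dims here)
# set_active_embedding()     mark which embedding downstream steps should use
# compute_xgeo_diagnostics() attach diagnostics for that embedding
# build_xgeo_lod()           build multiscale level-of-detail summaries
# xgeo_explanation_table(), xgeo_point_values(), xgeo_regular_grid()
#                            expose backend-neutral tables for downstream frontends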
library(XGeoRTR)
# Rescale a numeric vector to [0, 1]; constant or non-finite input maps to 0.5.
scale01 <- function(x) {
  x <- as.numeric(x)
  rng <- range(x, finite = TRUE)
  if (!all(is.finite(rng)) || diff(rng) == 0) {
    return(rep(0.5, length(x)))
  }
  (x - rng[[1]]) / diff(rng)
}
# Apply the shared finishing pipeline to a state: a 2-D PCA embedding,
# diagnostics for that embedding, and multiscale level-of-detail summaries.
finish_state <- function(state, embedding_source = "explanations", k = 3L) {
  state <- compute_xgeo_embedding(
    state,
    method = "pca",
    source = embedding_source,
    dims = 2
  )
  embedding_name <- paste("pca", embedding_source, sep = "_")
  state <- set_active_embedding(state, embedding_name)
  state <- compute_xgeo_diagnostics(
    state,
    embedding = embedding_name,
    source = embedding_source,
    k = k
  )
  build_xgeo_lod(
    state,
    embedding = embedding_name,
    levels = c(8L, 16L),
    auto_threshold = 10L
  )
}

stats::lm() on mtcars

The first example turns coefficient-scaled centered predictors into explanation values. Each car becomes a point, each predictor becomes a feature contribution, and fitted/residual information becomes point metadata.
mt <- datasets::mtcars
mt$car <- rownames(mt)
fit_lm <- stats::lm(mpg ~ wt + hp + qsec, data = mt)
terms_lm <- c("wt", "hp", "qsec")
centered_lm <- scale(mt[, terms_lm, drop = FALSE], center = TRUE, scale = FALSE)
contrib_lm <- sweep(centered_lm, 2, stats::coef(fit_lm)[terms_lm], `*`)
fitted_lm <- stats::predict(fit_lm)
resid_lm <- stats::residuals(fit_lm)
lm_tbl <- data.frame(
  point_id = rep(mt$car, each = length(terms_lm)),
  feature = rep(terms_lm, times = nrow(mt)),
  value = as.vector(t(contrib_lm)),
  x = rep(scale01(mt$wt), each = length(terms_lm)),
  y = rep(scale01(fitted_lm), each = length(terms_lm)),
  z = rep(scale01(abs(resid_lm)), each = length(terms_lm)),
  response = rep(mt$mpg, each = length(terms_lm)),
  fitted = rep(fitted_lm, each = length(terms_lm)),
  residual = rep(resid_lm, each = length(terms_lm))
)
state_lm <- as_xgeo_state(
  lm_tbl,
  point_id_col = "point_id",
  feature_col = "feature",
  method = "linear-model-coefficient-contributions",
  meta = list(dataset = "datasets::mtcars", model = "stats::lm")
)
state_lm <- finish_state(state_lm)
summary(state_lm)
#> <summary.xgeo_state>
#> structure: spatial
#> method: linear-model-coefficient-contributions
#> points: 32
#> features: 3
#> explanations: 96
#> embeddings: 2 (active: pca_explanations)
#> diagnostics: 1 (active: diagnostics_pca_explanations_explanations)
#> lod bundles: 1 (active: density_grid_pca_explanations)
#> selected points:0
#> selected feats: 0

stats::glm() on mtcars

The second example uses a logistic model and stores the predicted probability as state metadata. This gives the same backend state contract for a classification workflow. The glm.fit warnings below indicate (quasi-)separation in this small dataset; the fitted probabilities are still usable for the illustration.
mt$efficient <- as.integer(mt$mpg > stats::median(mt$mpg))
fit_glm <- stats::glm(efficient ~ wt + hp + qsec, data = mt, family = stats::binomial())
#> Warning: glm.fit: algorithm did not converge
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
prob_glm <- stats::predict(fit_glm, type = "response")
terms_glm <- c("wt", "hp", "qsec")
centered_glm <- scale(mt[, terms_glm, drop = FALSE], center = TRUE, scale = FALSE)
contrib_glm <- sweep(centered_glm, 2, stats::coef(fit_glm)[terms_glm], `*`)
glm_tbl <- data.frame(
  point_id = rep(mt$car, each = length(terms_glm)),
  feature = rep(terms_glm, times = nrow(mt)),
  value = as.vector(t(contrib_glm)),
  x = rep(scale01(mt$wt), each = length(terms_glm)),
  y = rep(prob_glm, each = length(terms_glm)),
  z = rep(scale01(abs(stats::predict(fit_glm, type = "link"))), each = length(terms_glm)),
  class = rep(ifelse(mt$efficient == 1L, "high_mpg", "low_mpg"), each = length(terms_glm)),
  probability = rep(prob_glm, each = length(terms_glm))
)
state_glm <- as_xgeo_state(
  glm_tbl,
  point_id_col = "point_id",
  feature_col = "feature",
  method = "logistic-model-coefficient-contributions",
  meta = list(dataset = "datasets::mtcars", model = "stats::glm")
)
state_glm <- finish_state(state_glm)
summary(state_glm)
#> <summary.xgeo_state>
#> structure: spatial
#> method: logistic-model-coefficient-contributions
#> points: 32
#> features: 3
#> explanations: 96
#> embeddings: 2 (active: pca_explanations)
#> diagnostics: 1 (active: diagnostics_pca_explanations_explanations)
#> lod bundles: 1 (active: density_grid_pca_explanations)
#> selected points:0
#> selected feats: 0

stats::kmeans() on iris

For clustering, explanation values can be residuals from the assigned cluster center. The geometry can be defined by a PCA layout, while the explanation table stores the feature-level deviations.
iris_x <- scale(datasets::iris[, 1:4])
km <- stats::kmeans(iris_x, centers = 3L, nstart = 5L)
pca_iris <- stats::prcomp(iris_x, center = FALSE, scale. = FALSE)
cluster_residual <- iris_x - km$centers[km$cluster, , drop = FALSE]
iris_tbl <- data.frame(
  point_id = rep(paste0("iris_", seq_len(nrow(iris_x))), each = ncol(iris_x)),
  feature = rep(colnames(iris_x), times = nrow(iris_x)),
  value = as.vector(t(cluster_residual)),
  x = rep(pca_iris$x[, 1], each = ncol(iris_x)),
  y = rep(pca_iris$x[, 2], each = ncol(iris_x)),
  z = rep(scale01(km$cluster), each = ncol(iris_x)),
  species = rep(as.character(datasets::iris$Species), each = ncol(iris_x)),
  cluster = rep(paste0("cluster_", km$cluster), each = ncol(iris_x))
)
state_km <- as_xgeo_state(
  iris_tbl,
  point_id_col = "point_id",
  feature_col = "feature",
  method = "kmeans-residual-geometry",
  meta = list(dataset = "datasets::iris", model = "stats::kmeans")
)
state_km <- finish_state(state_km, k = 5L)
summary(state_km)
#> <summary.xgeo_state>
#> structure: spatial
#> method: kmeans-residual-geometry
#> points: 150
#> features: 4
#> explanations: 600
#> embeddings: 2 (active: pca_explanations)
#> diagnostics: 1 (active: diagnostics_pca_explanations_explanations)
#> lod bundles: 1 (active: density_grid_pca_explanations)
#> selected points:0
#> selected feats: 0
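The residual construction has a direct geometric reading: each row of cluster_residual is a point's offset from its assigned center, so its squared row sum is that point's share of the within-cluster sum of squares. A minimal sketch of that check, using only the objects already created above:

# Each point's squared distance to its assigned center; summed, these recover
# km$tot.withinss, the total within-cluster sum of squares reported by kmeans().
sq_dist <- rowSums(cluster_residual^2)
all.equal(sum(sq_dist), km$tot.withinss)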
stats::prcomp() on USArrests

PCA loadings can also be exposed as feature-level contributions: each variable's centered, scaled value multiplied by its PC1 loading gives that variable's contribution to the observation's first principal-component score.
arrests <- datasets::USArrests
pca_arrests <- stats::prcomp(arrests, center = TRUE, scale. = TRUE)
scaled_arrests <- scale(arrests, center = pca_arrests$center, scale = pca_arrests$scale)
pc1_contrib <- sweep(scaled_arrests, 2, pca_arrests$rotation[, 1], `*`)
pca_tbl <- data.frame(
  point_id = rep(rownames(arrests), each = ncol(arrests)),
  feature = rep(colnames(arrests), times = nrow(arrests)),
  value = as.vector(t(pc1_contrib)),
  x = rep(pca_arrests$x[, 1], each = ncol(arrests)),
  y = rep(pca_arrests$x[, 2], each = ncol(arrests)),
  z = rep(scale01(rowSums(abs(pc1_contrib))), each = ncol(arrests))
)
state_pca <- as_xgeo_state(
  pca_tbl,
  point_id_col = "point_id",
  feature_col = "feature",
  method = "pca-loading-contribution-geometry",
  meta = list(dataset = "datasets::USArrests", model = "stats::prcomp")
)
state_pca <- finish_state(state_pca, k = 4L)
summary(state_pca)
#> <summary.xgeo_state>
#> structure: spatial
#> method: pca-loading-contribution-geometry
#> points: 50
#> features: 4
#> explanations: 200
#> embeddings: 2 (active: pca_explanations)
#> diagnostics: 1 (active: diagnostics_pca_explanations_explanations)
#> lod bundles: 1 (active: density_grid_pca_explanations)
#> selected points:0
#> selected feats: 0

datasets::volcano

Matrix data can be converted into a state and exposed as a regular grid for downstream scientific-visualization packages.
state_volcano <- as_xgeo_state(
  datasets::volcano,
  method = "matrix-regular-grid",
  meta = list(dataset = "datasets::volcano")
)
state_volcano <- finish_state(state_volcano, embedding_source = "points", k = 4L)
point_tbl <- xgeo_point_values(state_volcano)
grid <- xgeo_regular_grid(point_tbl)
names(grid)
#> [1] "x" "y" "z"Serialization writes to an explicit temporary file.
Downstream packages consume public tables rather than internal state details.
long_tbl <- xgeo_explanation_table(state_lm)
point_tbl <- xgeo_point_values(state_lm)
utils::head(long_tbl)
#> point_id feature value x y z label response
#> 1 Mazda RX4 wt 2.6032916 0.2830478 0.6765502 0.27370651 wt 21.0
#> 2 Mazda RX4 qsec -0.7094203 0.2830478 0.6765502 0.27370651 qsec 21.0
#> 3 Mazda RX4 hp 0.6538546 0.2830478 0.6765502 0.27370651 hp 21.0
#> 4 Merc 280 wt -0.9709221 0.4927129 0.5331987 0.07873225 wt 19.2
#> 5 Merc 280 qsec 0.2305137 0.4927129 0.5331987 0.07873225 qsec 19.2
#> 6 Merc 280 hp 0.4221651 0.4927129 0.5331987 0.07873225 hp 19.2
#> fitted residual
#> 1 22.63835 -1.6383509
#> 2 22.63835 -1.6383509
#> 3 22.63835 -1.6383509
#> 4 19.77238 -0.5723817
#> 5 19.77238 -0.5723817
#> 6 19.77238 -0.5723817
utils::head(point_tbl)
#> point_id x y z value response
#> 1 Mazda RX4 0.2830478 0.6765502 0.273706514 2.5477259 21.0
#> 2 Mazda RX4 Wag 0.3482485 0.6352636 0.122729451 1.7222995 21.0
#> 3 Datsun 710 0.2063411 0.8120459 0.439953430 5.2566361 22.8
#> 4 Hornet 4 Drive 0.4351828 0.6229704 0.004612335 1.4765260 21.4
#> 5 Hornet Sportabout 0.4927129 0.4541382 0.067000881 -1.8988686 18.7
#> 6 Valiant 0.4978266 0.5939425 0.502059129 0.8961823 18.1
#> fitted residual
#> 1 22.63835 -1.6383509
#> 2 21.81292 -0.8129245
#> 3 25.34726 -2.5472611
#> 4 21.56715 -0.1671510
#> 5 18.19176 0.5082436
#> 6 20.98681 -2.8868073