The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This vignette mirrors the main package example (see
README) and compares a PLS baseline to
soilVAE on the included dataset
datsoilspc.
CRAN/CI note: this vignette is written to build on systems without Python/TensorFlow.
The VAE training chunks are automatically skipped unless TensorFlow/Keras are available.
# IMPORTANT: do NOT use devtools in vignettes (it is not available on CRAN/CI runners).
# Only load packages that are in Imports/Suggests.
for (p in c("prospectr", "pls", "reticulate", "soilVAE")) {
if (!requireNamespace(p, quietly = TRUE)) {
stop("Package '", p, "' is required to build this vignette. ",
"Please install it (it should be installed automatically from Suggests/Imports during checks).")
}
}
library(prospectr)
library(pls)
library(reticulate)
library(soilVAE)data("datsoilspc", package = "soilVAE")
str(datsoilspc, max.level = 1)
#> 'data.frame': 391 obs. of 5 variables:
#> $ clay : num 49 7 56 14 53 24 9 18 33 27 ...
#> $ silt : num 10 24 17 19 7 21 9 20 13 19 ...
#> $ sand : num 42 69 27 67 40 55 83 61 54 55 ...
#> $ TotalCarbon: num 0.15 0.12 0.17 1.06 0.69 2.76 0.66 1.36 0.19 0.16 ...
#> $ spc : num [1:391, 1:2151] 0.0898 0.1677 0.0778 0.0958 0.0359 ...
#> ..- attr(*, "dimnames")=List of 2
#> - attr(*, "na.action")= 'omit' Named int 392
#> ..- attr(*, "names")= chr "63"eval_quant <- function(y, yhat) {
y <- as.numeric(y); yhat <- as.numeric(yhat)
ok <- is.finite(y) & is.finite(yhat)
y <- y[ok]; yhat <- yhat[ok]
if (length(y) < 3) {
return(list(n = length(y), ME = NA_real_, MAE = NA_real_, RMSE = NA_real_,
R2 = NA_real_, RPIQ = NA_real_, RPD = NA_real_))
}
err <- yhat - y
me <- mean(err)
mae <- mean(abs(err))
rmse <- sqrt(mean(err^2))
ss_res <- sum((y - yhat)^2)
ss_tot <- sum((y - mean(y))^2)
r2 <- if (ss_tot == 0) NA_real_ else 1 - ss_res / ss_tot
rpiq <- stats::IQR(y) / rmse
rpd <- stats::sd(y) / rmse
list(n = length(y), ME = me, MAE = mae, RMSE = rmse, R2 = r2, RPIQ = rpiq, RPD = rpd)
}
as_df_metrics <- function(x) {
data.frame(
n = x$n,
ME = round(x$ME, 2),
MAE = round(x$MAE, 2),
RMSE = round(x$RMSE, 2),
R2 = round(x$R2, 2),
RPIQ = round(x$RPIQ, 2),
RPD = round(x$RPD, 2),
stringsAsFactors = FALSE
)
}# Reflectance → absorbance
spcA <- log(1 / as.matrix(datsoilspc$spc))
# Resample to 5 nm
oldWavs <- as.numeric(colnames(spcA))
newWavs <- seq(min(oldWavs), max(oldWavs), by = 5)
spcARs <- prospectr::resample(
X = spcA,
wav = oldWavs,
new.wav = newWavs,
interpol = "linear"
)
# SNV + moving average
spcASnv <- prospectr::standardNormalVariate(spcARs)
spcAMovav <- prospectr::movav(spcASnv, w = 11)
# Store in object to match book-style workflows
datsoilspc$spcAMovav <- spcAMovavmatplot(
x = as.numeric(colnames(datsoilspc$spcAMovav)),
y = t(datsoilspc$spcAMovav),
xlab = "Wavelength / nm",
ylab = "Absorbance (SNV + movav)",
type = "l", lty = 1,
col = rgb(0.5, 0.5, 0.5, alpha = 0.25)
)Preprocessed spectra (SNV + movav)
maxc <- 30
pls_fit <- pls::plsr(
TotalCarbon ~ spcAMovav,
data = datC,
method = "oscorespls",
ncomp = maxc,
validation = "CV"
)
plot(pls_fit, "val", main = "PLS CV performance", xlab = "Number of components")PLS CV performance
nc <- 14
pls_pred_C <- as.numeric(predict(pls_fit, ncomp = nc, newdata = datC$spcAMovav))
pls_pred_V <- as.numeric(predict(pls_fit, ncomp = nc, newdata = datV$spcAMovav))
pls_cal <- eval_quant(datC$TotalCarbon, pls_pred_C)
pls_tst <- eval_quant(datV$TotalCarbon, pls_pred_V)
rbind(
cbind(Model = "PLS", Split = "Calibration (datC)", as_df_metrics(pls_cal)),
cbind(Model = "PLS", Split = "TEST (datV)", as_df_metrics(pls_tst))
)
#> Model Split n ME MAE RMSE R2 RPIQ RPD
#> 1 PLS Calibration (datC) 293 0.00 0.37 0.56 0.86 2.04 2.63
#> 2 PLS TEST (datV) 98 0.02 0.36 0.52 0.69 2.34 1.81tab <- rbind(
cbind(Model = "PLS", Split = "Calibration (datC)", as_df_metrics(pls_cal)),
cbind(Model = "PLS", Split = "TEST (datV)", as_df_metrics(pls_tst)),
cbind(Model = "soilVAE",Split = "Train (internal)", as_df_metrics(vae_tr)),
cbind(Model = "soilVAE",Split = "Val (internal)", as_df_metrics(vae_va)),
cbind(Model = "soilVAE",Split = "TEST (datV)", as_df_metrics(vae_te))
)
row.names(tab) <- NULL
tabThese binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.