The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This package provides functions for performing robust stepwise split regularized regression.
You can install the stable version on R CRAN.
{r installation, eval = FALSE} install.packages("robStepSplitReg", dependencies = TRUE)
You can install the development version from GitHub
library(devtools)
::install_github("AnthonyChristidis/robStepSplitReg") devtools
# Required library
library(mvnfast)
# Simulation parameters
<- 50
n <- 500
p <- 0.8
rho <- 100
p.active <- 3
snr <- 0.2
contamination.prop
# Setting the seed
set.seed(0)
# Simulation of beta vector
<- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active))
true.beta
# Simulation of uncontaminated data
<- matrix(0, nrow = p, ncol = p)
sigma.mat 1:p.active, 1:p.active] <- rho
sigma.mat[diag(sigma.mat) <- 1
<- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat)
x <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr))
sigma <- x %*% true.beta + rnorm(n, 0, sigma)
y
# Contamination of data
<- 1:floor(n*contamination.prop)
contamination_indices <- 2
k_lev <- 100
k_slo <- x
x_train <- y
y_train <- true.beta
beta_cont !=0] <- beta_cont[true.beta!=0]*(1 + k_slo)
beta_cont[true.beta==0] <- k_slo*max(abs(true.beta))
beta_cont[true.betafor(cont_id in contamination_indices){
<- runif(p, min = -1, max = 1)
a <- a - as.numeric((1/p)*t(a) %*% rep(1, p))
a <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) +
x_train[cont_id,] * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a))
k_lev <- t(x_train[cont_id,]) %*% beta_cont
y_train[cont_id]
}
# Ensemble models
<- robStepSplitReg(x_train, y_train,
ensemble_fit n_models = 5,
model_saturation = c("fixed", "p-value")[1],
alpha = 0.05, model_size = 25,
robust = TRUE,
compute_coef = TRUE,
pense_alpha = 1/4, pense_cv_k = 5, pense_cv_repl = 1,
cl = NULL)
# Ensemble coefficients
<- coef(ensemble_fit, group_index = 1:ensemble_fit$n_models)
ensemble_coefs <- sum(which((ensemble_coefs[-1]!=0)) <= p.active)/p.active
sens_ensemble <- sum(which((ensemble_coefs[-1]!=0)) <= p.active)/sum(ensemble_coefs[-1]!=0)
spec_ensemble
# Simulation of test data
<- 2e3
m <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat)
x_test <- x_test %*% true.beta + rnorm(m, 0, sigma)
y_test
# Prediction of test samples
<- predict(ensemble_fit, newx = x_test,
ensemble_preds group_index = 1:ensemble_fit$n_models,
dynamic = FALSE)
<- mean((y_test - ensemble_preds)^2)/sigma^2 mspe_ensemble
This package is free and open source software, licensed under GPL (>= 2).
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.