This package provides functions for performing robust stepwise split regularized regression.
You can install the stable version on R CRAN.
{r installation, eval = FALSE} install.packages("robStepSplitReg", dependencies = TRUE)
You can install the development version from GitHub
library(devtools)
::install_github("AnthonyChristidis/robStepSplitReg") devtools
# Required library
library(mvnfast)
# Simulation parameters
<- 50
n <- 500
p <- 0.8
rho <- 100
p.active <- 3
snr <- 0.2
contamination.prop
# Setting the seed
set.seed(0)
# Simulation of beta vector
<- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active))
true.beta
# Simulation of uncontaminated data
<- matrix(0, nrow = p, ncol = p)
sigma.mat 1:p.active, 1:p.active] <- rho
sigma.mat[diag(sigma.mat) <- 1
<- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat)
x <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr))
sigma <- x %*% true.beta + rnorm(n, 0, sigma)
y
# Contamination of data
<- 1:floor(n*contamination.prop)
contamination_indices <- 2
k_lev <- 100
k_slo <- x
x_train <- y
y_train <- true.beta
beta_cont !=0] <- beta_cont[true.beta!=0]*(1 + k_slo)
beta_cont[true.beta==0] <- k_slo*max(abs(true.beta))
beta_cont[true.betafor(cont_id in contamination_indices){
<- runif(p, min = -1, max = 1)
a <- a - as.numeric((1/p)*t(a) %*% rep(1, p))
a <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) +
x_train[cont_id,] * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a))
k_lev <- t(x_train[cont_id,]) %*% beta_cont
y_train[cont_id]
}
# Ensemble models
<- robStepSplitReg(x_train, y_train,
ensemble_fit n_models = 5,
model_saturation = c("fixed", "p-value")[1],
alpha = 0.05, model_size = 25,
robust = TRUE,
compute_coef = TRUE,
pense_alpha = 1/4, pense_cv_k = 5, pense_cv_repl = 1,
cl = NULL)
# Ensemble coefficients
<- coef(ensemble_fit, group_index = 1:ensemble_fit$n_models)
ensemble_coefs <- sum(which((ensemble_coefs[-1]!=0)) <= p.active)/p.active
sens_ensemble <- sum(which((ensemble_coefs[-1]!=0)) <= p.active)/sum(ensemble_coefs[-1]!=0)
spec_ensemble
# Simulation of test data
<- 2e3
m <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat)
x_test <- x_test %*% true.beta + rnorm(m, 0, sigma)
y_test
# Prediction of test samples
<- predict(ensemble_fit, newx = x_test,
ensemble_preds group_index = 1:ensemble_fit$n_models,
dynamic = FALSE)
<- mean((y_test - ensemble_preds)^2)/sigma^2 mspe_ensemble
This package is free and open source software, licensed under GPL (>= 2).