This package contains the forecasting algorithm developed by Adämmer, Lehmann and Schüssler (2023). When using the package, please remember to cite the paper.
The package comprises three functions:
stsc()
can be used to directly apply the
“Signal-Transformed-Subset-Combination” forecasting algorithm described
in Adämmer, Lehmann
and Schüssler (2023).
tvc()
can be used to transform predictive signals
into univariate density forecasts via time-varying coefficient models,
where each model generates a conditionally gaussian predictive density
for each signal at each point in time (first part of the STSC
algorithm).
dsc()
can be used to dynamically select a subset of
candidate forecast models for each period based on their past
performance in terms of density forecast accuracy (second part of the
STSC algorithm).
You can install the released version of hdflex from CRAN:
install.packages("hdflex")
You can install hdflex from GitHub:
# install.packages("devtools")
::install_github("https://github.com/lehmasve/hdflex") devtools
The package compiles some C++ source code for installation, which is why you need the appropriate compilers:
On Windows you need Rtools available from CRAN.
On macOS you need Xcode and a Fortran compiler - for more details see Compiler.
Example using the stsc()
function:
#########################################################
######### Forecasting quarterly U.S. inflation ##########
#### Please see Koop & Korobilis (2023) for further ####
#### details regarding the data & external forecasts ####
#########################################################
# Load Package
library("hdflex")
library("ggplot2")
library("cowplot")
########## Get Data ##########
# Load Package Data
<- inflation_data
inflation_data
# Set Target Variable
<- inflation_data[, 1]
y
# Set 'P-Signals'
<- inflation_data[, 2:442]
X
# Set 'F-Signals'
<- inflation_data[, 443:462]
Ext_F
# Get Dates and Number of Observations
<- rownames(inflation_data)
tdates <- length(tdates)
tlength
# First complete observation (no missing values)
<- which(complete.cases(inflation_data))[1]
first_complete
########## Rolling AR2-Benchmark ##########
# Set up matrix for predictions
<- matrix(NA, nrow = tlength,
benchmark ncol = 1, dimnames = list(tdates, "AR2"))
# Set Window-Size (15 years of quarterly data)
<- 15 * 4
window_size
# Time Sequence
<- seq(window_size, tlength - 1)
t_seq
# Loop with rolling window
for (t in t_seq) {
# Split Data for Training Train Data
<- cbind(int = 1, X[(t - window_size + 1):t, 1:2])
x_train <- y[(t - window_size + 1):t]
y_train
# Split Data for Prediction
<- cbind(int = 1, X[t + 1, 1:2, drop = FALSE])
x_pred
# Fit AR-Model
<- .lm.fit(x_train, y_train)
model_ar
# Predict and store in benchmark matrix
+ 1, ] <- x_pred %*% model_ar$coefficients
benchmark[t
}
########## STSC ##########
# Set TV-C-Parameter
<- 5 * 4
init <- c(0.90, 0.95, 1.00)
lambda_grid <- c(0.94, 0.96, 0.98)
kappa_grid <- TRUE
bias
# Set DSC-Parameter
<- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
gamma_grid 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
<- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)
n_tvc <- c(1:100, sapply(1:4, function(i) floor(i * n_tvc / 4)))
psi_grid <- 0.95
delta <- first_complete + init / 2
burn_in <- 1
burn_in_dsc <- 5
metric <- TRUE
equal_weight <- NULL
incl <- FALSE
parallel <- NULL
n_threads
# Apply STSC-Function
<- hdflex::stsc(y,
results
X,
Ext_F,
init,
lambda_grid,
kappa_grid,
bias,
gamma_grid,
psi_grid,
delta,
burn_in,
burn_in_dsc,
metric,
equal_weight,
incl,
parallel,
n_threads,NULL)
########## Evaluation ##########
# Define Evaluation Period (OOS-Period)
<- which(tdates >= "1991-04-01" & tdates <= "2021-12-01")
eval_period
# Apply Evaluation Summary for STSC
<- summary(obj = results, eval_period = eval_period)
eval_results
# Calculate (Mean-)Squared-Errors for AR2-Benchmark
<- (y[eval_period] - benchmark[eval_period, 1])^2
se_ar2 <- mean(se_ar2)
mse_ar2
# Create Cumulative Squared Error Differences (CSSED) Plot
<- cumsum(se_ar2 - eval_results$MSE[[2]])
cssed <- ggplot(
plot_cssed data.frame(eval_period, cssed),
aes(x = eval_period, y = cssed)
+
) geom_line() +
ylim(-0.0008, 0.0008) +
ggtitle("Cumulative Squared Error Differences") +
xlab("Time Index") +
ylab("CSSED") +
geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") +
theme_minimal(base_size = 15) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill = NA),
axis.ticks = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5)
)
# Show Plots
options(repr.plot.width = 15, repr.plot.height = 15)
<- eval_results$Plots
plots_list <- c(list(plot_cssed), plots_list)
plots_list ::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv")
cowplot
# Relative MSE
print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4)))
Same example using the tvc()
and dsc()
functions:
#########################################################
######### Forecasting quarterly U.S. inflation ##########
#### Please see Koop & Korobilis (2023) for further ####
#### details regarding the data & external forecasts ####
#########################################################
# Load Package
library("hdflex")
library("ggplot2")
library("cowplot")
########## Get Data ##########
# Load Package Data
<- inflation_data
inflation_data
# Set Target Variable
<- inflation_data[, 1]
y
# Set 'P-Signals'
<- inflation_data[, 2:442]
X
# Set 'F-Signals'
<- inflation_data[, 443:462]
Ext_F
# Get Dates and Number of Observations
<- rownames(inflation_data)
tdates <- length(tdates)
tlength
# First complete observation (no missing values)
<- which(complete.cases(inflation_data))[1]
first_complete
########## Rolling AR2-Benchmark ##########
# Set up matrix for predictions
<- matrix(NA, nrow = tlength,
benchmark ncol = 1, dimnames = list(tdates, "AR2"))
# Set Window-Size (15 years of quarterly data)
<- 15 * 4
window_size
# Time Sequence
<- seq(window_size, tlength - 1)
t_seq
# Loop with rolling window
for (t in t_seq) {
# Split Data for Training Train Data
<- cbind(int = 1, X[(t - window_size + 1):t, 1:2])
x_train <- y[(t - window_size + 1):t]
y_train
# Split Data for Prediction
<- cbind(int = 1, X[t + 1, 1:2, drop = FALSE])
x_pred
# Fit AR-Model
<- .lm.fit(x_train, y_train)
model_ar
# Predict and store in benchmark matrix
+ 1, ] <- x_pred %*% model_ar$coefficients
benchmark[t
}
########## STSC ##########
### Part 1: TVC-Function
# Set TV-C-Parameter
<- 5 * 4
init <- c(0.90, 0.95, 1.00)
lambda_grid <- c(0.94, 0.96, 0.98)
kappa_grid <- TRUE
bias
# Apply TVC-Function
<- hdflex::tvc(y,
tvc_results
X,
Ext_F,
init,
lambda_grid,
kappa_grid,
bias)
# Assign TVC-Results
<- tvc_results$Forecasts$Point_Forecasts
forecast_tvc <- tvc_results$Forecasts$Variance_Forecasts
variance_tvc
# First complete forecast period (no missing values)
<- seq(which(complete.cases(forecast_tvc))[1], tlength)
sub_period
### Part 2: DSC-Function
# Set DSC-Parameter
<- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
gamma_grid 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
<- c(1:100, sapply(1:4, function(i) floor(i * ncol(forecast_tvc) / 4)))
psi_grid <- 0.95
delta <- (init / 2) + 1
burn_in <- 1
burn_in_dsc <- 5
metric <- TRUE
equal_weight <- NULL
incl
# Apply DSC-Function
<- hdflex::dsc(y[sub_period],
dsc_results drop = FALSE],
forecast_tvc[sub_period, , drop = FALSE],
variance_tvc[sub_period, ,
gamma_grid,
psi_grid,
delta,
burn_in,
burn_in_dsc,
metric,
equal_weight,
incl,NULL)
# Assign DSC-Results
<- dsc_results$Forecasts$Point_Forecasts
pred_stsc <- dsc_results$Forecasts$Variance_Forecasts
var_stsc
########## Evaluation ##########
# Define Evaluation Period (OOS-Period)
<- which(tdates[sub_period] >= "1991-04-01" & tdates[sub_period] <= "2021-12-01")
eval_period
# Get Evaluation Summary for STSC
<- summary(obj = dsc_results, eval_period = eval_period)
eval_results
# Calculate (Mean-)Squared-Errors for AR2-Benchmark
<- y[sub_period][eval_period]
oos_y <- benchmark[sub_period[eval_period], , drop = FALSE]
oos_benchmark <- (oos_y - oos_benchmark)^2
se_ar2 <- mean(se_ar2)
mse_ar2
# Create Cumulative Squared Error Differences (CSSED) Plot
<- cumsum(se_ar2 - eval_results$MSE[[2]])
cssed <- ggplot(
plot_cssed data.frame(eval_period, cssed),
aes(x = eval_period, y = cssed)
+
) geom_line() +
ylim(-0.0008, 0.0008) +
ggtitle("Cumulative Squared Error Differences") +
xlab("Time Index") +
ylab("CSSED") +
geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") +
theme_minimal(base_size = 15) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill = NA),
axis.ticks = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5)
)
# Show Plots
options(repr.plot.width = 15, repr.plot.height = 15)
<- eval_results$Plots
plots_list <- c(list(plot_cssed), plots_list)
plots_list ::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv")
cowplot
# Relative MSE
print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4)))
Philipp Adämmer, Sven Lehmann and Rainer Schüssler
GPL (>= 2)