The hdflex
package implements the
“Signal-Transformed-Subset-Combination” (STSC) forecasting algorithm
developed by Adämmer, Lehmann,
and Schüssler (2025). Please cite the paper when using this
package.
The package provides three core functions:
stsc()
: Directly applies the complete STSC forecasting
algorithm described in Adämmer, Lehmann,
and Schüssler (2025).tvc()
: Transforms predictive signals into univariate
density forecasts using time-varying coefficient (TVC) models. Each
model generates a conditionally Gaussian predictive density for each
signal at each point in time. This function performs the first part of
the STSC algorithm.dsc()
: Dynamically selects a subset of candidate
forecast models for each period based on their past density forecast
accuracy. This function performs the second part of the STSC
algorithm.Install the released version of hdflex
from CRAN:
install.packages("hdflex")
Alternatively, install the development version from GitHub:
# install.packages("devtools")
::install_github("https://github.com/lehmasve/hdflex") devtools
Compiler Requirements: The package involves C++ source code compilation during installation. Ensure you have the necessary compilers:
The following examples demonstrate how to forecast quarterly U.S. inflation. For further details on the data and external forecasts, please see Koop & Korobilis (2023).
stsc()
functionThis example shows how to apply the STSC algorithm directly using the
stsc()
function.
#########################################################
######### Forecasting quarterly U.S. inflation ##########
#### Please see Koop & Korobilis (2023) for further ####
#### details regarding the data & external forecasts ####
#########################################################
# Load Package
library("hdflex")
library("ggplot2")
library("cowplot")
########## Get Data ##########
# Load Package Data
<- inflation_data
inflation_data
# Set Target Variable
<- inflation_data[, 1]
y
# Set 'P-Signals'
<- inflation_data[, 2:442]
X
# Set 'F-Signals'
<- inflation_data[, 443:462]
Ext_F
# Get Dates and Number of Observations
<- rownames(inflation_data)
tdates <- length(tdates)
tlength
# First complete observation (no missing values)
<- which(complete.cases(inflation_data))[1]
first_complete
########## Rolling AR2-Benchmark ##########
# Set up matrix for predictions
<- matrix(NA, nrow = tlength,
benchmark ncol = 1, dimnames = list(tdates, "AR2"))
# Set Window-Size (15 years of quarterly data)
<- 15 * 4
window_size
# Time Sequence
<- seq(window_size, tlength - 1)
t_seq
# Loop with rolling window
for (t in t_seq) {
# Split Data for Training Train Data
<- cbind(int = 1, X[(t - window_size + 1):t, 1:2])
x_train <- y[(t - window_size + 1):t]
y_train
# Split Data for Prediction
<- cbind(int = 1, X[t + 1, 1:2, drop = FALSE])
x_pred
# Fit AR-Model
<- .lm.fit(x_train, y_train)
model_ar
# Predict and store in benchmark matrix
+ 1, ] <- x_pred %*% model_ar$coefficients
benchmark[t
}
########## STSC ##########
# Set TV-C-Parameter
<- 5 * 4
init <- c(0.90, 0.95, 1.00)
lambda_grid <- c(0.94, 0.96, 0.98)
kappa_grid <- TRUE
bias
# Set DSC-Parameter
<- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
gamma_grid 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
<- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)
n_tvc <- c(1:100, sapply(1:4, function(i) floor(i * n_tvc / 4)))
psi_grid <- 0.95
delta <- first_complete + init / 2
burn_in <- 1
burn_in_dsc <- 5
metric <- TRUE
equal_weight <- NULL
incl <- FALSE
parallel <- NULL
n_threads
# Apply STSC-Function
<- hdflex::stsc(y,
results
X,
Ext_F,
init,
lambda_grid,
kappa_grid,
bias,
gamma_grid,
psi_grid,
delta,
burn_in,
burn_in_dsc,
metric,
equal_weight,
incl,
parallel,
n_threads,NULL)
########## Evaluation ##########
# Define Evaluation Period (OOS-Period)
<- which(tdates >= "1991-04-01" & tdates <= "2021-12-01")
eval_period
# Apply Evaluation Summary for STSC
<- summary(obj = results, eval_period = eval_period)
eval_results
# Calculate (Mean-)Squared-Errors for AR2-Benchmark
<- (y[eval_period] - benchmark[eval_period, 1])^2
se_ar2 <- mean(se_ar2)
mse_ar2
# Create Cumulative Squared Error Differences (CSSED) Plot
<- cumsum(se_ar2 - eval_results$MSE[[2]])
cssed <- ggplot(
plot_cssed data.frame(eval_period, cssed),
aes(x = eval_period, y = cssed)
+
) geom_line() +
ylim(-0.0008, 0.0008) +
ggtitle("Cumulative Squared Error Differences") +
xlab("Time Index") +
ylab("CSSED") +
geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") +
theme_minimal(base_size = 15) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill = NA),
axis.ticks = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5)
)
# Show Plots
options(repr.plot.width = 15, repr.plot.height = 15)
<- eval_results$Plots
plots_list <- c(list(plot_cssed), plots_list)
plots_list ::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv")
cowplot
# Relative MSE
print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4)))
tvc()
and dsc()
functions
separatelyThis example demonstrates the two-step application of the STSC
algorithm, first using tvc()
to generate individual model
forecasts and then dsc()
to combine them.
#########################################################
######### Forecasting quarterly U.S. inflation ##########
#### Please see Koop & Korobilis (2023) for further ####
#### details regarding the data & external forecasts ####
#########################################################
# Load Package
library("hdflex")
library("ggplot2")
library("cowplot")
########## Get Data ##########
# Load Package Data
<- inflation_data
inflation_data
# Set Target Variable
<- inflation_data[, 1]
y
# Set 'P-Signals'
<- inflation_data[, 2:442]
X
# Set 'F-Signals'
<- inflation_data[, 443:462]
Ext_F
# Get Dates and Number of Observations
<- rownames(inflation_data)
tdates <- length(tdates)
tlength
# First complete observation (no missing values)
<- which(complete.cases(inflation_data))[1]
first_complete
########## Rolling AR2-Benchmark ##########
# Set up matrix for predictions
<- matrix(NA, nrow = tlength,
benchmark ncol = 1, dimnames = list(tdates, "AR2"))
# Set Window-Size (15 years of quarterly data)
<- 15 * 4
window_size
# Time Sequence
<- seq(window_size, tlength - 1)
t_seq
# Loop with rolling window
for (t in t_seq) {
# Split Data for Training Train Data
<- cbind(int = 1, X[(t - window_size + 1):t, 1:2])
x_train <- y[(t - window_size + 1):t]
y_train
# Split Data for Prediction
<- cbind(int = 1, X[t + 1, 1:2, drop = FALSE])
x_pred
# Fit AR-Model
<- .lm.fit(x_train, y_train)
model_ar
# Predict and store in benchmark matrix
+ 1, ] <- x_pred %*% model_ar$coefficients
benchmark[t
}
########## STSC ##########
### Part 1: TVC-Function
# Set TV-C-Parameter
<- 5 * 4
init <- c(0.90, 0.95, 1.00)
lambda_grid <- c(0.94, 0.96, 0.98)
kappa_grid <- TRUE
bias
# Apply TVC-Function
<- hdflex::tvc(y,
tvc_results
X,
Ext_F,
init,
lambda_grid,
kappa_grid,
bias)
# Assign TVC-Results
<- tvc_results$Forecasts$Point_Forecasts
forecast_tvc <- tvc_results$Forecasts$Variance_Forecasts
variance_tvc
# First complete forecast period (no missing values)
<- seq(which(complete.cases(forecast_tvc))[1], tlength)
sub_period
### Part 2: DSC-Function
# Set DSC-Parameter
<- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
gamma_grid 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
<- c(1:100, sapply(1:4, function(i) floor(i * ncol(forecast_tvc) / 4)))
psi_grid <- 0.95
delta <- (init / 2) + 1
burn_in <- 1
burn_in_dsc <- 5
metric <- TRUE
equal_weight <- NULL
incl
# Apply DSC-Function
<- hdflex::dsc(y[sub_period],
dsc_results drop = FALSE],
forecast_tvc[sub_period, , drop = FALSE],
variance_tvc[sub_period, ,
gamma_grid,
psi_grid,
delta,
burn_in,
burn_in_dsc,
metric,
equal_weight,
incl,NULL)
# Assign DSC-Results
<- dsc_results$Forecasts$Point_Forecasts
pred_stsc <- dsc_results$Forecasts$Variance_Forecasts
var_stsc
########## Evaluation ##########
# Define Evaluation Period (OOS-Period)
<- which(tdates[sub_period] >= "1991-04-01" & tdates[sub_period] <= "2021-12-01")
eval_period
# Get Evaluation Summary for STSC
<- summary(obj = dsc_results, eval_period = eval_period)
eval_results
# Calculate (Mean-)Squared-Errors for AR2-Benchmark
<- y[sub_period][eval_period]
oos_y <- benchmark[sub_period[eval_period], , drop = FALSE]
oos_benchmark <- (oos_y - oos_benchmark)^2
se_ar2 <- mean(se_ar2)
mse_ar2
# Create Cumulative Squared Error Differences (CSSED) Plot
<- cumsum(se_ar2 - eval_results$MSE[[2]])
cssed <- ggplot(
plot_cssed data.frame(eval_period, cssed),
aes(x = eval_period, y = cssed)
+
) geom_line() +
ylim(-0.0008, 0.0008) +
ggtitle("Cumulative Squared Error Differences") +
xlab("Time Index") +
ylab("CSSED") +
geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") +
theme_minimal(base_size = 15) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill = NA),
axis.ticks = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5)
)
# Show Plots
options(repr.plot.width = 15, repr.plot.height = 15)
<- eval_results$Plots
plots_list <- c(list(plot_cssed), plots_list)
plots_list ::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv")
cowplot
# Relative MSE
print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4)))
hdflex
with rpy2
This section illustrates how to utilize the hdflex
R
package within a Python environment using the rpy2
library.
rpy2
acts as an interface to R, enabling you to execute R
functions and manipulate R objects directly from Python. The example
replicates the U.S. inflation forecasting task shown previously.
Prerequisites:
Ensure the following are installed to run this example:
hdflex
package.rpy2
,
pandas
, and matplotlib
. Install them via pip:
bash pip install rpy2 pandas matplotlib
hdflex
,
ggplot2
, and cowplot
. Install them in your R
environment:
R install.packages(c("hdflex", "ggplot2", "cowplot"))
### Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.linalg import lstsq
from rpy2 import robjects
from rpy2.robjects import pandas2ri, numpy2ri
from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.lib import grdevices
from IPython.display import display, Image
# Import R packages
= importr("hdflex")
hdflex = importr("ggplot2")
ggplot2 = importr("cowplot")
cowplot = importr("base")
base
#########################################################
######### Forecasting quarterly U.S. inflation ##########
#### Please see Koop & Korobilis (2023) for further ####
#### details regarding the data & external forecasts ####
#########################################################
########## Get Data ##########
# Load Package Data
= robjects.r["inflation_data"]
inflation_r
with localconverter(robjects.default_converter + pandas2ri.converter):
= robjects.conversion.rpy2py(inflation_r)
tmp
= list(robjects.r["rownames"](inflation_r))
row_names = list(robjects.r["colnames"](inflation_r))
col_names = pd.DataFrame(tmp, index=row_names, columns=col_names)
inflation_df = pd.to_datetime(inflation_df.index)
inflation_df.index
# Set Target Variable, P-Signals and F-Signals
= inflation_df.iloc[:, 0].to_numpy()
y = inflation_df.iloc[:, 1:443].to_numpy()
X = inflation_df.iloc[:, 443:463].to_numpy()
Ext_F
# Dates, Observations and First Complete Row
= len(inflation_df)
tlength = inflation_df.index
tdates = np.where(~inflation_df.isna().any(axis=1))[0][0]
first_complete
########## Rolling AR2-Benchmark ##########
# Set up matrix for predictions
= np.full(tlength, np.nan)
benchmark
# Set Window Size (15 years, quarterly data)
= 15 * 4
window_size
# Time Sequence
for t in range(window_size - 1, tlength - 1):
= np.column_stack((
x_train
np.ones(window_size),- window_size + 1 : t + 1, :2]
X[t
))= y[t - window_size + 1 : t + 1]
y_train
# OLS coefficients
*_ = lstsq(x_train, y_train, rcond=None)
beta,
# 1-step-ahead prediction
= np.concatenate(([1.0], X[t + 1, :2]))
x_pred + 1] = x_pred @ beta
benchmark[t
########## STSC ##########
# Convert Python numpy arrays to R objects for hdflex
with localconverter(robjects.default_converter + numpy2ri.converter):
= robjects.conversion.py2rpy(y)
y_r = robjects.conversion.py2rpy(X)
X_r = robjects.conversion.py2rpy(Ext_F)
Ext_F_r
# Set TV-C-Parameter
= 5 * 4
init = robjects.FloatVector([0.90, 0.95, 1.00])
lambda_grid = robjects.FloatVector([0.94, 0.96, 0.98])
kappa_grid = True
bias
# Set DSC-Parameter
= robjects.FloatVector(
gamma_grid 0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
[0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00]
)= (X.shape[1] + Ext_F.shape[1]) * len(lambda_grid) * len(kappa_grid)
n_tvc = list(range(1, 101)) + [int(i * n_tvc / 4) for i in range(1, 5)]
psi_vec = robjects.IntVector(psi_vec)
psi_grid = 0.95
delta = int(first_complete + init / 2)
burn_in = 1
burn_in_dsc = 5
metric = True
equal_weight = robjects.NULL
incl = False
parallel = robjects.NULL
n_threads
# Apply STSC-Function
= hdflex.stsc(
results
y_r, X_r, Ext_F_r,
init,
lambda_grid, kappa_grid, bias,
gamma_grid, psi_grid, delta,
burn_in, burn_in_dsc,
metric, equal_weight,
incl, parallel, n_threads, robjects.NULL
)
########## Evaluation ##########
# Define Evaluation Period (OOS-Period)
= np.where(
eval_idx >= "1991-04-01") & (tdates <= "2021-12-01")
(tdates 0]
)[
# Apply Evaluation Summary for STSC
= robjects.r["summary"]
summary = summary(results, eval_period = robjects.IntVector(eval_idx + 1))
eval_res
with localconverter(robjects.default_converter + pandas2ri.converter):
= robjects.conversion.rpy2py(eval_res.rx2("MSE"))
mse_vec = mse_vec[0].item()
mse_stsc
# Calculate MSE for AR2-Benchmark
= (y[eval_idx] - benchmark[eval_idx])**2
se_ar2 = np.nanmean(se_ar2)
mse_ar2
# Print MSE values
print(f"Relative MSE (STSC / AR2 benchmark): {mse_stsc / mse_ar2:.3f}")
# Create Cumulative Squared Error Differences (CSSED) Plot
= np.cumsum(se_ar2 - np.asarray(mse_vec[1], dtype=float))
cssed =(10, 6))
plt.figure(figsize="black")
plt.plot(tdates[eval_idx], cssed, color-0.0008, 0.0008)
plt.ylim("Cumulative Squared-Error Differences (AR2 – STSC)")
plt.title("CSSED")
plt.ylabel("Time Index", fontsize=12)
plt.xlabel(0, linestyle="--", lw=1)
plt.axhline(False)
plt.grid(
plt.tight_layout()
plt.show()
# Retrieve ggplot objects from R
= eval_res.rx2("Plots")
plots for p in plots:
with grdevices.render_to_bytesio(
=6, height=4, units="in", res=150) as img:
grdevices.png, width
ggplot2.print_ggplot(p) display(Image(img.getvalue()))
Philipp Adämmer, Sven Lehmann and Rainer Schüssler.
GPL (>= 2)