The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Forecast Combination

Nickalus Redell

2020-05-05

Purpose

The purpose of this vignette is to illustrate the various approaches in forecsatML for producing final forecasts that are (a) a combination of short- and long-term forecasts as well as (b) a combination of many ML models at select forecast horizons.

The goal of forecastML::combine_forecasts() is to provide maximum flexibility when producing a single forecast that is expected to perform as well in the near-term as it is in the long-term.

Forecast Combination by Horizon

Load Packages & Data

# library(forecastML)
library(dplyr)
library(ggplot2)
library(glmnet)

data("data_seatbelts", package = "forecastML")
data <- data_seatbelts

1: One Model Training Function


horizons <- c(1, 3, 6, 9, 12)
data_train <- forecastML::create_lagged_df(data_seatbelts, type = "train", method = "direct",
                                           outcome_col = 1, lookback = 1:15, horizon = horizons)

windows <- forecastML::create_windows(data_train, window_length = 0)

model_fun <- function(data) {
  x <- as.matrix(data[, -1, drop = FALSE])
  y <- as.matrix(data[, 1, drop = FALSE])
  set.seed(1)
  model <- glmnet::cv.glmnet(x, y, nfolds = 5)
}

model_results <- forecastML::train_model(data_train, windows, model_name = "LASSO", model_function = model_fun)

prediction_fun <- function(model, data_features) {
  data_pred <- data.frame("y_pred" = predict(model, as.matrix(data_features)),
                          "y_pred_lower" = predict(model, as.matrix(data_features)) - 30,
                          "y_pred_upper" = predict(model, as.matrix(data_features)) + 30)
}

data_forecast <- forecastML::create_lagged_df(data_seatbelts, type = "forecast", method = "direct",
                                              outcome_col = 1, lookback = 1:15, horizon = horizons)

data_forecasts <- predict(model_results, prediction_function = list(prediction_fun), data = data_forecast)

data_forecasts <- forecastML::combine_forecasts(data_forecasts, type = "horizon")

plot(data_forecasts, data_actual = data_seatbelts[-(1:170), ], actual_indices = (1:nrow(data_seatbelts))[-(1:170)])


2: Multiple Model Training Functions


# LASSO

horizons <- c(1, 3, 6)
data_train <- forecastML::create_lagged_df(data_seatbelts, type = "train", method = "direct",
                                           outcome_col = 1, lookback = 1:15, horizon = horizons)

windows <- forecastML::create_windows(data_train, window_length = 0)

model_fun_lasso <- function(data) {
  x <- as.matrix(data[, -1, drop = FALSE])
  y <- as.matrix(data[, 1, drop = FALSE])
  set.seed(1)
  model <- glmnet::cv.glmnet(x, y, alpha = 1, nfolds = 5)
}

model_results <- forecastML::train_model(data_train, windows, model_name = "LASSO", model_function = model_fun_lasso)

prediction_fun <- function(model, data_features) {
  data_pred <- data.frame("y_pred" = predict(model, as.matrix(data_features)),
                          "y_pred_lower" = predict(model, as.matrix(data_features)) - 30,
                          "y_pred_upper" = predict(model, as.matrix(data_features)) + 30)
}

data_forecast <- forecastML::create_lagged_df(data_seatbelts, type = "forecast", method = "direct",
                                              outcome_col = 1, lookback = 1:15, horizon = horizons)

data_forecasts_lasso <- predict(model_results, prediction_function = list(prediction_fun), data = data_forecast)
#------------------------------------------------------------------------------
# Ridge

horizons <- c(9, 12)
data_train <- forecastML::create_lagged_df(data_seatbelts, type = "train", method = "direct",
                                           outcome_col = 1, lookback = 1:15, horizon = horizons)

windows <- forecastML::create_windows(data_train, window_length = 0)

model_fun_ridge <- function(data) {
  x <- as.matrix(data[, -1, drop = FALSE])
  y <- as.matrix(data[, 1, drop = FALSE])
  set.seed(1)
  model <- glmnet::cv.glmnet(x, y, alpha = 0, nfolds = 5)
}

model_results <- forecastML::train_model(data_train, windows, model_name = "Ridge", model_function = model_fun_ridge)

prediction_fun <- function(model, data_features) {
  data_pred <- data.frame("y_pred" = predict(model, as.matrix(data_features)),
                          "y_pred_lower" = predict(model, as.matrix(data_features)) - 30,
                          "y_pred_upper" = predict(model, as.matrix(data_features)) + 30)
}

data_forecast <- forecastML::create_lagged_df(data_seatbelts, type = "forecast", method = "direct",
                                              outcome_col = 1, lookback = 1:15, horizon = horizons)

data_forecasts_ridge <- predict(model_results, prediction_function = list(prediction_fun), data = data_forecast)
#------------------------------------------------------------------------------
# Forecast combination.

data_forecasts <- forecastML::combine_forecasts(data_forecasts_lasso, data_forecasts_ridge, type = "horizon")

plot(data_forecasts, data_actual = data_seatbelts[-(1:170), ], actual_indices = (1:nrow(data_seatbelts))[-(1:170)])


3: Multiple Model Training Functions - Aggregation


# LASSO

horizons <- c(1, 3, 6, 9, 12)
data_train <- forecastML::create_lagged_df(data_seatbelts, type = "train", method = "direct",
                                           outcome_col = 1, lookback = 1:15, horizon = horizons)

windows <- forecastML::create_windows(data_train, window_length = 0)

model_fun_lasso <- function(data) {
  x <- as.matrix(data[, -1, drop = FALSE])
  y <- as.matrix(data[, 1, drop = FALSE])
  set.seed(1)
  model <- glmnet::cv.glmnet(x, y, alpha = 1, nfolds = 5)
}

model_results <- forecastML::train_model(data_train, windows, model_name = "LASSO", model_function = model_fun_lasso)

prediction_fun <- function(model, data_features) {
  data_pred <- data.frame("y_pred" = predict(model, as.matrix(data_features)),
                          "y_pred_lower" = predict(model, as.matrix(data_features)) - 30,
                          "y_pred_upper" = predict(model, as.matrix(data_features)) + 30)
}

data_forecast <- forecastML::create_lagged_df(data_seatbelts, type = "forecast", method = "direct",
                                              outcome_col = 1, lookback = 1:15, horizon = horizons)

data_forecasts_lasso <- predict(model_results, prediction_function = list(prediction_fun), data = data_forecast)
#------------------------------------------------------------------------------
# Ridge

horizons <- c(1, 3, 6, 9, 12)
data_train <- forecastML::create_lagged_df(data_seatbelts, type = "train", method = "direct",
                                           outcome_col = 1, lookback = 1:15, horizon = horizons)

windows <- forecastML::create_windows(data_train, window_length = 0)

model_fun_ridge <- function(data) {
  x <- as.matrix(data[, -1, drop = FALSE])
  y <- as.matrix(data[, 1, drop = FALSE])
  set.seed(1)
  model <- glmnet::cv.glmnet(x, y, alpha = 0, nfolds = 5)
}

model_results <- forecastML::train_model(data_train, windows, model_name = "Ridge", model_function = model_fun_ridge)

prediction_fun <- function(model, data_features) {
  data_pred <- data.frame("y_pred" = predict(model, as.matrix(data_features)),
                          "y_pred_lower" = predict(model, as.matrix(data_features)) - 30,
                          "y_pred_upper" = predict(model, as.matrix(data_features)) + 30)
}

data_forecast <- forecastML::create_lagged_df(data_seatbelts, type = "forecast", method = "direct",
                                              outcome_col = 1, lookback = 1:15, horizon = horizons)

data_forecasts_ridge <- predict(model_results, prediction_function = list(prediction_fun), data = data_forecast)
#------------------------------------------------------------------------------
# Forecast combination.

data_forecasts <- forecastML::combine_forecasts(data_forecasts_lasso, data_forecasts_ridge,
                                                type = "horizon", aggregate = stats::median)

plot(data_forecasts, data_actual = data_seatbelts[-(1:170), ], actual_indices = (1:nrow(data_seatbelts))[-(1:170)])


These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.