The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

user_sample_2

Run multiple stations and models for the 9-Euro-Ticket

  1. Adapt the directory path to your local setup.
# Directory that holds the input data files. It is a relative path, so it is
# resolved against the current working directory; adjust it to your setup.
# The value is handed to load_uba_data_from_dir() further down.
data_dir <- "../../Daten/user_sample_data/"
  2. Adapt this part to the effect, target, and stations under investigation.
# ---- What is analysed ----
sample_name <- "NeunEuroTicket"
# target variable; copied into params$target before modelling
target <- "NO2"
# label = station code; the code is what gets passed to clean_data()
stations <- list(
  Luenen = "DENW006",
  AachenBurtscheid = "DENW094"
)
# copied into params$meteo_variables before modelling
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR")

# ---- Time windows ----
application_start <- lubridate::ymd("20220301") # = start reference time
application_end <- lubridate::ymd("20220831") # = end effect time
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
# number of data points to be ignored before effect
buffer <- 0

# ---- Modelling ----
# passed to run_counterfactual() as `detrending_function`
trend <- "linear"
# one counterfactual run per entry and station;
# hyperparameters can be set in params/params.yaml
model_types <- c(
  "lightgbm",
  "rf",
  "dynamic_regression",
  "fnn"
)

# ---- Plotting ----
# days of data to calculate the mean in prediction results
window_size <- 14
  3. Load data and train models. This part does not necessarily need to be changed.
library(ubair)

# Load all input data found in `data_dir`.
# This might take a few seconds for large files
data <- load_uba_data_from_dir(data_dir = data_dir)

# Start from the parameter set returned by load_params() and override the
# target and the meteorological variables with the values configured above.
params <- load_params()
params$target <- target
params$meteo_variables <- meteo_variables

# Results per station, keyed by station name. `predictions_all`,
# `metrics_all` and `predictions_save` are rebuilt for every station, so
# without these containers only the last station's results would remain
# available once the loop has finished.
predictions_by_station <- list()
metrics_by_station <- list()
plots_by_station <- list()

for (station_name in names(stations)) {
  station <- stations[[station_name]]

  # Prepare the station's data once; it is shared by all model types.
  env_data <- clean_data(data, station = station)
  dt_prepared <- prepare_data_for_modelling(env_data, params)
  # keep only rows without missing values
  dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
  split_data <- split_data_counterfactual(
    dt_prepared,
    application_start = application_start,
    application_end = application_end
  )

  # Preallocate one slot per model; the pieces are bound together after the
  # model loop instead of growing a table with rbind() on every iteration.
  prediction_parts <- vector("list", length(model_types))
  metric_parts <- vector("list", length(model_types))
  station_plots <- vector("list", length(model_types))
  names(station_plots) <- model_types

  for (i in seq_along(model_types)) {
    model_type <- model_types[[i]]
    message(paste("start training:", station_name, station, model_type))
    res <- run_counterfactual(split_data,
      params,
      detrending_function = trend,
      model_type = model_type,
      alpha = 0.9,
      log_transform = FALSE
    )
    # work on a copy so the columns added below do not modify `res`
    predictions <- data.table::copy(res$prediction)

    # plot
    bau_plot <- plot_counterfactual(predictions, params,
      window_size = window_size,
      date_effect_start,
      buffer = buffer
    )
    # `x[i] <- list(value)` keeps the slot even if the value were NULL
    station_plots[i] <- list(bau_plot)

    # evaluation (performance metrics rounded to two decimals)
    metrics <- round(calc_performance_metrics(predictions,
      date_effect_start,
      buffer = buffer
    ), 2)

    effect <- estimate_effect_size(predictions,
      date_effect_start,
      buffer = buffer,
      verbose = FALSE
    )
    metrics["effect_size"] <- effect["absolute_effect"]
    metrics["relative_effect"] <- effect["relative_effect"]
    # add information for export
    metrics["model"] <- model_type
    metrics["trend"] <- trend
    metrics["station_name"] <- station_name
    metrics["station"] <- station
    # the buffer is subtracted in hours here
    metrics["buffer_start"] <- format(
      date_effect_start - as.difftime(buffer, units = "hours"),
      "%Y-%m-%d"
    )
    metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d")
    # one-row table: metric names become column names
    metric_parts[[i]] <- data.table::as.data.table(t(metrics))

    # Tag the predictions with station/model/trend. set() always takes the
    # value from this script's variables. With `predictions[, trend := trend]`
    # a column of the same name already present in `predictions` would be
    # picked up on the right-hand side, making the assignment a no-op.
    data.table::set(predictions, j = "station", value = station)
    data.table::set(predictions, j = "model", value = model_type)
    data.table::set(predictions, j = "trend", value = trend)
    prediction_parts[[i]] <- predictions
  }

  # use.names = TRUE matches columns by name, like rbind() on data.tables
  metrics_all <- data.table::rbindlist(metric_parts, use.names = TRUE)
  predictions_all <- data.table::rbindlist(prediction_parts, use.names = TRUE)

  # save predictions (hourly data) and metrics
  predictions_save <- dplyr::select(
    predictions_all,
    c(
      date,
      value,
      prediction,
      prediction_lower,
      prediction_upper,
      station,
      model,
      trend
    )
  )
  predictions_save$date <- format(predictions_save$date, "%Y-%m-%d %H:%M")

  # keep this station's results so they are still available after the loop
  predictions_by_station[[station_name]] <- predictions_save
  metrics_by_station[[station_name]] <- metrics_all
  plots_by_station[[station_name]] <- station_plots
}
#> start training: Luenen DENW006 lightgbm
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000319 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1549
#> [LightGBM] [Info] Number of data points in the train set: 60472, number of used features: 8
#> [LightGBM] [Info] Start training from score 0.000000
#> start training: Luenen DENW006 rf
#> start training: Luenen DENW006 dynamic_regression
#> Using data for dynamic regression training from  2021-01-22 01:00:00 to  2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: Luenen DENW006 fnn
#> start training: AachenBurtscheid DENW094 lightgbm
#> [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031247 seconds.
#> You can set `force_col_wise=true` to remove the overhead.
#> [LightGBM] [Info] Total Bins 1550
#> [LightGBM] [Info] Number of data points in the train set: 60039, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> start training: AachenBurtscheid DENW094 rf
#> start training: AachenBurtscheid DENW094 dynamic_regression
#> Using data for dynamic regression training from  2021-01-10 04:00:00 to  2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: AachenBurtscheid DENW094 fnn

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.