The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
Run multiple stations and models for the 9-euro ticket
# Experiment configuration ----
sample_name <- "NeunEuroTicket"
target <- "NO2"
stations <- list(
  Luenen = "DENW006",
  AachenBurtscheid = "DENW094"
)
meteo_variables <- c(
  "TMP", "RFE", "WIG", "WIR", "LDR"
)

# Time windows ----
application_start <- lubridate::ymd("20220301") # start of the reference time
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
application_end <- lubridate::ymd("20220831") # end of the effect time
buffer <- 0 # number of data points to be ignored before effect

# Modelling options ----
trend <- "linear"
# hyperparameters can be set in params/params.yaml
model_types <- c(
  "lightgbm",
  "rf",
  "dynamic_regression",
  "fnn"
)
window_size <- 14 # days of data to calculate the mean in prediction results

# Load data and parameters ----
# NOTE(review): `data_dir` is not defined in this section; it has to exist
# before this line runs.
# This might take a few seconds for large files
data <- load_uba_data_from_dir(data_dir = data_dir)

# Start from the loaded parameters and override target and meteo variables
# with the values chosen above.
params <- load_params()
params[["target"]] <- target
params[["meteo_variables"]] <- meteo_variables
# Run every model type for every station and collect predictions and metrics.
#
# Relies on objects defined before this loop: `stations`, `data`, `params`,
# `application_start`, `application_end`, `model_types`, `trend`,
# `window_size`, `date_effect_start` and `buffer`.
#
# Note: `predictions_all`, `metrics_all` and `predictions_save` are rebuilt
# for each station, so after the loop they hold the last station's results.
for (station_name in names(stations)) {
  station <- stations[[station_name]]

  # station-specific data preparation
  env_data <- clean_data(data, station = station)
  dt_prepared <- prepare_data_for_modelling(env_data, params)
  # keep complete rows only (assumes dt_prepared is a data.table)
  dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
  split_data <- split_data_counterfactual(
    dt_prepared,
    application_start = application_start,
    application_end = application_end
  )

  # One slot per model. The results are combined once after the inner loop
  # instead of growing a data.table with rbind() on every iteration.
  n_models <- length(model_types)
  predictions_list <- vector("list", n_models)
  metrics_list <- vector("list", n_models)

  for (i in seq_len(n_models)) {
    model_type <- model_types[[i]]
    message(paste("start training:", station_name, station, model_type))
    res <- run_counterfactual(split_data,
      params,
      detrending_function = trend,
      model_type = model_type,
      alpha = 0.9,
      log_transform = FALSE
    )
    # work on a copy so the `:=` calls below do not modify `res$prediction`
    predictions <- data.table::copy(res$prediction)

    # plot
    bau_plot <- plot_counterfactual(predictions, params,
      window_size = window_size,
      date_effect_start,
      buffer = buffer
    )

    # evaluation
    metrics <- round(calc_performance_metrics(predictions,
      date_effect_start,
      buffer = buffer
    ), 2)
    effect <- estimate_effect_size(predictions,
      date_effect_start,
      buffer = buffer,
      verbose = FALSE
    )
    metrics["effect_size"] <- effect["absolute_effect"]
    metrics["relative_effect"] <- effect["relative_effect"]

    # add information for export
    metrics["model"] <- model_type
    metrics["trend"] <- trend
    metrics["station_name"] <- station_name
    metrics["station"] <- station
    # `buffer` is interpreted as a number of hours here
    metrics["buffer_start"] <- format(
      date_effect_start - as.difftime(buffer, units = "hours"),
      "%Y-%m-%d"
    )
    metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d")

    # one row per model
    metrics_dt <- data.table::as.data.table(t(metrics))
    metrics_list[[i]] <- metrics_dt

    # NOTE(review): inside `[`, data.table resolves the right-hand names
    # against columns of `predictions` first; the loop variables `station`
    # and `trend` are only used if no such columns exist - confirm.
    predictions[, station := station]
    predictions[, model := model_type]
    predictions[, trend := trend]
    predictions_list[[i]] <- predictions
  }

  # use.names = TRUE matches columns by name, as rbind() does for data.tables
  predictions_all <- data.table::rbindlist(predictions_list, use.names = TRUE)
  metrics_all <- data.table::rbindlist(metrics_list, use.names = TRUE)

  # Prepare predictions (hourly data) for export. Nothing is written to disk
  # in this section.
  predictions_save <- dplyr::select(
    predictions_all,
    c(
      date,
      value,
      prediction,
      prediction_lower,
      prediction_upper,
      station,
      model,
      trend
    )
  )
  predictions_save$date <- format(predictions_save$date, "%Y-%m-%d %H:%M")
}
#> start training: Luenen DENW006 lightgbm
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000319 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1549
#> [LightGBM] [Info] Number of data points in the train set: 60472, number of used features: 8
#> [LightGBM] [Info] Start training from score 0.000000
#> start training: Luenen DENW006 rf
#> start training: Luenen DENW006 dynamic_regression
#> Using data for dynamic regression training from 2021-01-22 01:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: Luenen DENW006 fnn
#> start training: AachenBurtscheid DENW094 lightgbm
#> [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031247 seconds.
#> You can set `force_col_wise=true` to remove the overhead.
#> [LightGBM] [Info] Total Bins 1550
#> [LightGBM] [Info] Number of data points in the train set: 60039, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> start training: AachenBurtscheid DENW094 rf
#> start training: AachenBurtscheid DENW094 dynamic_regression
#> Using data for dynamic regression training from 2021-01-10 04:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: AachenBurtscheid DENW094 fnn
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.