library(ubair)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
effect_title <- "neun_euro_ticket"
model_types <- c("rf", "lightgbm", "dynamic_regression") # , "fnn")
station <- "DESN025"
station_name <- "Leipzig-Mitte"
window_size <- 14
trend <- "linear"
# set dates
training_start <- lubridate::ymd("20100101")
training_end <- lubridate::ymd("20220301")
application_start <- lubridate::ymd("20220301")
application_end <- lubridate::ymd("20220831")
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
buffer <- 0
# load data
params <- load_params()
env_data <- sample_data_DESN025
dt_prepared <- prepare_data_for_modelling(env_data, params)
dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
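As an optional check (not part of the original vignette output), the dimensions of the prepared, complete-case table can be inspected before splitting:
# illustrative check: rows and columns of the prepared data
dim(dt_prepared)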
split_data <- split_data_counterfactual(
dt_prepared, application_start,
application_end
)
predictions_all <- data.table::data.table()
metrics_all <- data.table::data.table()
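A quick sanity check of the split created above can also help; assuming split_data_counterfactual() returns a named list of data.tables (an assumption, this is not shown in the original output), the size of each part can be reported:
# illustrative check: number of rows in each part of the counterfactual split
sapply(split_data, nrow)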
for (model_type in model_types) {
print(paste("model = ", model_type))
print(paste("trend = ", trend))
res <- run_counterfactual(split_data,
params,
detrending_function = trend,
model_type = model_type,
alpha = 0.9,
log_transform = TRUE
)
counterfactual_plot <- plot_counterfactual(res$prediction, params,
window_size = window_size,
date_effect_start,
buffer = buffer
)
print(counterfactual_plot)
ggplot2::ggsave(paste0(model_type, "_", station, effect_title, ".png"),
plot = counterfactual_plot, width = 12, height = 4
)
# evaluation
metrics <- round(
calc_performance_metrics(res$prediction,
date_effect_start,
buffer = buffer
),
2
)
metrics["effect_size"] <- estimate_effect_size(res$prediction,
date_effect_start,
buffer = buffer,
verbose = FALSE
)["absolute_effect"]
print(metrics["effect_size"])
metrics["model"] <- model_type
metrics["trend"] <- trend
metrics["station"] <- station
metrics["station_name"] <- station_name
metrics["buffer_start"] <- format(
date_effect_start - as.difftime(buffer,
units = "hours"
),
"%Y-%m-%d"
)
metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d")
metrics_dt <- data.table::as.data.table(t(metrics))
predictions <- data.table::copy(res$prediction)
predictions[, station := station]
predictions[, model := model_type]
predictions[, trend := trend]
predictions_all <- rbind(predictions_all, predictions)
metrics_all <- rbind(metrics_all, metrics_dt)
}
#> [1] "model = rf"
#> [1] "trend = linear"#> $effect_size
#> [1] -3.054844
#>
#> [1] "model = lightgbm"
#> [1] "trend = linear"
#> [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002633 seconds.
#> You can set `force_col_wise=true` to remove the overhead.
#> [LightGBM] [Info] Total Bins 1557
#> [LightGBM] [Info] Number of data points in the train set: 103965, number of used features: 9
#> [LightGBM] [Info] Start training from score 0.000000
#> $effect_size
#> [1] -3.058521
#>
#> [1] "model = dynamic_regression"
#> [1] "trend = linear"
#> Using data for dynamic regression training from 2021-02-12 05:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> $effect_size
#> [1] -2.50006
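After the loop, metrics_all holds one row per model type with the rounded performance metrics and the estimated effect size. A compact side-by-side comparison can be pulled out directly (illustrative, not part of the original vignette):
# compare the estimated absolute effect across model types
metrics_all[, .(model, trend, effect_size)]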
predictions_save <- dplyr::select(
predictions_all,
c(
date,
value,
prediction,
prediction_lower,
prediction_upper,
station, model,
trend
)
)
# truncate timestamps to days for the later daily aggregation
predictions_save[, date := as.Date(date)]
# Calculate a centered window_size-day rolling mean on the hourly values
# (window_size * 24 rows) for each model and trend
predictions_save[, (names(predictions_save)[sapply(
predictions_save,
is.numeric
)]) :=
lapply(.SD, function(x) {
data.table::frollmean(x,
n = window_size * 24,
align = "center",
na.rm = TRUE
)
}),
by = .(model, trend),
.SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)]
]
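Note that data.table::frollmean() with align = "center" leaves NAs wherever the window does not fully fit, so roughly the first and last window_size / 2 days of each smoothed series are missing. A minimal illustration on a toy vector:
# centered rolling mean: values near both ends of the series are NA
data.table::frollmean(1:10, n = 3, align = "center")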
# reduce to daily mean for demonstrator export
daily_data <- predictions_save[, lapply(.SD, mean, na.rm = TRUE),
by = .(model, trend, date),
.SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)]
]
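The resulting table has one row per model, trend and day; a quick look at the first few rows (illustrative, not part of the original output):
# first rows of the daily aggregated export table
head(daily_data[order(model, trend, date)])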
data.table::fwrite(daily_data, file = paste0(
"preds_NO2_",
effect_title, "_",
station_name,
window_size,
"d_mean.csv"
))
data.table::fwrite(metrics_all, file = paste0(
"metrics_NO2_",
effect_title,
"_",
station_name,
".csv"
))