The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
Install the package as described in the README.
library(ubair)
library(ggplot2)
# --- Input data ----
# Directory that holds the sample data files
data_dir <- "../../Daten/user_sample_data/"
# Loading can take a few seconds when the files are large
data <- load_uba_data_from_dir(data_dir = data_dir)

# --- What to model ----
target <- "NO2"
# Display name = station code; the code is what gets passed to clean_data()
stations <- list(
  Lünen = "DENW006",
  Bottrop = "DENW021",
  Köln = "DENW212"
)
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR")

# --- Time windows for the 9 Euro ticket effect ----
# start of the reference period
application_start <- lubridate::ymd("20220301")
# moment the effect begins
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
# end of the effect period
application_end <- lubridate::ymd("20220831")
# number of data points to ignore right before the effect
buffer <- 0

# --- Model settings ----
trend <- "linear"
model_type <- "lightgbm"
# number of days averaged when smoothing the prediction results
window_size <- 14

# Start from the default parameters and override them programmatically
params <- load_params()
params[["target"]] <- target
params[["meteo_variables"]] <- meteo_variables
# Run the counterfactual workflow for every station and collect the predictions.
# Each station's table is stored in a preallocated list and bound once after
# the loop instead of growing `results_all` with rbind() on every iteration.
station_results <- vector("list", length(stations))
names(station_results) <- names(stations)
for (station_name in names(stations)) {
  # `[[` yields the station code as a plain character string; `[` would pass a
  # one-element list to clean_data() and produce a list column further down.
  station_code <- stations[[station_name]]
  env_data <- clean_data(data, station = station_code)
  dt_prepared <- prepare_data_for_modelling(env_data, params)
  # Keep only rows without any missing value
  dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
  split_data <- split_data_counterfactual(
    dt_prepared,
    application_start = application_start,
    application_end = application_end
  )
  res <- run_counterfactual(split_data,
    params,
    detrending_function = trend,
    model_type = model_type,
    alpha = 0.9,
    log_transform = TRUE
  )
  # Tag the predictions so the stations stay distinguishable after binding
  predictions <- res$prediction
  predictions[, station_name := station_name]
  predictions[, station := station_code]
  predictions[, model := model_type]
  predictions[, trend := trend]
  station_results[[station_name]] <- predictions
}
# use.names = TRUE matches columns by name, as rbind() on data.tables does
results_all <- data.table::rbindlist(station_results, use.names = TRUE)
#> Warning in log(dt_train_new$value): NaNs wurden erzeugt
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000316 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1549
#> [LightGBM] [Info] Number of data points in the train set: 60472, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> Warning in log(dt_train_new$value): NaNs wurden erzeugt
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000346 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1550
#> [LightGBM] [Info] Number of data points in the train set: 60133, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000315 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1552
#> [LightGBM] [Info] Number of data points in the train set: 60494, number of used features: 8
#> [LightGBM] [Info] Start training from score 0.000000
# Make sure the station code is a flat character column before it is used
# for labelling and grouping
results_all[, station := as.character(unlist(station))]
# Bias is prediction minus observation; the label reads "<name> (<code>)"
results_all[, `:=`(
  bias = prediction - value,
  station_label = paste0(station_name, " (", station, ")")
)]
# Centered rolling mean of the bias, computed separately per station.
# The window spans 24 * window_size rows -- presumably hourly data, so
# window_size days (TODO confirm the time resolution of the input).
results_all[,
  smoothed_bias := data.table::frollmean(
    bias,
    n = 24 * window_size,
    align = "center"
  ),
  by = station
]
# Line plot of the smoothed bias over time, one coloured line per station,
# with a vertical marker at the start of the effect period.
bias_plot <- ggplot2::ggplot(
  results_all,
  ggplot2::aes(
    x = date,
    y = smoothed_bias,
    color = station_label,
    group = station_label
  )
) +
  ggplot2::geom_line() +
  ggplot2::labs(
    # paste0() avoids the separator spaces paste() would insert, which
    # rendered the title as "( 14 -day mean)"
    title = paste0(
      "NO2 Bias Over Time by Station (", window_size, "-day mean)"
    ),
    x = "Date",
    y = "NO2 Bias \n(prediction - observation) [µg/m³]",
    color = "Station"
  ) +
  # dash-dotted vertical line where the effect starts
  ggplot2::geom_vline(
    xintercept = date_effect_start, linetype = 4,
    colour = "black"
  ) +
  ggplot2::theme_bw() +
  ggplot2::scale_x_datetime(
    date_minor_breaks = "1 month",
    date_breaks = "2 month"
  ) +
  ggplot2::theme(legend.position = "bottom")
bias_plot
#> Warning: Removed 1005 rows containing missing values or values outside the scale range
#> (`geom_line()`).
# Station (by display name) whose counterfactual should be plotted
station_name_plot <- "Köln"
# Restrict the combined results to that station and hand them to the
# plotting helper. The effect start date is passed unnamed, so it fills the
# first formal of plot_counterfactual() not already matched by name.
counterfactual_plot <- plot_counterfactual(
  dplyr::filter(results_all, station_name == station_name_plot),
  params,
  date_effect_start,
  window_size = window_size,
  buffer = buffer
)
counterfactual_plot
```
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.