The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

user_sample_3

Prerequisites

Installation for user as described in README

Steps

  1. adapt the data_dir (in windows e.g. //de-inf-008/$/Eigene Dateien/ubair-master/data/)
library(ubair)
library(ggplot2)
# set data_dir where the data is stored
data_dir <- "../../Daten/user_sample_data/"
# This might take a few seconds for large files
data <- load_uba_data_from_dir(data_dir = data_dir)
  1. Set sample variables.
target <- "NO2"
stations <- list(
  Lünen = "DENW006", Bottrop = "DENW021",
  Köln = "DENW212"
)
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR")

# dates for 9 Euro ticket effect
application_start <- lubridate::ymd("20220301") # = start reference time
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
application_end <- lubridate::ymd("20220831") # = end effect time

buffer <- 0 # number of data points to be ignored before effect

trend <- "linear"
model_type <- "lightgbm"

window_size <- 14 # days of data to calculate the mean in prediction results
  1. create a params.yaml. Either
params <- load_params()
# adapt params programatically
params$target <- target
params$meteo_variables <- meteo_variables
  1. Prepare data of each station for training and run counterfactual. See function documentation for further details.
results_all <- data.table::data.table()
for (station_name in names(stations)) {
  env_data <- clean_data(data, station = stations[station_name])
  dt_prepared <- prepare_data_for_modelling(env_data, params)
  dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
  split_data <- split_data_counterfactual(
    dt_prepared,
    application_start = application_start,
    application_end = application_end
  )
  res <- run_counterfactual(split_data,
    params,
    detrending_function = trend,
    model_type = model_type,
    alpha = 0.9,
    log_transform = TRUE
  )
  predictions <- res$prediction
  predictions[, station_name := station_name]
  predictions[, station := stations[station_name]]
  predictions[, model := model_type]
  predictions[, trend := trend]
  results_all <- rbind(results_all, predictions)
}
#> Warning in log(dt_train_new$value): NaNs wurden erzeugt
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000316 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1549
#> [LightGBM] [Info] Number of data points in the train set: 60472, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> Warning in log(dt_train_new$value): NaNs wurden erzeugt
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000346 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1550
#> [LightGBM] [Info] Number of data points in the train set: 60133, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000315 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1552
#> [LightGBM] [Info] Number of data points in the train set: 60494, number of used features: 8
#> [LightGBM] [Info] Start training from score 0.000000
  1. Plot bias (prediction - observation) to compare multiple stations
results_all[, bias := prediction - value]
results_all[, station_label := paste0(station_name, " (", station, ")")]
results_all[, station := as.character(unlist(station))]
results_all[, smoothed_bias := data.table::frollmean(bias,
  n = 24 * window_size,
  align = "center"
),
by = station
]
bias_plot <- ggplot2::ggplot(
  results_all,
  ggplot2::aes(
    x = date,
    y = smoothed_bias,
    color = station_label,
    group = station_label
  )
) +
  geom_line() +
  labs(
    title = paste("NO2 Bias Over Time by Station (", window_size, "-day mean)"),
    x = "Date",
    y = "NO2 Bias \n(prediction - observation) [µg/m³]",
    color = "Station"
  ) +
  ggplot2::geom_vline(
    xintercept = date_effect_start, linetype = 4,
    colour = "black"
  ) +
  ggplot2::theme_bw() +
  ggplot2::scale_x_datetime(
    date_minor_breaks = "1 month",
    date_breaks = "2 month"
  ) +
  ggplot2::theme(legend.position = "bottom")
bias_plot
#> Warning: Removed 1005 rows containing missing values or values outside the scale range
#> (`geom_line()`).
plot of chunk bias_plot
plot of chunk bias_plot
  1. Plot counterfactual for Köln.
station_name_plot <- "Köln"
counterfactual_plot <- plot_counterfactual(
  dplyr::filter(results_all, station_name == station_name_plot), params,
  window_size = window_size,
  date_effect_start,
  buffer = buffer
)
counterfactual_plot

plot of chunk counterfactual_plot_3 ```

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.