The following script is used to create, compare, and save cached data to the data/ directory.
Starting in R 3.6.0, the default sampling algorithm used by the random-number generator was changed. To reproduce results generated with set.seed() under earlier R versions, you must first call RNGkind(sample.kind = "Rounding"). This call throws the expected warning:

Warning: non-uniform 'Rounding' sampler used
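For example, a minimal sketch of reproducing a pre-3.6.0 sampling result:

RNGkind(sample.kind = "Rounding") # emits the warning above
set.seed(1)
sample(10, 3) # draws now match those produced under R < 3.6.0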
Currently, when a random.cdisc.data data-generating function is created or modified, the code chunk below must be run to build the new or updated cached dataset and add it to the data/ directory. By default, the script detects changed source files via a git diff against origin/main; if a dataset that is a dependency of another dataset has been updated, the dependent dataset is updated as well. To manually specify which datasets should be updated, edit the data_to_update vector below, entering the desired dataset names.
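Since data_to_update is derived from updated_data by flatten_list_of_deps(), one way to pin the rebuild to specific datasets is to override updated_data just before that derivation (hypothetical names shown):

# Hypothetical manual override, placed before flatten_list_of_deps() is called:
updated_data <- c("adsl", "adae") # rebuild these plus all of their dependents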
Note: Prior to running the following code chunk, please ensure that you have reinstalled the random.cdisc.data package after completing all dataset modifications.
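For example, with a devtools-based workflow (one possible approach; adjust to your own setup):

# Reinstall the local checkout so the latest generating functions are used
devtools::install(upgrade = "never")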
# Helper functions
#
flatten_list_of_deps <- function(updated_data, data_deps) {
  # Return the names of all datasets that depend on cur_dep (higher deps)
  get_higher_deps <- function(cur_dep, data_deps) {
    sapply(seq_along(data_deps), function(x) {
      if (any(cur_dep %in% data_deps[[x]])) {
        names(data_deps)[x]
      }
    })
  }
  # Return the datasets that cur_dep itself depends on (lower deps)
  get_lower_deps <- function(cur_dep, data_deps) {
    data_deps[sapply(cur_dep, function(x) which(x == names(data_deps)))]
  }
  # Sort dataset names so that dependencies come before their dependents
  sort_data_deps <- function(upd_data, data_deps) {
    iup <- upd_data
    for (ud in upd_data) {
      up <- unlist(get_lower_deps(ud, data_deps))
      if (any(up %in% upd_data)) {
        iup <- unique(unlist(c(up[up %in% upd_data], iup)))
      }
    }
    iup
  }
  # First, sort the initially updated datasets by their dependencies
  fin_up <- sort_data_deps(updated_data, data_deps)
  # Then pull in the higher-level dependents of each entry, keeping order
  cnt <- 1
  while (cnt <= length(fin_up)) {
    cur_deps <- unlist(
      get_higher_deps(fin_up[cnt], data_deps)
    )
    if (!is.null(cur_deps)) {
      cur_deps <- sort_data_deps(cur_deps, data_deps)
      fin_up <- unique(c(fin_up[seq_len(cnt)], cur_deps, fin_up[-seq_len(cnt)]))
    }
    cnt <- cnt + 1
  }
  fin_up
}
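To make the traversal concrete, here is a toy call on a hypothetical three-dataset dependency tree (the dependency assignments are illustrative, not the package's real ones); updating adsl pulls in its dependents in dependency order:

toy_deps <- list(
  adsl = character(0), # no dependencies
  adae = "adsl", # depends on adsl
  adtte = c("adsl", "adae") # depends on both
)
flatten_list_of_deps("adsl", toy_deps)
#> [1] "adsl"  "adae"  "adtte"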
rebuild_cached_data.R
library(random.cdisc.data)
library(diffdf)
library(dplyr)
# Call function to match random number generation from previous R versions
RNGkind(sample.kind = "Rounding")
# Datasets must be listed after all of their dependencies
# e.g. adsl is a dependency for all other datasets so it is listed first.
pkg_dir <- dirname(getwd())
# Listing source files and extraction of datasets' names
src_files <- list.files(paste0(pkg_dir, "/R"))
data_nms <- src_files[grepl("^ra", src_files)] %>%
  stringr::str_remove(pattern = "^r") %>%
  stringr::str_remove(pattern = "\\.R$") %>%
  sort()
# Exception handling
data_nms <- data_nms[data_nms != "adsaftte"] # Unbuilt for now
# Construction of dependency tree based on formals
data_deps <- sapply(
  data_nms,
  function(x) {
    dat_args <- names(formals(paste0("r", x)))
    dat_args[dat_args %in% data_nms]
  }
)
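# Illustration (shape only; actual entries depend on the package sources):
# data_deps is a named list such as list(adsl = character(0), adae = "adsl", ...),
# where each element names the cached datasets its generating function accepts
# as arguments.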
git_call <- "git diff origin/main --name-only"
updated_files <- tryCatch(
  system(git_call, intern = TRUE),
  error = function(e) e
)
status_uf <- attr(updated_files, "status")
if (inherits(updated_files, "error") || (!is.null(status_uf) && status_uf == 1)) {
  message("Found the following error in git call: ", git_call)
  if (inherits(updated_files, "error")) message(conditionMessage(updated_files))
  message(
    "The calculation continues as default by recreating all datasets ",
    "and updating the cached data if any change is found."
  )
  updated_data <- data_nms
} else {
  updated_data <- updated_files[grepl("^R\\/", updated_files)] %>%
    stringr::str_remove("^R\\/") %>%
    stringr::str_remove(pattern = "^r") %>%
    stringr::str_remove(pattern = "\\.R$")
}
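# Illustration (hypothetical): if git reports R/radsl.R and R/radae.R as
# changed, updated_data becomes c("adsl", "adae").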
if (length(updated_data) != 0) {
  stopifnot(all(updated_data %in% names(data_deps)))
  data_to_update <- flatten_list_of_deps(updated_data, data_deps)
  default_args <- list(seed = 1, na_vars = list(), who_coding = TRUE, percent = 80, number = 2)
  # Generate and save updated cached datasets
  for (dat in data_to_update) {
    # Match arguments with defaults
    dat_args <- default_args[names(default_args) %in% names(formals(paste0("r", dat)))]
    # Get the cached dependencies that are already there (for adsl this is list())
    dat_deps <- lapply(data_deps[[dat]], function(x) get(paste0("c", x)))
    # Main call to the data-generating function
    cdataset <- do.call(paste0("r", dat), c(dat_args, dat_deps))
    # Preview differences
    cat("\nSaving cached data for dataset", paste0("*", dat, "*"), "with the following changes found (diffdf):\n")
    diff_test <- diffdf(get(paste0("c", dat)), cdataset)
    print(diff_test)
    # Check if there is any actual change to the data
    if (length(diff_test) > 0) { # No difference -> empty list()
      # Stamp the creation date before assigning and saving, so the attribute
      # is only refreshed when diffdf reports actual changes
      attr(cdataset, "creation date") <- lubridate::date()
      # Save new cached dataset
      assign(paste0("c", dat), cdataset)
      fl_save <- paste0(dirname(getwd()), "/data/c", dat, ".RData")
      save(list = paste0("c", dat), file = fl_save, compress = "xz")
      cat("Cached dataset updated for", paste0("*", dat, "*"), "in", paste0("data/", basename(fl_save), "."), "\n")
    } else {
      message("No update detected on the final data. No cached data was updated for *", dat, "*.")
    }
  }
} else {
  message("No source files changed: no cached datasets currently require updates.")
}
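As a usage sketch: the script resolves the package root as dirname(getwd()), so it is intended to be run from an R session whose working directory is one level below the package root (the exact location of the script in the checkout is assumed here):

# Hypothetical invocation from a subdirectory of the package checkout:
# setwd("random.cdisc.data/scripts")
source("rebuild_cached_data.R")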