# Targets pipeline template for converting SAS registers to Parquet.
# Generated by running `fastreg::use_targets_template()`.
#
# Setup:
#
# 1. Set the `input_dir` and `output_dir` under "Configuration" below.
# 2. Run `targets::tar_make()` from the directory that contains this file to
#    convert the registers to Parquet.
#
# Note: this pipeline re-converts all files on every `tar_make()` call. It
# deletes the output directory, including everything in it, before converting,
# so do not keep other files there. The main benefit of targets here is
# parallel execution across workers.
#
# For more information on targets, see https://books.ropensci.org/targets/

library(targets)

# Configuration ----------------------------------------------------------------

# User settings read by the rest of this file:
#
# - `input_dir`: directory in which to look for the SAS files.
# - `output_dir`: directory to write the Parquet files to. The Parquet files
#   end up in subdirectories of this directory.
config <- list(
  input_dir = "/path/to/register/sas/files/directory",
  output_dir = "/path/to/output/directory"
)

# Stop right away, before any target is defined, when the configured input
# directory cannot be found on disk.
if (!dir.exists(config$input_dir)) {
  cli::cli_abort("Input directory does not exist: {config$input_dir}")
}

# Target options ---------------------------------------------------------------

tar_option_set(
  # Packages made available to the targets, and the storage format used for
  # their return values.
  packages = c("fs", "fastreg"),
  format = "qs",

  # Memory: drop a target's data from the R environment once it is no longer
  # needed. The memory itself is only released when the garbage collector
  # runs, so gc() is triggered at every 10th active target (locally as well as
  # on each parallel worker).
  memory = "transient",
  garbage_collection = 10,

  # Let the parallel crew workers handle saving and loading of target data.
  storage = "worker",
  retrieval = "worker",

  # Parallelism: up to 10 workers, each a local R process. A worker is
  # launched when there is work to do and exits after 60 seconds without a
  # task.
  # NOTE: 10 workers might be too many for some systems.
  controller = crew::crew_controller_local(
    workers = 10,
    seconds_idle = 60
  )
)

# Pipeline ---------------------------------------------------------------------

# The pipeline consists of three targets:
#
# - `sas_paths`: the SAS files listed from `config$input_dir`.
# - `output_dir`: the emptied output directory; returns its path.
# - `parquet_files`: one dynamic branch per element of `sas_paths`, each
#   calling `convert_file()` to write into `output_dir`.
list(
  # List the SAS files to convert. Runs on every `tar_make()` call
  # (mode = "always"): targets only reruns a target when its command or its
  # upstream objects change, and it does not watch the contents of the input
  # directory. Without this cue the listing from the first run would be reused,
  # so files added to or removed from the input directory would go unnoticed.
  tar_target(
    name = sas_paths,
    command = list_sas_files(config$input_dir),
    deployment = "main",
    cue = tar_cue(mode = "always")
  ),

  # Empty output directory before writing to avoid outdated Parquet files.
  # Runs on every `tar_make()` call (mode = "always") to ensure a clean slate.
  tar_target(
    name = output_dir,
    command = {
      if (fs::dir_exists(config$output_dir)) {
        # Deleting the output directory must never take the SAS files with it:
        # abort when the output directory is the same as, or a parent of, the
        # input directory. Real paths are compared so that symlinks and
        # relative paths do not hide an overlap.
        input_is_inside_output <- fs::path_has_parent(
          fs::path_real(config$input_dir),
          fs::path_real(config$output_dir)
        )
        if (input_is_inside_output) {
          cli::cli_abort(c(
            "Refusing to delete output directory: {config$output_dir}",
            "x" = "It is, or contains, the input directory: {config$input_dir}"
          ))
        }
        fs::dir_delete(config$output_dir)
      }
      fs::dir_create(config$output_dir)
      # Return the path so that downstream targets depend on this step and
      # only start once the directory has been emptied.
      config$output_dir
    },
    deployment = "main",
    cue = tar_cue(mode = "always")
  ),

  # Convert each SAS file in parallel. mode = "always" is required because
  # `output_dir` returns the same path string on every run, so targets would
  # otherwise consider this target up-to-date and skip it despite the output
  # directory having been cleaned.
  tar_target(
    name = parquet_files,
    command = convert_file(path = sas_paths, output_dir = output_dir),
    pattern = map(sas_paths),
    cue = tar_cue(mode = "always")
  )
)
