## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for this vignette. Chunks are evaluated only when the data
# directory this vignette reads from is present.
knitr::opts_chunk$set(
  collapse = TRUE,
  # Guard on the single directory used by this vignette: testing for unrelated
  # data directories could switch evaluation on while the files needed here
  # are missing, making the import step fail.
  eval = dir.exists("new_benchmark_data"),
  comment = "#>",
  warning = FALSE,
  fig.width = 8,
  fig.height = 6
)

## ----results = FALSE, message=FALSE, warning=FALSE----------------------------
#install.packages("CiteSource")
library(CiteSource)

## -----------------------------------------------------------------------------
# Folder containing the RIS exports used in this vignette.
file_path <- "../vignettes/new_benchmark_data/"

# List the RIS files in that folder. The pattern is anchored with `$` so only
# names ending in ".ris" match; the unanchored "\\.ris" would also pick up
# files such as "export.ris.bak".
citation_files <- list.files(
  path = file_path,
  pattern = "\\.ris$",
  full.names = TRUE
)
citation_files

## -----------------------------------------------------------------------------
# Metadata for the import, written column by column. Row 1 is the benchmark
# set (no source or string tag); rows 2-6 are the five search exports, which
# share source "WoS" and label "search" and differ only in their string tag.
imported_tbl <- tibble::tibble(
  files = paste0(
    file_path,
    c(
      "benchmark_15.ris",
      "search1_166.ris",
      "search2_278.ris",
      "search3_302.ris",
      "search4_460.ris",
      "search5_495.ris"
    )
  ),
  cite_sources = c(NA, "WoS", "WoS", "WoS", "WoS", "WoS"),
  cite_labels = c(
    "benchmark", "search", "search", "search", "search", "search"
  ),
  cite_strings = c(
    NA, "string 1", "string 2", "string 3", "string 4", "string 5"
  )
)

# Read every listed file, tagging records with the metadata above.
raw_citations <- read_citations(
  metadata = imported_tbl,
  verbose = FALSE
)

## -----------------------------------------------------------------------------
# Deduplicate the imported records, then count unique records from the result.
unique_citations <- raw_citations |>
  dedup_citations()

n_unique <- unique_citations |>
  count_unique()

# The comparison is keyed on search strings instead of sources.
string_comparison <- unique_citations |>
  compare_sources(comp_type = "strings")

## -----------------------------------------------------------------------------
# Compute the initial record counts and display them as a table.
initial_records <- unique_citations |>
  calculate_initial_records()

initial_records |>
  create_initial_record_table()

## ----fig.alt="Upset plot showing overlap between five search string variations run in Web of Science."----
# Upset plot of the overlap between the search strings.
string_comparison |>
  plot_source_overlap_upset(
    groups = "string",
    decreasing = c(TRUE, TRUE)
  )

## -----------------------------------------------------------------------------
# Overlap heatmap by search string, drawn with the default plot type ...
string_comparison |>
  plot_source_overlap_heatmap(cells = "string")

# ... and again with the "percentages" plot type.
string_comparison |>
  plot_source_overlap_heatmap(
    cells = "string",
    plot_type = "percentages"
  )

## -----------------------------------------------------------------------------
# Contribution plot faceted by search string, with centering switched on.
n_unique |>
  plot_contributions(
    facets = cite_string,
    center = TRUE
  )

## -----------------------------------------------------------------------------
# Keep only records whose label contains "benchmark" and show them as a
# record-level table, requesting the "DT" return format.
record_level_table(
  dplyr::filter(
    unique_citations,
    stringr::str_detect(cite_label, "benchmark")
  ),
  return = "DT"
)

## -----------------------------------------------------------------------------
# Build the detailed record summary from the deduplicated records and their
# uniqueness counts, then display it as a table.
detailed_records <- unique_citations |>
  calculate_detailed_records(n_unique)

detailed_records |>
  create_detailed_record_table()