The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Getting Started

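puremoe provides a unified interface to PubMed and NLM data. Search with search_pubmed(), then retrieve data from any of five endpoints with get_records(). The examples below assume pmids_sub holds a vector of PMIDs (for example, a subset of those returned by search_pubmed()).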

library(puremoe)
library(dplyr)
library(DT)

Abstracts

# Query the "pubmed_abstracts" endpoint for the PMIDs in `pmids_sub`, then
# store `pmid` as character.
abstracts <- pmids_sub |>
  puremoe::get_records(
    endpoint = "pubmed_abstracts",
    cores = 1L,
    sleep = 0.5
  ) |>
  mutate(pmid = as.character(pmid))

# Render only the bibliographic columns as an interactive table.
DT::datatable(
  select(abstracts, pmid, year, journal, articletitle),
  rownames = FALSE
)

The annotations column is a list of per-article data frames containing MeSH terms, chemical names, and keywords.

# Stack the per-article annotation data frames into one table and show the
# first 20 rows.
abstracts[["annotations"]] |>
  bind_rows() |>
  head(n = 20) |>
  DT::datatable(rownames = FALSE)

Affiliations

# Query the "pubmed_affiliations" endpoint for the first 25 PMIDs only.
affiliations <- pmids_sub |>
  head(n = 25L) |>
  puremoe::get_records(
    endpoint = "pubmed_affiliations",
    cores = 1L,
    sleep = 0.5
  )

# Render the full result as an interactive table.
DT::datatable(affiliations, rownames = FALSE)

iCite metrics

# Query the "icites" endpoint for every PMID in `pmids_sub`.
icites <- pmids_sub |>
  puremoe::get_records(
    endpoint = "icites",
    cores = 1L,
    sleep = 0.25
  )

# For display: show `pmid` as character and leave out the two citation
# columns (`citation_net`, `cited_by_clin`); scroll horizontally because the
# table is wide.
icites |>
  mutate(pmid = as.character(pmid)) |>
  select(-c(citation_net, cited_by_clin)) |>
  DT::datatable(
    rownames = FALSE,
    options = list(scrollX = TRUE)
  )

PubTator annotations

# Query the "pubtations" endpoint for the first 30 PMIDs only.
pubtations <- pmids_sub |>
  head(n = 30L) |>
  puremoe::get_records(
    endpoint = "pubtations",
    cores = 1L
  )

# Render the full result as an interactive table.
DT::datatable(pubtations, rownames = FALSE)

Full text

Full-text retrieval requires open-access PMC articles. pmid_to_ftp() resolves PMIDs to XML URLs via the PMC Cloud Service on AWS S3, filtering to only those with open-access full text available. In August 2026, NCBI will complete its migration from the legacy PMC FTP Service to the Cloud Service; puremoe already uses the new service.

# Resolve the PMIDs to full-text URLs and show the lookup table.
ftp <- puremoe::pmid_to_ftp(pmids = pmids_sub)
ftp |> DT::datatable(rownames = FALSE, options = list(scrollX = TRUE))

# Fetch full text for the first two resolved URLs only, to keep the example
# small.
fulltext <- puremoe::get_records(
  head(ftp$url, 2L),
  endpoint = "pmc_fulltext",
  cores    = 1L
)

# Preview the first five rows, shortening `text` to its first 15
# whitespace-separated words.
# - Slice before mutating so only the displayed rows are split into words.
# - vapply() guarantees a character column even when there are no rows
#   (sapply() would return an empty list).
# - The ellipsis is appended only when words were actually dropped.
fulltext |>
  slice_head(n = 5) |>
  mutate(
    text = vapply(
      strsplit(text, "\\s+"),
      function(w) {
        preview <- paste(head(w, 15L), collapse = " ")
        if (length(w) > 15L) paste0(preview, "...") else preview
      },
      character(1)
    )
  ) |>
  DT::datatable(rownames = FALSE, options = list(scrollX = TRUE))

Endpoint schemas

endpoint_info() returns column definitions, rate limits, and notes for any endpoint.

# Called with no argument, endpoint_info() prints the names of the available
# endpoints.
puremoe::endpoint_info()
#> [1] "pubmed_abstracts"    "pubmed_affiliations" "icites"             
#> [4] "pubtations"          "pmc_fulltext"
# Called with an endpoint name, it returns a list describing that endpoint:
# description, return type, columns, parameters, rate limit, and notes.
puremoe::endpoint_info("icites")
#> $description
#> [1] "NIH iCite citation metrics and influence scores"
#> 
#> $returns
#> [1] "data.frame"
#> 
#> $columns
#> $columns$pmid
#> [1] "PubMed ID - join key to link with pubmed_abstracts (character)"
#> 
#> $columns$citation_count
#> [1] "Total citations received (integer)"
#> 
#> $columns$relative_citation_ratio
#> [1] "RCR: field-adjusted citation rate comparing to NIH baseline (numeric)"
#> 
#> $columns$nih_percentile
#> [1] "Percentile rank vs NIH-funded publications (numeric)"
#> 
#> $columns$field_citation_rate
#> [1] "Expected citation rate for article's co-citation field (numeric)"
#> 
#> $columns$is_research_article
#> [1] "Flag for primary research articles (logical)"
#> 
#> $columns$is_clinical
#> [1] "Flag for clinical articles (logical)"
#> 
#> $columns$provisional
#> [1] "Flag indicating RCR is provisional due to recent publication (logical)"
#> 
#> $columns$citation_net
#> [1] "Citation network edge list: 'from' and 'to' PMIDs within result set (list-column)"
#> 
#> $columns$cited_by_clin
#> [1] "PMIDs of clinical articles citing this paper (character/list)"
#> 
#> 
#> $parameters
#> $parameters$cores
#> [1] "parallel workers"
#> 
#> $parameters$sleep
#> [1] "delay between requests"
#> 
#> 
#> $rate_limit
#> [1] "Relatively permissive"
#> 
#> $notes
#> [1] "Join to pubmed_abstracts on pmid for complete metadata (title, journal, authors, etc. not included to avoid redundancy). citation_net enables intra-corpus network analysis."

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.