README

The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

tidyOhdsiSolutions

tidyOhdsiSolutions is a lightweight R package of utilities for working with OMOP CDM data in the OHDSI ecosystem. It is intentionally dependency-light: the only hard runtime dependency beyond base R is jsonlite.

Installation

Area	What it does
Functional helpers	Base-R reimplementations of `purrr` functions (`map`, `walk`, `imap`, `pluck`, …) — no `purrr` dependency
Concept set builders	Convert plain `data.frame`s into CIRCE concept set expression lists
SQL generators	Build SQL to resolve concept sets against an OMOP vocabulary schema — no Java / `CirceR` required
Cohort builders	Create CirceR-compatible cohort definition objects programmatically

# install.packages("remotes")
remotes::install_github("<owner>/tidyOhdsiSolutions")

Usage

library(tidyOhdsiSolutions)

1 — Convert a data.frame to a concept set expression

concepts <- data.frame(
  concept_id       = c(201826L, 442793L),
  concept_name     = c("Type 2 diabetes mellitus", "Type 1 diabetes mellitus"),
  domain_id        = "Condition",
  vocabulary_id    = "SNOMED",
  concept_class_id = "Clinical Finding",
  standard_concept = "S",
  concept_code     = c("44054006", "46635009"),
  invalid_reason   = "V",
  excluded         = FALSE,
  descendants      = TRUE,
  mapped           = FALSE
)

cs_expr <- toConceptSet(concepts, name = "Diabetes")
str(cs_expr, max.level = 2)
#> List of 1
#>  $ items:List of 2
#>   ..$ :List of 4
#>   ..$ :List of 4

cs_list <- toConceptSets(
  list(
    diabetes     = concepts,
    hypertension = data.frame(concept_id = 316866L)
  )
)
names(cs_list)
#> [1] "diabetes"     "hypertension"

2 — Generate concept-set SQL

sql <- buildConceptSetQuery(cs_expr, vocabularyDatabaseSchema = "cdm")
cat(sql)
#> select distinct I.concept_id FROM
#> ( 
#>   select concept_id from cdm.CONCEPT where (concept_id in (201826,442793))
#> UNION
#>   select c.concept_id
#>   from cdm.CONCEPT c
#>   join cdm.CONCEPT_ANCESTOR ca on c.concept_id = ca.descendant_concept_id
#>   WHERE c.invalid_reason is null
#>   and (ca.ancestor_concept_id in (201826,442793))
#> ) I

sql_list <- buildConceptSetQueries(cs_list, vocabularyDatabaseSchema = "cdm")

3 — Build a cohort definition (no Java / CirceR needed)

Single concept set

cohort <- createConceptSetCohort(
  conceptSetExpression = cs_expr,
  name                 = "Diabetes Cohort",
  limit                = "first",
  requiredObservation  = c(365L, 0L),
  end                  = "observation_period_end_date"
)

# Serialise to CirceR-compatible JSON
json <- cohortToJson(cohort)
cat(substr(json, 1, 300))
#> {
#>   "ConceptSets": [
#>     {
#>       "id": 0,
#>       "name": "Diabetes Cohort",
#>       "expression": {
#>         "items": [
#>           {
#>             "concept": {
#>               "CONCEPT_ID": 201826,
#>               "CONCEPT_NAME": "Type 2 diabetes mellitus",
#>               "STANDARD_CONCEPT": "S",
#>

Multiple concept sets

cohortFromConceptSet() accepts a named list of concept set expressions and builds a single cohort with all of them:

drug_df <- data.frame(
  concept_id   = 1503297L,
  concept_name = "Metformin",
  domain_id    = "Drug",
  vocabulary_id = "RxNorm",
  standard_concept = "S",
  descendants  = TRUE
)

multi_cs <- toConceptSets(list(
  diabetes  = concepts,
  metformin = drug_df
))

multi_cohort <- cohortFromConceptSet(
  conceptSetList      = multi_cs,
  limit               = "earliest",
  requiredObservation = c(365L, 0L),
  end                 = "observation_period_end_date"
)

# Each concept set gets its own id; list the concept set names
vapply(multi_cohort$ConceptSets, `[[`, character(1), "name")
#> [1] "diabetes"  "metformin"

End-strategy variants

# Continuous drug era
cohort_drug <- createConceptSetCohort(
  cs_expr,
  end     = "drug_exit",
  endArgs = list(persistenceWindow = 30, surveillanceWindow = 0)
)

# Fixed offset from index
cohort_fixed <- createConceptSetCohort(
  cs_expr,
  end     = "fixed_exit",
  endArgs = list(index = "startDate", offsetDays = 365)
)

4 — Extract concept sets from an existing cohort definition

# cohort_def is a list produced by e.g. CirceR::cohortExpressionFromJson()
concept_sets <- collectCsFromCohort(cohort_def)
# Returns a named list keyed by lowerCamelCase concept set names

5 — Functional helpers (purrr-compatible, no purrr)

# map / map_chr / map_dbl / map_int / map_lgl
tidyOhdsiSolutions:::map(1:4, ~ .x^2)
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 4
#> 
#> [[3]]
#> [1] 9
#> 
#> [[4]]
#> [1] 16

# map2 / map2_chr — iterate over two inputs in parallel
tidyOhdsiSolutions:::map2_chr(c("hello", "foo"), c("world", "bar"), paste)
#>         hello           foo 
#> "hello world"     "foo bar"

# pluck — safely extract from nested structures
nested <- list(a = list(b = list(c = 42)))
tidyOhdsiSolutions:::pluck(nested, "a", "b", "c")
#> [1] 42
tidyOhdsiSolutions:::pluck(nested, "a", "missing", .default = 0)
#> [1] 0

# walk — side-effects only, returns .x invisibly
tidyOhdsiSolutions:::walk(1:3, ~ message("item ", .x))
#> item 1
#> item 2
#> item 3

# imap — index-aware map
tidyOhdsiSolutions:::imap(c(a = 10, b = 20), ~ paste(.y, "=", .x))
#> $a
#> [1] "a = 10"
#> 
#> $b
#> [1] "b = 20"

Supported OMOP domains

createConceptSetCohort(), cohortFromConceptSet(), and buildConceptSetQuery() support the following domains:

Condition, Drug, Procedure, Observation, Measurement, Visit, Device

Key design decisions

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.