The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Data management and disproportionality

Main (data management and disproportionality) script template

You may want to organize your research folder using small scripts, each designed for a part of your analysis plan.

Inspired by Reproducible Research in R

The main script might be located in scripts, under the name “main.R”.

If you haven’t already, see vignette("getting_started"), vignette("basic_workflow") and vignette("descriptive") for details on what this template does.

Paths

You may create a separate script that defines your paths, and call it at the beginning of your main script.

library(vigicaen)
library(rlang)
library(here) # if you like the here syntax for file paths
library(dplyr)
# #### PATHS #### ####

# Base path handed to every dt_parquet() call below. It is a relative
# path, so it is resolved against the current working directory.
path_base <- "../vigibase_ecl/main"

# #### IMPORT #### ####

# One object per table, all opened with in_memory = FALSE.
# The calls are independent of each other and listed alphabetically.
adr <- dt_parquet(path_base, "adr", in_memory = FALSE)
demo <- dt_parquet(path_base, "demo", in_memory = FALSE)
drug <- dt_parquet(path_base, "drug", in_memory = FALSE)
followup <- dt_parquet(path_base, "followup", in_memory = FALSE)
link <- dt_parquet(path_base, "link", in_memory = FALSE)
out <- dt_parquet(path_base, "out", in_memory = FALSE)
srce <- dt_parquet(path_base, "srce", in_memory = FALSE)
suspdup <- dt_parquet(path_base, "suspdup", in_memory = FALSE)

# Dictionary script, built from template_dictionary. It presumably
# defines d_drecno, atc_drecno and a_llt used further down -- confirm.
source("00_dict.R")

Demo

# ---- Deduplicating ---- ####

# Keep only the reports whose UMCReportId is not listed in the
# SuspectedduplicateReportId column of suspdup.
demo <- filter(
  demo,
  !(UMCReportId %in% suspdup$SuspectedduplicateReportId)
)

# ---- Drugs ---- ####

# Drug codes coming from a list of drugs (d_drecno)
demo <- add_drug(demo, d_code = d_drecno, drug_data = drug)

# Drug codes coming from ATC classes (atc_drecno)
demo <- add_drug(demo, d_code = atc_drecno, drug_data = drug)

# ---- Reactions ---- ####

# Reaction codes come from a_llt; the reaction table is adr.
demo <- add_adr(demo, a_code = a_llt, adr_data = adr)

# ---- Demographics ---- ####

# Age, sex
#
# age: AgeGroup is converted to integer and binned with right-closed
#   intervals: 0-4 -> "<18", 5 -> "18-44", 6 -> "45-64", 7 -> "65-74",
#   8 -> "75+". Values above 8 (e.g. 9) fall outside the breaks and
#   become NA.
#   Labels are non-overlapping; code 5 is assumed to cover 18-44 years
#   (VigiBase AgeGroup coding) -- confirm against your extract.
# sex: numeric 1 / 2, NA for every other Gender value.

demo <-
  demo |>
  mutate(
    age = cut(as.integer(AgeGroup),
              breaks = c(0, 4, 5, 6, 7, 8),
              include.lowest = TRUE, right = TRUE,
              labels = c("<18", "18-44", "45-64", "65-74", "75+")),

    # Anything other than "1" or "2" (including "-", "0", "9" and
    # missing values) ends up in the fallback branch.
    sex = case_when(Gender == "1" ~ 1,
                    Gender == "2" ~ 2,
                    TRUE ~ NA_real_)
  )

# Death + outcome availability
#
# death is NA when the report has no row in out. Otherwise it is TRUE
# if the report has an out row with Seriousness == "1" (taken here to
# be the death code), and FALSE if it has none.

demo <-
  mutate(
    demo,
    death = ifelse(
      UMCReportId %in% out$UMCReportId,
      UMCReportId %in% pull(filter(out, Seriousness == "1"), UMCReportId),
      NA
    )
  )

# follow-up, seriousness
#
# fup: 1 when the report appears in followup, 0 otherwise.
# serious: NA when the report has no row in out; otherwise TRUE if the
#   report has an out row with Serious == "Y", and FALSE if it has none.

demo <-
  mutate(
    demo,
    fup = if_else(UMCReportId %in% followup$UMCReportId, 1, 0),
    serious = ifelse(
      UMCReportId %in% out$UMCReportId,
      UMCReportId %in% pull(filter(out, Serious == "Y"), UMCReportId),
      NA
    )
  )

# year
#
# The first four characters of FirstDateDatabase, converted to a number.

demo <- mutate(
  demo,
  year = as.numeric(substr(FirstDateDatabase, start = 1, stop = 4))
)

# type of reporter
#
# srce's Type column is renamed to type_reporter and joined onto demo
# by UMCReportId, leaving demo's own Type column untouched.

demo <- left_join(
  demo,
  transmute(srce, UMCReportId, type_reporter = Type),
  by = "UMCReportId"
)

# explicit multi-level vars
#
# Each coded column becomes a factor whose levels are the codes, in
# order, and whose labels are the matching human-readable names.
# Codes that are not listed become NA.

demo <-
  mutate(
    demo,
    Type = factor(
      Type,
      levels = c("1", "2", "3", "4", "5"),
      labels = c(
        "Spontaneous",
        "Report from study",
        "Other",
        "Not available to sender (unknown)",
        "PMS/Special monitoring"
      )
    ),
    type_reporter = factor(
      type_reporter,
      levels = c("1", "2", "3", "4", "5"),
      labels = c(
        "Physician",
        "Pharmacist",
        "Other Health Professional",
        "Lawyer",
        "Consumer or other non health professional"
      )
    ),
    Region = factor(
      Region,
      levels = c("1", "2", "3", "4", "5", "6"),
      labels = c(
        "African Region",
        "Region of the Americas",
        "South-East Asia Region",
        "European Region",
        "Eastern Mediterranean Region",
        "Western Pacific Region"
      )
    )
  )

# ---- Check ---- ####

# Run check_dm() on the columns named after d_drecno and a_llt, plus
# fup. The printed result is shown below.
check_dm(demo, cols = c(names(d_drecno), names(a_llt), "fup"))
#>               [,1]
#> ipilimumab      86
#> atezolizumab    69
#> durvalumab      68
#> nivolumab      225
#> pembrolizumab  298
#> avelumab        43
#> cemiplimab      27
#> tremelimumab    27
#> a_embolism       9
#> a_colitis      104
#> a_pneumonitis  103
#> fup            292

Basic models

# ---- Bivariate ---- ####

# y takes the reaction column names (from a_llt), x the drug column
# names (from d_drecno).
rb <- compute_dispro(demo, y = names(a_llt), x = names(d_drecno))

# remove the hashes to save your results
## write.csv2(rb, here("outputs", "rb.csv"), row.names = FALSE)

# ---- Multivariate ---- ####

# Binomial GLM of a_colitis on nivolumab, age and sex.
mod <- glm(
  a_colitis ~ nivolumab + age + sex,
  family = "binomial",
  data = demo
)

# The coefficient table of the fitted model is handed to
# compute_or_mod(), which is told which columns hold the estimate and
# its standard error.
rm <- compute_or_mod(
  summary(mod)$coefficients,
  estimate = Estimate,
  std_er = Std..Error
)

# remove the hashes to save your results
## write.csv2(rm, here("outputs", "rm.csv"), row.names = FALSE)

Basic descriptive

# ---- General description ---- ####

# Columns passed to desc_facvar(): demographics, report
# characteristics, drug and reaction columns, and the two outcome flags.
r_desc <- desc_facvar(
  demo,
  vf = c(
    "age", "sex",
    "type_reporter",
    "Type",
    "year",
    names(d_drecno),
    names(a_llt),
    "serious", "death"
  ),
  ncat_max = 20
)

# ---- Time to onset ---- ####

# All three descriptions below start from link and use the same
# reaction (a_llt) and drug (d_drecno) names.
r_tto <-
  link |>
  desc_tto(
    adr_s = names(a_llt),
    drug_s = names(d_drecno)
  )

# ---- Dechallenge ---- ####

r_dch <-
  link |>
  desc_dch(
    adr_s = names(a_llt),
    drug_s = names(d_drecno)
  )

# ---- Rechallenge ---- ####

# desc_rch() additionally receives demo through demo_data.
r_rch <-
  link |>
  desc_rch(
    demo_data = demo,
    adr_s = names(a_llt),
    drug_s = names(d_drecno)
  )

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.