Visualization of clinical data

Laure Cougnaud, Michela Pasetto

June 29, 2021

This vignette focuses on the visualizations available in the clinDataReview package.

We will use example data sets from the clinUtils package.

If you have doubts about the data format, please first check the vignette on data preprocessing, available here.

If everything is clear on that side, let’s get started!

library(clinDataReview)
library(pander)
library(plotly)
library(clinUtils)

# load the example dataset; the variable labels are stored as an attribute
data(dataADaMCDISCP01)
labelVars <- attr(dataADaMCDISCP01, "labelVars")

# variables of interest for each domain
varsLB <- c(
    "PARAM", "PARAMCD", "USUBJID", "TRTP",
    "ADY", "VISITNUM", "VISIT", "LBSTRESN"
)
varsAE <- c("USUBJID", "AESOC", "AEDECOD", "ASTDY", "AENDY", "AESEV")
varsDM <- c("RFSTDTC", "USUBJID")

# laboratory, adverse event and subject-level data, restricted to these variables
dataLB <- dataADaMCDISCP01[["ADLBC"]][, varsLB]
dataAE <- dataADaMCDISCP01[["ADAE"]][, varsAE]
dataDM <- dataADaMCDISCP01[["ADSL"]][, varsDM]

1 Patient profiles

The interactive visualizations of the clinical data package include functionalities to link a plot to a patient-specific report, e.g. patient profiles created with the patientProfilesVis package.

Such patient profiles can be created via a config file, with a dedicated template report available in the clinDataReview package.

A simple patient profile report for each subject in the example dataset is created below.

# create a directory to store the patient profiles
# (only if it doesn't exist yet, so this code can be re-run without warning)
patientProfilesDir <- "patientProfiles"
if (!dir.exists(patientProfilesDir)) {
    dir.create(patientProfilesDir)
}

# get examples of parameters for the report
# from the config file available in the package
configDir <- system.file("skeleton", "config", package = "clinDataReview")
params <- getParamsFromConfig(
    configDir = configDir, 
    configFile = "config-patientProfiles.yml"
)
# create patient profile with only one panel for the demo
params$patientProfilesParams <- params$patientProfilesParams[1]
# use dataset from the clinUtils package
params$pathDataFolder <- system.file("extdata", "cdiscpilot01", "SDTM", package = "clinUtils")
# store patient profile in this folder:
params$patientProfilePath <- patientProfilesDir

# create patient profiles:
# the template report is copied to the current directory and rendered there
pathTemplate <- clinDataReview::getPathTemplate(params$template)
file.copy(from = pathTemplate, to = ".")
report <- rmarkdown::render(input = basename(pathTemplate), envir = new.env())

Please refer to the vignette about reporting for more details on how to set up a config file and use template reports available in the package.

You can skip directly to the reporting vignette, which is available here, or run the command below in your console.

# open the reporting vignette of the clinDataReview package
vignette(topic = "clinDataReview-reporting", package = "clinDataReview")

2 Data visualization

All the visualizations available in the package are interactive.

2.1 Visualization of individual profiles

Visualization of individual profiles is available via the function scatterplotClinData.

2.1.1 Explore the visualization data

To facilitate the exploration of the data, the data underlying each visualization can also be included as a table below the plot, by setting the parameter table to TRUE.

Please note that this functionality is not demonstrated in this document to ensure a lightweight vignette in the package.

2.1.3 Spaghetti plot of time profile

# laboratory parameter of interest
labParam <- "ALT"
dataPlot <- dataLB[dataLB$PARAMCD %in% labParam, ]

# x-axis ticks: each visit is positioned at its median day,
# with the visit name split on two lines
visitLab <- tapply(dataPlot$ADY, dataPlot$VISIT, median)
names(visitLab) <- sub(pattern = "-", replacement = "\n", x = names(visitLab))

# path to the patient profile report of each subject
dataPlot$patientProfilePath <- paste0(
    "patientProfiles/subjectProfile-",
    sub(pattern = "/", replacement = "-", x = dataPlot$USUBJID),
    ".pdf"
)

scatterplotClinData(
    data = dataPlot,
    # time on the x-axis, actual value on the y-axis
    xVar = "ADY", yVar = "LBSTRESN",
    xPars = list(breaks = visitLab, labels = names(visitLab)),
    # one line per subject, colored by treatment
    aesLineVar = list(group = "USUBJID", color = "TRTP"),
    aesPointVar = list(color = "TRTP", fill = "TRTP"),
    title = paste(
        "Actual value of",
        getLabelParamcd(
            paramcd = labParam, data = dataLB,
            paramcdVar = "PARAMCD", paramVar = "PARAM"
        )
    ),
    labelVars = labelVars,
    hoverVars = c("USUBJID", "VISIT", "ADY", "LBSTRESN", "TRTP"),
    # variable with the link to the patient profiles
    pathVar = "patientProfilePath",
    id = paste("subjectProfile", labParam, sep = "-"),
    table = FALSE,
    verbose = TRUE
)

2.1.4 Scatterplot

# restrict the data to the two laboratory parameters to compare
dataPlot <- dataLB[dataLB$PARAMCD %in% c("ALT", "ALB"), ]

# reshape from long to wide format: one row per subject and visit,
# one column per lab parameter (multiple records are averaged)
library(reshape2)
dataPlotWide <- dcast(
    data = dataPlot,
    formula = USUBJID + VISIT + VISITNUM ~ PARAMCD,
    value.var = "LBSTRESN",
    fun.aggregate = mean
)

# path to the patient profile report of each subject
dataPlotWide$patientProfilePath <- paste0(
    "patientProfiles/subjectProfile-",
    sub(pattern = "/", replacement = "-", x = dataPlotWide$USUBJID),
    ".pdf"
)

# scatterplot of ALB versus ALT, with one facet per visit
scatterplotClinData(
    data = dataPlotWide,
    xVar = "ALT",
    xLab = getLabelParamcd(
        paramcd = "ALT", data = dataLB,
        paramcdVar = "PARAMCD", paramVar = "PARAM"
    ),
    yVar = "ALB",
    yLab = getLabelParamcd(
        paramcd = "ALB", data = dataLB,
        paramcdVar = "PARAMCD", paramVar = "PARAM"
    ),
    facetPars = list(facets = ~ VISIT),
    aesPointVar = list(color = "USUBJID", fill = "USUBJID"),
    labelVars = labelVars,
    pathVar = "patientProfilePath",
    table = FALSE,
    verbose = TRUE
)

2.1.5 eDish plot

# variables identifying a record across the two parameters
byVar <- c("USUBJID", "VISIT")

# extract the records of each parameter separately
dataALT <- dataLB[dataLB$PARAMCD %in% "ALT", ]
dataBILI <- dataLB[dataLB$PARAMCD %in% "BILI", ]

# combine them side by side: the actual values are stored
# in the columns 'LBSTRESN.ALT' and 'LBSTRESN.BILI'
dataPlot <- merge(
    x = dataALT,
    y = dataBILI[, c(byVar, "LBSTRESN")],
    by = byVar,
    suffixes = c(".ALT", ".BILI"),
    all = TRUE
)

# set the labels of the two new columns
labelVars[paste0("LBSTRESN.", c("ALT", "BILI"))] <- paste(
    "Actual value of",
    getLabelParamcd(
        paramcd = c("ALT", "BILI"), data = dataLB,
        paramcdVar = "PARAMCD", paramVar = "PARAM"
    )
)

# path to the patient profile report of each subject
dataPlot$patientProfilePath <- paste0(
    "patientProfiles/subjectProfile-",
    sub(pattern = "/", replacement = "-", x = dataPlot$USUBJID),
    ".pdf"
)

# scatterplot of bilirubin versus ALT, in the log10 scale, colored by visit
scatterplotClinData(
    data = dataPlot,
    xVar = "LBSTRESN.ALT",
    xLab = getLabelParamcd(
        paramcd = "ALT", data = dataLB,
        paramcdVar = "PARAMCD", paramVar = "PARAM"
    ),
    xTrans = "log10",
    yVar = "LBSTRESN.BILI",
    yLab = getLabelParamcd(
        paramcd = "BILI", data = dataLB,
        paramcdVar = "PARAMCD", paramVar = "PARAM"
    ),
    yTrans = "log10",
    aesPointVar = list(color = "VISIT", fill = "VISIT"),
    themePars = list(legend.position = "bottom"),
    hoverVars = c("USUBJID"),
    labelVars = labelVars,
    pathVar = "patientProfilePath",
    id = "eDish",
    table = FALSE,
    verbose = TRUE
)

2.1.6 Visualization of time-intervals

Time-intervals are displayed with the timeProfileIntervalPlot function:

# link to patient profiles
dataAE$patientProfilePath <- paste0(
    "patientProfiles/subjectProfile-", 
    sub("/", "-", dataAE$USUBJID), ".pdf"
)
# display each adverse event as an interval, with one line per subject
timeProfileIntervalPlot(
    data = dataAE,
    paramVar = "USUBJID",
    # time-variables: the interval goes from the start day to the end day
    # (the end variable should differ from the start variable,
    # otherwise each interval collapses to a single point)
    timeStartVar = "ASTDY",
    timeEndVar = "AENDY",
    # intervals are colored by severity
    colorVar = "AESEV",
    hoverVars = c("USUBJID", "AEDECOD", "ASTDY", "AENDY", "AESEV"),
    labelVars = labelVars,
    table = FALSE, pathVar = "patientProfilePath",
    tableVars = c("USUBJID", "AEDECOD", "ASTDY", "AENDY", "AESEV"),
    verbose = TRUE
)

By default, empty intervals are represented if the start/end time variables are missing. Missing start/end time can be imputed, or different symbols can be used to represent such cases:

# records with missing start/end day
isStartMissing <- is.na(dataAE$ASTDY)
isEndMissing <- is.na(dataAE$AENDY)

# status of the start/end date, displayed via the symbol
dataAE$AESTFLG <- ifelse(isStartMissing, "Missing start", "Complete")
dataAE$AEENFLG <- ifelse(isEndMissing, "Missing end", "Complete")

# symbol for each status
shapePalette <- c(
    "Missing start" = "triangle-left",
    "Complete" = "square-open",
    "Missing end" = "triangle-right"
)

# 'simple' imputation:
# a missing start is replaced by the end day, and a missing end
# by the start day, so the 'Missing' symbol is displayed
# at the other (available) side of the interval
dataAE$AESTDYIMP <- ifelse(isStartMissing, dataAE$AENDY, dataAE$ASTDY)
dataAE$AEENDYIMP <- ifelse(isEndMissing, dataAE$ASTDY, dataAE$AENDY)

# variables displayed in the hover and in the attached table
varsAEDisplay <- c(
    "USUBJID", "AEDECOD", "AESEV",
    "ASTDY", "AESTFLG", "AENDY", "AEENFLG"
)

timeProfileIntervalPlot(
    data = dataAE,
    paramVar = "USUBJID",
    # start of the interval: imputed day and status
    timeStartVar = "AESTDYIMP",
    timeStartLab = "Start day",
    timeStartShapeVar = "AESTFLG",
    timeStartShapeLab = "Status of start date",
    # end of the interval: imputed day and status
    timeEndVar = "AEENDYIMP",
    timeEndLab = "End day",
    timeEndShapeVar = "AEENFLG",
    timeEndShapeLab = "Status of end date",
    shapePalette = shapePalette,
    hoverVars = varsAEDisplay,
    tableVars = varsAEDisplay,
    labelVars = labelVars,
    pathVar = "patientProfilePath",
    table = FALSE
)

2.2 Visualization of summary statistics

Summary statistics can also be visualized with the package, via different types of visualizations: sunburst, treemap and barplot.

These functions take as input a table of summary statistics, especially counts. Such a table can e.g. be computed with the inTextSummaryTable R package (see the corresponding package vignette for more information).

2.2.2 Compute count statistics

In this example, counts of adverse events are extracted for each Primary System Organ Class and Dictionary-Derived Term. Besides the counts of the number of subjects, the paths to the patient profile report for each subgroup are extracted and combined.

# the visualizations of summary statistics take a table of counts as input,
# computed here with the inTextSummaryTable package
library(inTextSummaryTable)

# dataset for the total counts: Safety Analysis Set
# (patients with start date for the first treatment)
dataTotal <- dataDM[which(dataDM$RFSTDTC != ""), ]

## patient profiles report

# path to the patient profile report of each subject
dataAE$patientProfilePath <- paste0(
    "patientProfiles/subjectProfile-",
    sub(pattern = "/", replacement = "-", x = dataAE$USUBJID),
    ".pdf"
)

# corresponding hyperlink, labelled with the subject ID (for attached table)
dataAE$patientProfileLink <- paste0(
    '<a href="', dataAE$patientProfilePath,
    '" target="_blank">', dataAE$USUBJID, '</a>'
)

# extra statistics: unique paths/links across the patients of a group,
# sorted and collapsed with: ', '
statsExtraPP <- list(
    statPatientProfilePath = function(data) {
        paste(sort(unique(data$patientProfilePath)), collapse = ", ")
    },
    statPatientProfileLink = function(data) {
        paste(sort(unique(data$patientProfileLink)), collapse = ", ")
    }
)

# statistics to report: default counts + paths/links to the subject profiles
statsPP <- c(
    getStats(type = "count-default"),
    list(
        patientProfilePath = quote(statPatientProfilePath),
        patientProfileLink = quote(statPatientProfileLink)
    )
)

# compute adverse event table
tableAE <- getSummaryStatisticsTable(
    data = dataAE,
    dataTotal = dataTotal,
    # counts by system organ class and term, ordered by total
    rowVar = c("AESOC", "AEDECOD"),
    rowOrder = "total",
    # plotly treemap requires records (rows) for each group
    rowVarTotalInclude = "AEDECOD",
    labelVars = labelVars,
    # statistics of interest, including the columns with patients
    stats = statsPP,
    statsExtra = statsExtraPP,
    # output returned as a data.frame
    outputType = "data.frame-base"
)

# display the first records of the table
pander(
    head(tableAE),
    caption = paste(
        "Extract of the Adverse Event summary table",
        "used for the sunburst and barplot visualization"
    )
)
Extract of the Adverse Event summary table used for the sunburst and barplot visualization (continued below)
AESOC AEDECOD isTotal statN
CARDIAC DISORDERS MYOCARDIAL INFARCTION FALSE 1
GASTROINTESTINAL DISORDERS DYSPEPSIA FALSE 1
GASTROINTESTINAL DISORDERS NAUSEA FALSE 2
GENERAL DISORDERS AND ADMINISTRATION SITE CONDITIONS APPLICATION SITE DERMATITIS FALSE 1
GENERAL DISORDERS AND ADMINISTRATION SITE CONDITIONS APPLICATION SITE ERYTHEMA FALSE 3
GENERAL DISORDERS AND ADMINISTRATION SITE CONDITIONS APPLICATION SITE IRRITATION FALSE 2
Table continues below
statm statPatientProfilePath
1 patientProfiles/subjectProfile-01-710-1083.pdf
1 patientProfiles/subjectProfile-01-701-1148.pdf
7 patientProfiles/subjectProfile-01-718-1371.pdf, patientProfiles/subjectProfile-01-718-1427.pdf
2 patientProfiles/subjectProfile-01-718-1427.pdf
3 patientProfiles/subjectProfile-01-701-1148.pdf, patientProfiles/subjectProfile-01-701-1192.pdf, patientProfiles/subjectProfile-01-701-1211.pdf
4 patientProfiles/subjectProfile-01-701-1192.pdf, patientProfiles/subjectProfile-01-718-1371.pdf
Table continues below
statPatientProfileLink statPercTotalN
01-710-1083 7
01-701-1148 7
01-718-1371, 01-718-1427 7
01-718-1427 7
01-701-1148, 01-701-1192, 01-701-1211 7
01-701-1192, 01-718-1371 7
Table continues below
statPercN n % patientProfilePath
14.29 1 14.3 patientProfiles/subjectProfile-01-710-1083.pdf
14.29 1 14.3 patientProfiles/subjectProfile-01-701-1148.pdf
28.57 2 28.6 patientProfiles/subjectProfile-01-718-1371.pdf, patientProfiles/subjectProfile-01-718-1427.pdf
14.29 1 14.3 patientProfiles/subjectProfile-01-718-1427.pdf
42.86 3 42.9 patientProfiles/subjectProfile-01-701-1148.pdf, patientProfiles/subjectProfile-01-701-1192.pdf, patientProfiles/subjectProfile-01-701-1211.pdf
28.57 2 28.6 patientProfiles/subjectProfile-01-701-1192.pdf, patientProfiles/subjectProfile-01-718-1371.pdf
patientProfileLink
01-710-1083
01-701-1148
01-718-1371, 01-718-1427
01-718-1427
01-701-1148, 01-701-1192, 01-701-1211
01-701-1192, 01-718-1371

2.2.3 Sunburst

The sunburstClinData function visualizes the counts of hierarchical data in nested circles.

The different groups are visualized from the biggest class (root node) in the center of the visualization to the smallest sub-groups (leaves) on the outside of the circles.

The size of the different segments is relative to the respective counts.

# the sunburst is based on the adverse event table,
# with the counts converted to numeric
dataSunburst <- tableAE
dataSunburst[["n"]] <- as.numeric(dataSunburst[["n"]])

sunburstClinData(
    data = dataSunburst,
    # hierarchy, from the center to the outside of the circles
    vars = c("AESOC", "AEDECOD"),
    # size of the segments
    valueVar = "n",
    valueLab = "Number of patients with adverse events",
    # links to the patient profiles, labelled with the subject variable
    pathVar = "patientProfileLink",
    pathLab = clinUtils::getLabelVar(var = "USUBJID", labelVars = labelVars),
    labelVars = labelVars,
    table = FALSE,
    verbose = TRUE
)

2.2.4 Treemap

A treemap visualizes the counts of the hierarchical data in nested rectangles. The area of each rectangle is proportional to the counts of the respective group.

Note that a treemap can also be colored according to a meaningful variable. For instance, if we show adverse events, we might color the plot by severity. This can be achieved with the colorVar parameter.

# the treemap is based on the adverse event table,
# with the counts converted to numeric
dataTreemap <- tableAE
dataTreemap[["n"]] <- as.numeric(dataTreemap[["n"]])

treemapClinData(
    data = dataTreemap,
    # hierarchy of the nested rectangles
    vars = c("AESOC", "AEDECOD"),
    # area of the rectangles
    valueVar = "n", valueLab = "Number of patients with adverse events",
    # links to the patient profiles, labelled with the subject variable
    pathVar = "patientProfileLink",
    pathLab = clinUtils::getLabelVar(var = "USUBJID", labelVars = labelVars),
    labelVars = labelVars,
    table = FALSE,
    verbose = TRUE
)

2.2.5 Barplot

A barplot visualizes the counts for one single variable in a specific order.

# discard the 'Total' records of the adverse event table
# and convert the counts to numeric
dataPlot <- subset(tableAE, AEDECOD != "Total")
dataPlot[["n"]] <- as.numeric(dataPlot[["n"]])

# stacked barplot: one bar per system organ class, split by term
barplotClinData(
    data = dataPlot,
    xVar = "AESOC",
    yVar = "n", yLab = "Number of patients with adverse events",
    colorVar = "AEDECOD",
    barmode = "stack",
    labelVars = labelVars,
    # links to the patient profiles, labelled with the subject variable
    pathVar = "patientProfileLink",
    pathLab = clinUtils::getLabelVar(var = "USUBJID", labelVars = labelVars),
    table = FALSE,
    verbose = TRUE
)

2.3 Multiple visualizations in a loop

To include multiple clinical data visualizations (with or without attached table) in a loop (in the same Rmarkdown chunk), the list of visualizations should be passed to the knitPrintListObjects function of the clinUtils package.

# restrict the data to a few laboratory parameters
dataPlot <- dataLB[dataLB$PARAMCD %in% c("SODIUM", "K"), ]

# path to the patient profile report of each subject
dataPlot$patientProfilePath <- paste0(
    "patientProfiles/subjectProfile-",
    sub(pattern = "/", replacement = "-", x = dataPlot$USUBJID),
    ".pdf"
)

# 1) create one plot for each laboratory parameter:
library(plyr) # for dlply
plotsLab <- dlply(dataPlot, "PARAMCD", function(dataParam) {

    # code of the parameter for this subset of the data
    paramCode <- unique(dataParam$PARAMCD)

    plotTitle <- paste(
        "Actual value of",
        getLabelParamcd(
            paramcd = paramCode, data = dataParam,
            paramcdVar = "PARAMCD", paramVar = "PARAM"
        )
    )

    scatterplotClinData(
        data = dataParam,
        xVar = "ADY", yVar = "LBSTRESN",
        aesLineVar = list(group = "USUBJID", color = "TRTP"),
        aesPointVar = list(color = "TRTP"),
        title = plotTitle,
        labelVars = labelVars,
        # variable with the link to the patient profiles
        pathVar = "patientProfilePath",
        # important: each plot should have an unique ID!
        # for unique relationship of interactivity between plot <-> table
        id = paste("labProfileLoop", paramCode, sep = "-"),
        table = FALSE,
        verbose = TRUE
    )

})

# 2) include the list of plots in the report,
# each with the label of the parameter as section title
listLabels <- getLabelParamcd(
    paramcd = names(plotsLab), data = dataLB,
    paramcdVar = "PARAMCD", paramVar = "PARAM"
)
clinUtils::knitPrintListObjects(
    xList = plotsLab,
    titleLevel = 4, titles = listLabels
)

2.3.0.1 Potassium (mmol/L)

2.3.0.2 Sodium (mmol/L)

3 Palettes

3.1 Set palette for the entire session

Palette for the colors and shapes associated with specific variables can be set for all clinical data visualizations at once by setting the clinDataReview.colors and clinDataReview.shapes options at the start of the R session.

Please see the clinUtils package for the default colors and shapes.

# display default palettes
# (the current values of the options are stored in 'colorsDefault' and
# 'shapesDefault', so that they can be set back at the end of this section)
# the default for the colors is a function:
colorsDefault <- getOption("clinDataReview.colors")
str(colorsDefault)
## function (n, alpha = 1, begin = 0, end = 1, direction = 1, option = "D")
# the default for the shapes is an integer vector:
shapesDefault <- getOption("clinDataReview.shapes")
str(shapesDefault)
##  int [1:24] 21 22 23 24 25 0 1 2 3 4 ...
# time-intervals of the adverse events, displayed with the default palettes
timeProfileIntervalPlot(
    data = dataAE,
    paramVar = "USUBJID",
    # start of the interval: day and symbol
    timeStartVar = "ASTDY", timeStartShapeVar = "AESTFLG",
    # end of the interval: day and symbol
    timeEndVar = "AENDY", timeEndShapeVar = "AEENFLG",
    # color of the interval
    colorVar = "AESEV",
    labelVars = labelVars
)

The palettes can be set for all visualizations, e.g. at the start of the R session, with:

# set custom color and shape palettes for the entire R session
options(
    clinDataReview.colors = c("gold", "pink", "cyan"),
    clinDataReview.shapes = clinShapes
)

If the palette contains fewer elements than are present in the data, its elements are replicated.

# same visualization of the adverse events, displayed with the
# palettes currently set in the options
timeProfileIntervalPlot(
    data = dataAE,
    paramVar = "USUBJID",
    # start of the interval: day and symbol
    timeStartVar = "ASTDY", timeStartShapeVar = "AESTFLG",
    # end of the interval: day and symbol
    timeEndVar = "AENDY", timeEndShapeVar = "AEENFLG",
    # color of the interval
    colorVar = "AESEV",
    labelVars = labelVars
)

Palettes are reset to their defaults at the start of a new R session, or by setting:

# reset the palettes to the defaults stored previously
options(
    clinDataReview.colors = colorsDefault,
    clinDataReview.shapes = shapesDefault
)

4 Appendix

4.1 Session info

R version 4.1.0 (2021-05-18)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_GB.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_GB.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_GB.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_GB.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: plyr(v.1.8.6), inTextSummaryTable(v.3.0.0), reshape2(v.1.4.4), patientProfilesVis(v.2.0.0), clinUtils(v.0.0.1), plotly(v.4.9.3), ggplot2(v.3.3.3), pander(v.0.6.3), clinDataReview(v.1.0.0) and knitr(v.1.33)

loaded via a namespace (and not attached): httr(v.1.4.2), sass(v.0.4.0), tidyr(v.1.1.3), jsonlite(v.1.7.2), viridisLite(v.0.4.0), bslib(v.0.2.5.1), shiny(v.1.6.0), ggrepel(v.0.9.1), yaml(v.2.2.1), gdtools(v.0.2.3), pillar(v.1.6.1), glue(v.1.4.2), uuid(v.0.1-4), digest(v.0.6.27), promises(v.1.2.0.1), colorspace(v.2.0-1), cowplot(v.1.1.1), htmltools(v.0.5.1.1), httpuv(v.1.6.1), pkgconfig(v.2.0.3), haven(v.2.4.1), bookdown(v.0.22), purrr(v.0.3.4), xtable(v.1.8-4), scales(v.1.1.1), later(v.1.2.0), officer(v.0.3.18), tibble(v.3.1.2), generics(v.0.1.0), farver(v.2.1.0), ellipsis(v.0.3.2), DT(v.0.18), withr(v.2.4.2), lazyeval(v.0.2.2), magrittr(v.2.0.1), crayon(v.1.4.1), mime(v.0.10), evaluate(v.0.14), fansi(v.0.5.0), forcats(v.0.5.1), xml2(v.1.3.2), tools(v.4.1.0), data.table(v.1.14.0), hms(v.1.1.0), lifecycle(v.1.0.0), stringr(v.1.4.0), flextable(v.0.6.6), V8(v.3.4.2), munsell(v.0.5.0), zip(v.2.1.1), compiler(v.4.1.0), jquerylib(v.0.1.4), systemfonts(v.1.0.2), rlang(v.0.4.11), grid(v.4.1.0), htmlwidgets(v.1.5.3), crosstalk(v.1.1.1), base64enc(v.0.1-3), labeling(v.0.4.2), rmarkdown(v.2.8), gtable(v.0.3.0), jsonvalidate(v.1.1.0), curl(v.4.3.1), R6(v.2.5.0), gridExtra(v.2.3), dplyr(v.1.0.6), fastmap(v.1.1.0), utf8(v.1.2.1), readr(v.1.4.0), stringi(v.1.6.2), parallel(v.4.1.0), Rcpp(v.1.0.6), vctrs(v.0.3.8), tidyselect(v.1.1.1) and xfun(v.0.23)