gtsummary Gallery

Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.

library(gtsummary); library(gt); library(survival)
library(dplyr); library(stringr); library(purrr); library(forcats)

Summary Tables

Add a spanning header over the group columns for increased clarity, and modify column headers.

trial[c("trt", "age", "grade")] %>%
  tbl_summary(by = trt, missing = "no") %>%
  modify_header(stat_by = md("**{level}** N =  {n} ({style_percent(p)}%)")) %>%
  add_n() %>%
  bold_labels() %>%
  as_gt() %>%
  tab_spanner(columns = starts_with("stat_"), md("**Randomization Group**"))
Characteristic1 N Randomization Group
Drug N = 107 (54%) Placebo N = 93 (46%)
Age, yrs 191 47 (39, 58) 45 (36, 54)
Grade 200
I 38 (36%) 29 (31%)
II 34 (32%) 24 (26%)
III 35 (33%) 40 (43%)

1 Statistics presented: median (IQR); n (%)


Modify the function that formats the p-values, change variable labels, updating tumor response header, and add a correction for multiple testing.

trial[!is.na(trial$response), c("response", "age", "grade")] %>%
  mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) %>%
  tbl_summary(
    by = response, 
    missing = "no",
    label = list(vars(age) ~ "Patient Age", vars(grade) ~ "Tumor Grade")
  ) %>%
  add_p(pvalue_fun = partial(style_pvalue, digits = 2)) %>%
  add_q()
Characteristic1 No Tumor Response, N = 108 Tumor Responded, N = 83 p-value2 q-value3
Patient Age 48 (38, 57) 45 (38, 55) 0.54 0.54
Tumor Grade 0.49 0.54
I 32 (30%) 31 (37%)
II 35 (32%) 22 (27%)
III 41 (38%) 30 (36%)

1 Statistics presented: median (IQR); n (%)

2 Statistical tests performed: Wilcoxon rank-sum test; chi-square test of independence

3 False discovery rate correction for multiple testing


Include missing tumor response as column using fct_explicit_na().

trial[c("response", "age", "grade")] %>%
  mutate(
    response = factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>% 
      fct_explicit_na(na_level = "Missing Response Status")
  ) %>%
  tbl_summary(
    by = response, 
    label = list(vars(age) ~ "Patient Age", vars(grade) ~ "Tumor Grade")
  )  
Characteristic1 No Tumor Response, N = 108 Tumor Responded, N = 83 Missing Response Status, N = 9
Patient Age 48 (38, 57) 45 (38, 55) 38 (34, 53)
Unknown 5 4 0
Tumor Grade
I 32 (30%) 31 (37%) 4 (44%)
II 35 (32%) 22 (27%) 1 (11%)
III 41 (38%) 30 (36%) 4 (44%)

1 Statistics presented: median (IQR); n (%)


Regression Tables

Include number of observations and the number of events in a univariate regression table.

trial[c("response", "age", "grade")] %>%
  tbl_uvregression(
    method = glm,
    y = response, 
    method.args = list(family = binomial),
    exponentiate = TRUE
  ) %>%
  add_nevent()
Characteristic N Event N OR1 95% CI1 p-value
Age, yrs 182 79 1.00 0.98, 1.02 0.7
Grade 191 83
I
II 0.65 0.31, 1.34 0.2
III 0.76 0.38, 1.49 0.4

1 OR = Odds Ratio, CI = Confidence Interval


Include two related models side-by-side with descriptive statistics.

gt_r1 <- glm(response ~ age + trt, trial, family = binomial) %>%
  tbl_regression(exponentiate = TRUE)
gt_r2 <- coxph(Surv(ttdeath, death) ~ age + trt, trial) %>%
  tbl_regression(exponentiate = TRUE)
gt_t1 <- trial[c("age", "trt")] %>% tbl_summary(missing = "no") %>% add_n()

tbl_merge(
  list(gt_t1, gt_r1, gt_r2),
  tab_spanner = c("**Summary Statistics**", "**Tumor Response**", "**Time to Death**")
)
Characteristic Summary Statistics Tumor Response Time to Death
N N = 2001 OR2 95% CI2 p-value HR2 95% CI2 p-value
Age, yrs 191 46 (37, 57) 0.99 0.97, 1.02 0.6 1.01 1.00, 1.02 0.2
Treatment Randomization 200
Drug 107 (54%)
Placebo 93 (46%) 0.53 0.29, 0.96 0.038 1.39 0.94, 2.04 0.10

1 Statistics presented: median (IQR); n (%)

2 OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio


Include the number of events at each level of a categorical predictor.

gt_model <-
  trial[c("ttdeath", "death", "stage", "grade")] %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(ttdeath, death), 
    exponentiate = TRUE,
    hide_n = TRUE
  )

gt_eventn <-
  trial %>%
  filter(death ==  1) %>%
  select(stage, grade) %>%
  tbl_summary(
    statistic = all_categorical() ~ "{n}",
    label = list(vars(stage) ~ "T Stage", vars(grade) ~ "Grade")
  ) %>%
  modify_header(stat_0 = md("**Event N**"))

tbl_merge(list(gt_eventn, gt_model)) %>%
  bold_labels() %>%
  italicize_levels() %>%
  as_gt(exclude = "tab_spanner")
Characteristic Event N1 HR2 95% CI2 p-value
T Stage
T1 23
T2 23 1.16 0.65, 2.07 0.6
T3 22 1.38 0.77, 2.48 0.3
T4 39 2.10 1.25, 3.52 0.005
Grade
I 29
II 28 1.25 0.74, 2.10 0.4
III 50 2.00 1.27, 3.16 0.003

1 Statistics presented: n

2 HR = Hazard Ratio, CI = Confidence Interval


Regression model where the covariates remain the same, and the outcome changes.

df_models <-
  tibble(outcome = c("age", "marker")) %>%
  mutate(
    outcome_label = map_chr(outcome,
                        ~attr(trial[[.x]], 'label')),
    lm_model = map(outcome,
                   ~lm(str_glue("{.x} ~ trt"), trial)),
    tbl_reg = map2(
      lm_model, outcome_label,
      ~tbl_regression(.x, show_single_row = "trt", label = list(trt = .y))
    )
  )

df_models %>%
  pull(tbl_reg) %>%
  tbl_stack() %>%
  modify_header(
    label = md("**Model Outcome**"),
    estimate = md("**Treatment Coef.**")
  ) %>%
  as_gt() %>%
  tab_footnote(
    footnote = "Values larger than 0 indicate larger values in the Drug group.", 
    locations = cells_column_labels(columns = vars(estimate))
  )
Model Outcome Treatment Coef.1 95% CI2 p-value
Age, yrs -1.9 -6.0, 2.2 0.4
Marker Level, ng/mL 0.06 -0.18, 0.31 0.6

1 Values larger than 0 indicate larger values in the Drug group.

2 CI = Confidence Interval

Add descriptive statistics by treatment group to the table above to produce a table often reported for randomized trials.

gt_sum <- 
  trial[c("age", "marker", "trt")] %>%
  mutate(trt = fct_rev(trt)) %>%
  tbl_summary(by = trt, 
              statistic = all_continuous() ~ "{mean} ({sd})",
              missing = "no") %>%
  add_n() %>%
  modify_header(stat_by = md("**{level}**"))

# before we merge we need to update the variable name in df_models gtsummary tables 
# to match the tbl_summary object
df_models2 <-
  df_models %>%
  mutate(tbl_reg = map2(tbl_reg, outcome,
                        function(x, y){
                          x$table_body$variable <- y 
                          x
                        }))


tbl_merge(list(
  gt_sum,
  pull(df_models2, tbl_reg) %>% tbl_stack()
)) %>%
  modify_header(estimate_2 = md("**Difference**")) %>%
  as_gt(exclude = "tab_spanner")
Characteristic N Placebo1 Drug1 Difference 95% CI2 p-value
Age, yrs 191 46 (13) 48 (15) -1.9 -6.0, 2.2 0.4
Marker Level, ng/mL 191 0.96 (0.83) 0.90 (0.89) 0.06 -0.18, 0.31 0.6

1 Statistics presented: mean (SD)

2 CI = Confidence Interval