Forest plots for regression models

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Forest plots are not limited to meta-analyses. Any table of point estimates with confidence intervals can be displayed as a forest plot — results from regression models, dose-response analyses, multi-outcome studies, and more.

This vignette shows how to go from a fitted R model to a forest plot in a few lines of code. broom::tidy() extracts parameters; broom is not a hard dependency and may be replaced by any function that returns a data frame with the right columns. marginaleffects powers the causal-inference examples in the final sections.

Simulated cohort

set.seed(42)
n <- 800

cohort <- data.frame(
  age      = round(runif(n, 30, 75)),
  female   = rbinom(n, 1, 0.52),
  bmi      = rnorm(n, 26, 4.5),
  smoker   = rbinom(n, 1, 0.20),
  activity = rbinom(n, 1, 0.45),   # 1 = physically active
  educ     = sample(c("Low", "Medium", "High"), n,
                    replace = TRUE, prob = c(0.25, 0.45, 0.30))
)

# Systolic blood pressure (mmHg): continuous outcome with true signal
cohort$sbp <- 110 +
  0.40 * cohort$age    -
  4.50 * cohort$female +
  0.50 * cohort$bmi    -
  2.00 * cohort$smoker +
  2.50 * cohort$activity +
  rnorm(n, sd = 8)

# Hypertension (SBP ≥ 140): binary outcome
cohort$htn <- as.integer(cohort$sbp >= 140)

# Education as ordered factor (for dose-response example)
cohort$educ <- factor(cohort$educ,
                      levels = c("Low", "Medium", "High"), ordered = FALSE)
cohort$educ <- relevel(cohort$educ, ref = "Low")

Multiple predictors from one model

The simplest workflow: fit a model, call broom::tidy(conf.int = TRUE), and pass the result to forrest(). Column names estimate, conf.low, and conf.high map directly to the estimate, lower, and upper arguments.

fit_lm <- lm(sbp ~ age + female + bmi + smoker + activity, data = cohort)

coefs <- broom::tidy(fit_lm, conf.int = TRUE)
coefs <- coefs[coefs$term != "(Intercept)", ]
coefs$term <- c(
  "Age (per 1 year)",
  "Female sex",
  "BMI (per 1 kg/m\u00b2)",
  "Current smoker",
  "Physically active"
)

# Formatted coefficient + CI text for right-hand column
coefs$coef_ci <- sprintf(
  "%.2f (%.2f, %.2f)",
  coefs$estimate, coefs$conf.low, coefs$conf.high
)

forrest(
  coefs,
  estimate = "estimate",
  lower    = "conf.low",
  upper    = "conf.high",
  label    = "term",
  header   = "Predictor",
  cols     = c("Coef (95% CI)" = "coef_ci"),
  widths   = c(3, 4, 2.5),
  xlab     = "Regression coefficient (95% CI)",
  stripe   = TRUE
)

For logistic regression, set exponentiate = TRUE in broom::tidy() to get odds ratios directly, then set log_scale = TRUE and ref_line = 1 in forrest():

fit_glm <- glm(htn ~ age + female + bmi + smoker + activity,
               data = cohort, family = binomial)

or_coefs <- broom::tidy(fit_glm, conf.int = TRUE, exponentiate = TRUE)
or_coefs <- or_coefs[or_coefs$term != "(Intercept)", ]
or_coefs$term <- c(
  "Age (per 1 year)",
  "Female sex",
  "BMI (per 1 kg/m\u00b2)",
  "Current smoker",
  "Physically active"
)

or_coefs$or_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  or_coefs$estimate, or_coefs$conf.low, or_coefs$conf.high
)

forrest(
  or_coefs,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "term",
  log_scale  = TRUE,
  ref_line   = 1,
  header     = "Predictor",
  cols       = c("OR (95% CI)" = "or_text"),
  widths     = c(3, 4, 2),
  xlab       = "Odds ratio (95% CI)"
)

Same predictor across progressive adjustment models

A common reporting pattern is to show how an association changes as covariates are progressively added.

m1 <- glm(htn ~ activity,
          data = cohort, family = binomial)
m2 <- glm(htn ~ activity + age + female,
          data = cohort, family = binomial)
m3 <- glm(htn ~ activity + age + female + bmi + smoker,
          data = cohort, family = binomial)

extract_activity <- function(mod, label) {
  row         <- broom::tidy(mod, conf.int = TRUE, exponentiate = TRUE)
  row         <- row[row$term == "activity", ]
  row$model   <- label
  row
}

adj_dat <- rbind(
  extract_activity(m1, "Model 1: unadjusted"),
  extract_activity(m2, "Model 2: + age, sex"),
  extract_activity(m3, "Model 3: + BMI, smoking")
)

adj_dat$or_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  adj_dat$estimate, adj_dat$conf.low, adj_dat$conf.high
)

forrest(
  adj_dat,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "model",
  log_scale  = TRUE,
  ref_line   = 1,
  header     = "Adjustment set",
  cols       = c("OR (95% CI)" = "or_text"),
  widths     = c(4, 3.5, 2),
  xlab       = "OR for physical activity on hypertension (95% CI)"
)

Same predictor across multiple outcomes

To compare one exposure against several outcomes, loop over outcomes, extract the exposure row from each model, and stack into a single data frame. Use group to colour by outcome domain.

outcomes <- list(
  list(var = "sbp",      label = "Systolic BP (mmHg)",    domain = "Continuous"),
  list(var = "htn",      label = "Hypertension",          domain = "Binary"),
  list(var = "bmi",      label = "BMI (kg/m\u00b2)",      domain = "Continuous"),
  list(var = "smoker",   label = "Current smoker",        domain = "Binary")
)

rows <- lapply(outcomes, function(o) {
  if (o$domain == "Continuous") {
    fit <- lm(as.formula(paste(o$var, "~ activity + age + female")),
              data = cohort)
    r   <- broom::tidy(fit, conf.int = TRUE)
  } else {
    fit <- glm(as.formula(paste(o$var, "~ activity + age + female")),
               data = cohort, family = binomial)
    r   <- broom::tidy(fit, conf.int = TRUE, exponentiate = TRUE)
  }
  r          <- r[r$term == "activity", ]
  r$outcome  <- o$label
  r$domain   <- o$domain
  r
})

multi_out <- do.call(rbind, rows)
multi_out$est_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  multi_out$estimate,
  multi_out$conf.low,
  multi_out$conf.high
)

forrest(
  multi_out,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "outcome",
  group      = "domain",
  ref_line   = 0,
  legend_pos = "topright",
  header     = "Outcome",
  cols       = c("Estimate (95% CI)" = "est_text"),
  widths     = c(3.5, 3.5, 2.5),
  xlab       = "Effect of physical activity (coef or OR, 95% CI)"
)

Dose-response (exposure categories)

For categorical exposures (quartiles, tertiles, and so on), the reference category has estimate = NA because there is no contrast to estimate. forrest() renders this row without a CI or point but keeps the label, making the reference position visually clear. Supply a text column to print "1.00 (ref)" in the right-hand panel. Set ref_label = TRUE to have forrest() append " (Ref.)" to the label automatically.

# Quartiles of age
cohort$age_q <- cut(
  cohort$age,
  breaks         = quantile(cohort$age, 0:4 / 4),
  include.lowest = TRUE,
  labels         = c("Q1 (\u226445 y)", "Q2 (45\u201356 y)",
                     "Q3 (56\u201366 y)", "Q4 (>66 y)")
)
cohort$age_q <- relevel(cohort$age_q, ref = "Q1 (\u226445 y)")

fit_q   <- glm(htn ~ age_q + female + bmi + smoker + activity,
               data = cohort, family = binomial)
q_coefs <- broom::tidy(fit_q, conf.int = TRUE, exponentiate = TRUE)
q_coefs <- q_coefs[grep("^age_q", q_coefs$term), ]

q_plot <- rbind(
  data.frame(
    label     = "Q1 (\u226445 y, ref)",
    estimate  = NA_real_,
    conf.low  = NA_real_,
    conf.high = NA_real_,
    or_ci     = "1.00 (ref)",
    is_sum    = FALSE
  ),
  data.frame(
    label     = gsub("^age_q", "", q_coefs$term),
    estimate  = q_coefs$estimate,
    conf.low  = q_coefs$conf.low,
    conf.high = q_coefs$conf.high,
    or_ci     = sprintf("%.2f (%.2f, %.2f)",
                        q_coefs$estimate,
                        q_coefs$conf.low,
                        q_coefs$conf.high),
    is_sum    = FALSE
  )
)

forrest(
  q_plot,
  estimate   = "estimate",
  lower      = "conf.low",
  upper      = "conf.high",
  label      = "label",
  is_summary = "is_sum",
  log_scale  = TRUE,
  ref_line   = 1,
  header     = "Age quartile",
  cols       = c("OR (95% CI)" = "or_ci"),
  widths     = c(3, 4, 2),
  xlab       = "OR for hypertension (95% CI)"
)

Marginal effects: g-computation

Parametric g-computation estimates the average treatment effect (ATE) by predicting outcomes under two counterfactual worlds — everyone treated vs. no one treated — and averaging the difference. marginaleffects::avg_comparisons() implements this in one line.

The example below estimates the marginal effect of physical activity on SBP, overall and stratified by sex and education level.

library(marginaleffects)

# Outcome model with treatment × covariate interactions so that
# g-computation captures heterogeneous treatment effects
fit_sbp <- lm(
  sbp ~ activity * (age + female + bmi + smoker),
  data = cohort
)

# Helper: extract one avg_comparisons result into a plot row
make_row <- function(me, label, is_sum = FALSE) {
  data.frame(
    label    = label,
    estimate = me$estimate,
    lower    = me$conf.low,
    upper    = me$conf.high,
    is_sum   = is_sum
  )
}

# Overall ATE
ate_sbp <- avg_comparisons(fit_sbp, variables = "activity")

# ATE by sex: subset newdata to each stratum
sex_rows <- lapply(
  list(`  Female` = 0, `  Male` = 1),
  function(f) avg_comparisons(fit_sbp, variables = "activity",
                               newdata = subset(cohort, female == f))
)

# ATE by education level
educ_rows <- setNames(
  lapply(c("Low", "Medium", "High"), function(e)
    avg_comparisons(fit_sbp, variables = "activity",
                    newdata = subset(cohort, educ == e))),
  c("  Low", "  Medium", "  High")
)

# Stack into a single data frame with a stratum column for section headers.
# No manual header or spacer rows needed.
gcomp_dat <- rbind(
  make_row(ate_sbp, "Overall (ATE)", is_sum = TRUE) |>
    transform(stratum = "Overall"),
  do.call(rbind, lapply(
    list(list(label = "Female", f = 0), list(label = "Male", f = 1)),
    function(s) {
      r <- make_row(avg_comparisons(fit_sbp, variables = "activity",
                                    newdata = subset(cohort, female == s$f)),
                   label = s$label)
      r$stratum <- "Sex"
      r
    }
  )),
  do.call(rbind, lapply(
    list(list(label = "Low", e = "Low"),
         list(label = "Medium", e = "Medium"),
         list(label = "High",   e = "High")),
    function(s) {
      r <- make_row(avg_comparisons(fit_sbp, variables = "activity",
                                    newdata = subset(cohort, educ == s$e)),
                   label = s$label)
      r$stratum <- "Education level"
      r
    }
  ))
)

gcomp_dat$est_text <- sprintf(
  "%.2f (%.2f, %.2f)",
  gcomp_dat$estimate, gcomp_dat$lower, gcomp_dat$upper
)

forrest(
  gcomp_dat,
  estimate   = "estimate",
  lower      = "lower",
  upper      = "upper",
  label      = "label",
  section    = "stratum",
  is_summary = "is_sum",
  cols       = c("Marginal effect (95% CI)" = "est_text"),
  widths     = c(3.5, 4, 3),
  ref_line   = 0,
  header     = "Stratum",
  xlab       = "Marginal effect on SBP (mmHg): physical activity (g-computation)"
)

Marginal effects: inverse probability weighting (IPW)

IPW re-weights observations by the inverse of their propensity score so that the weighted sample resembles a randomised trial with respect to observed confounders. The example below uses marginaleffects to implement the Hajek estimator and compares it to g-computation for five strata (overall + sex + education). Using group and dodge, both sets of estimates are displayed side-by-side in one figure.

# ── Step 1: propensity score model ────────────────────────────────────────
ps_mod       <- glm(activity ~ age + female + bmi + smoker + educ,
                    data = cohort, family = binomial)
cohort$ps    <- predict(ps_mod, type = "response")
cohort$ipw   <- ifelse(cohort$activity == 1,
                       1 / cohort$ps,
                       1 / (1 - cohort$ps))

# Trim extreme weights at the 1st / 99th percentile for numerical stability
cohort$ipw_t <- pmin(pmax(cohort$ipw,
                           quantile(cohort$ipw, 0.01)),
                     quantile(cohort$ipw, 0.99))

# ── Step 2: IPW-weighted outcome model ────────────────────────────────────
fit_sbp_ipw <- lm(
  sbp ~ activity * (age + female + bmi + smoker),
  data = cohort, weights = ipw_t
)

# ── Step 3: build comparison rows — no header rows, just labelled strata ──
# Each stratum appears twice: once for each method.  The dodge layout
# places the two symbols side-by-side so CIs can be read off the same axis.
strata <- list(
  list(label = "Overall",         nd = cohort,                       is_sum = TRUE),
  list(label = "  Female",        nd = subset(cohort, female == 0),  is_sum = FALSE),
  list(label = "  Male",          nd = subset(cohort, female == 1),  is_sum = FALSE),
  list(label = "  Low educ.",     nd = subset(cohort, educ == "Low"), is_sum = FALSE),
  list(label = "  High educ.",    nd = subset(cohort, educ == "High"),is_sum = FALSE)
)

rows <- do.call(rbind, lapply(strata, function(s) {
  # G-computation: standard avg_comparisons on the interaction model
  gc  <- avg_comparisons(fit_sbp,     variables = "activity",
                          newdata = s$nd)
  # IPW (Hajek): weighted outcome model + Hajek normalisation via `wts`
  ipw <- avg_comparisons(fit_sbp_ipw, variables = "activity",
                          wts = "ipw_t", newdata = s$nd)
  rbind(
    data.frame(label = s$label, method = "G-computation",
               estimate = gc$estimate,  lower = gc$conf.low,
               upper    = gc$conf.high, is_sum = s$is_sum),
    data.frame(label = s$label, method = "IPW (Hajek)",
               estimate = ipw$estimate, lower = ipw$conf.low,
               upper    = ipw$conf.high, is_sum = s$is_sum)
  )
}))

# Separate text columns per method for cols_by_group display
rows$text_gc  <- ifelse(rows$method == "G-computation",
                         sprintf("%.2f (%.2f, %.2f)",
                                 rows$estimate, rows$lower, rows$upper), "")
rows$text_ipw <- ifelse(rows$method == "IPW (Hajek)",
                         sprintf("%.2f (%.2f, %.2f)",
                                 rows$estimate, rows$lower, rows$upper), "")

forrest(
  rows,
  estimate      = "estimate",
  lower         = "lower",
  upper         = "upper",
  label         = "label",
  group         = "method",
  is_summary    = "is_sum",
  dodge         = TRUE,
  cols_by_group = TRUE,
  cols          = c("G-comp (95% CI)" = "text_gc",
                    "IPW (95% CI)"    = "text_ipw"),
  widths        = c(3, 3.5, 2.5, 2.5),
  ref_line      = 0,
  legend_pos    = "topright",
  header        = "Subgroup",
  xlab          = "Marginal effect on SBP (mmHg): physical activity (95% CI)"
)

Time-varying exposures and longitudinal studies

Any analysis that produces estimates at multiple time points — distributed lag models, life-course analyses, repeated measures, cumulative exposure windows — yields a data frame where time is just another dimension. The same section / dodge / group building blocks that handle predictor groups and subgroup analyses apply here without modification.

Distributed lag models

A distributed lag model (DLM) estimates the association between an exposure and an outcome separately at each lag. The result is one row per lag per exposure. Use section = "exposure" to group lags under each exposure, and add an overall (cumulative) estimate as an is_summary = TRUE diamond at the bottom of each section.

set.seed(2025)

# Simulate lag-specific estimates for three environmental exposures
# Each exposure has a distinct lag-response shape
make_lags <- function(pattern) {
  se  <- runif(length(pattern), 0.03, 0.06)
  data.frame(
    estimate = pattern + rnorm(length(pattern), sd = 0.02),
    lower    = pattern - 1.96 * se,
    upper    = pattern + 1.96 * se
  )
}

pm25  <- make_lags(c(0.04, 0.11, 0.10, 0.06, 0.03,  0.01,  0.00))
noise <- make_lags(c(0.09, 0.05,  0.02, 0.01, 0.00, -0.01,  0.00))
green <- make_lags(c(-0.02, -0.04, -0.07, -0.09, -0.10, -0.09, -0.08))

lag_labels <- paste("Lag", 0:6, "(weeks)")

dlm_dat <- rbind(
  data.frame(exposure = "PM2.5",        lag = lag_labels, pm25,  is_sum = FALSE),
  data.frame(exposure = "PM2.5",        lag = "Cumulative", estimate = 0.35,
             lower = 0.18, upper = 0.52, is_sum = TRUE),
  data.frame(exposure = "Road noise",   lag = lag_labels, noise, is_sum = FALSE),
  data.frame(exposure = "Road noise",   lag = "Cumulative", estimate = 0.16,
             lower = 0.04, upper = 0.28, is_sum = TRUE),
  data.frame(exposure = "Green space",  lag = lag_labels, green, is_sum = FALSE),
  data.frame(exposure = "Green space",  lag = "Cumulative", estimate = -0.49,
             lower = -0.68, upper = -0.30, is_sum = TRUE)
)

dlm_dat$est_text <- ifelse(
  dlm_dat$is_sum,
  sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper),
  sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper)
)

forrest(
  dlm_dat,
  estimate   = "estimate",
  lower      = "lower",
  upper      = "upper",
  label      = "lag",
  section    = "exposure",
  is_summary = "is_sum",
  ref_line   = 0,
  header     = "Lag",
  cols       = c("Coef (95% CI)" = "est_text"),
  widths     = c(3.5, 4, 2.8),
  xlab       = "Regression coefficient per IQR increase (95% CI)"
)

Life-course analysis: same exposure across developmental periods

A life-course design estimates the same exposure–outcome association at multiple developmental stages. Use section = "life_stage" to organise the stages as named groups, with one row per exposure within each stage.

set.seed(99)

make_row <- function(exposure, life_stage, est, se) {
  data.frame(
    life_stage = life_stage,
    exposure   = exposure,
    estimate   = est  + rnorm(1, sd = 0.01),
    lower      = est  - 1.96 * se,
    upper      = est  + 1.96 * se
  )
}

lc_dat <- rbind(
  make_row("Noise exposure",         "Prenatal",        0.08, 0.03),
  make_row("Green space access",     "Prenatal",       -0.05, 0.04),
  make_row("Traffic-related air",    "Prenatal",        0.06, 0.03),
  make_row("Noise exposure",         "Early childhood", 0.12, 0.04),
  make_row("Green space access",     "Early childhood",-0.09, 0.05),
  make_row("Traffic-related air",    "Early childhood", 0.10, 0.04),
  make_row("Noise exposure",         "Mid-childhood",   0.15, 0.05),
  make_row("Green space access",     "Mid-childhood",  -0.13, 0.05),
  make_row("Traffic-related air",    "Mid-childhood",   0.14, 0.05),
  make_row("Noise exposure",         "Adolescence",     0.09, 0.04),
  make_row("Green space access",     "Adolescence",    -0.07, 0.05),
  make_row("Traffic-related air",    "Adolescence",     0.08, 0.04)
)

forrest(
  lc_dat,
  estimate = "estimate",
  lower    = "lower",
  upper    = "upper",
  label    = "exposure",
  section  = "life_stage",
  group    = "exposure",
  ref_line = 0,
  header   = "Exposure",
  xlab     = "Coefficient per IQR increase in SBP (95% CI)"
)

Life-course analysis: comparing stages side by side (dodge)

When the question is how does the association at one developmental stage compare to another for the same exposure, flip the structure: label is the exposure, group is the life stage, and dodge = TRUE places all stages side by side within each exposure row.

# Same data — one row per exposure × life stage
forrest(
  lc_dat,
  estimate = "estimate",
  lower    = "lower",
  upper    = "upper",
  label    = "exposure",
  group    = "life_stage",
  dodge    = TRUE,
  ref_line = 0,
  header   = "Exposure",
  xlab     = "Coefficient per IQR increase in SBP (95% CI)"
)

Saving plots

save_forrest(
  file   = "regression_forest.pdf",
  plot   = function() {
    forrest(
      coefs,
      estimate = "estimate",
      lower    = "conf.low",
      upper    = "conf.high",
      label    = "term",
      stripe   = TRUE,
      xlab     = "Regression coefficient (95% CI)"
    )
  },
  width  = 9,
  height = 5
)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.