Bootstrapping standardized rates and decomposition effects

The basic premise of bootstrapping Das Gupta’s rate decomposition can be thought of as expanding out to unit-level data, re-sampling, then aggregating back up.

Data from Wang 2000:

library(tidyverse)
library(DasGuptR)

eg.wang2000 <- data.frame(
  pop = rep(c("male", "female"), e = 12),
  ethnicity = rep(1:3, e = 4),
  age_group = rep(1:4, 3),
  size = c(
    130, 1305, 1539, 316, 211, 697, 334, 48, 105, 475, 424, 49,
    70, 604, 428, 43, 55, 127, 44, 9, 72, 178, 103, 12
  ),
  rate = c(
    12.31, 34.90, 52.91, 44.44, 16.67, 36.40, 51.20, 41.67, 12.38, 19.20, 21.23, 12.50,
    17.14, 35.55, 48.71, 55.81, 14.55, 39.37, 32.56, 55.56, 22.22, 20.34, 13.59, 8.33
  )
)

To do this with the DasGuptR package, we must first create a function to essentially “uncount” data into individual level data (where nrow = size of total population), then re-sample with replacement, then aggregate back up to the level of the original data.

getBS <- function() {
  # expand out
  exp <- eg.wang2000 |>
    mutate(
      r1 = (rate / 100) * size,
      r0 = (1 - (rate / 100)) * size
    ) |>
    select(pop, ethnicity, age_group, r0:r1) |>
    pivot_longer(r0:r1, names_to = "r", values_to = "n") |>
    mutate(n = as.integer(n)) |>
    uncount(n)

  # sample for each pop
  bs <- lapply(unique(eg.wang2000$pop), \(p)
  slice_sample(exp[exp$pop == p, ],
    prop = 1, replace = TRUE
  ) |>
    group_by(ethnicity, age_group) |>
    reframe(
      pop = p,
      size = n(),
      rate = mean(r == "r1") * 100
    ) |>
    ungroup())

  do.call(rbind, bs)
}

Take 500 resamples:

bsamps <-
  tibble(
    k = 1:500,
    i = map(1:500, ~ getBS())
  )

and apply the standardisation to each resample:

bsamps <- bsamps |>
  mutate(
    dgo = map(i, ~ dgnpop(.,
      pop = "pop", factors = "rate",
      id_vars = c("ethnicity", "age_group"),
      crossclassified = "size"
    ))
  )

Here are the original difference effects:

dgnpop(eg.wang2000,
  pop = "pop", factors = "rate",
  id_vars = c("ethnicity", "age_group"),
  crossclassified = "size"
) |>
  dg_table()
                   female     male        diff decomp
age_group_struct 34.80709 36.73462  1.92753072  68.93
ethnicity_struct 35.79043 35.75128 -0.03914904  -1.40
rate             35.35659 36.26448  0.90788961  32.47
crude            34.59755 37.39382  2.79627129 100.00

And here are the standard errors:

bsamps |>
  select(k, dgo) |>
  unnest(dgo) |>
  group_by(k, factor) |>
  reframe(diff = rate[pop == "female"] - rate[pop == "male"]) |>
  group_by(factor) |>
  reframe(se = sd(diff))
# A tibble: 4 × 2
  factor              se
  <chr>            <dbl>
1 age_group_struct 0.313
2 crude            1.36 
3 ethnicity_struct 0.338
4 rate             1.34 

Which we can take as a proportion of the crude rate difference:

Uncertainty in rates

We can use this same approach to estimate uncertainty in adjusted rates. For instance, using the Scottish Reconvictions data:

data(reconv)

src <- reconv |>
  transmute(
    pop = year, sex = Sex, age = Age,
    rate = prev_rate,
    size = offenders,
    r1 = rate * size,
    r0 = (1 - rate) * size
  )

getBS <- function() {
  # expand out
  exp <- src |>
    select(pop, sex, age, r0, r1) |>
    pivot_longer(r0:r1, names_to = "r", values_to = "n") |>
    mutate(n = as.integer(n)) |>
    uncount(n)

  # sample for each pop
  bs <- lapply(unique(src$pop), \(p)
  slice_sample(exp[exp$pop == p, ],
    prop = 1, replace = TRUE
  ) |>
    group_by(sex, age) |>
    reframe(
      pop = p,
      size = n(),
      rate = mean(r == "r1")
    ) |>
    ungroup())

  do.call(rbind, bs)
}


# 500 resamples
bsamps <-
  tibble(
    k = 1:500,
    i = map(1:500, ~ getBS())
  )

# apply DG on each resample
bsamps <- bsamps |>
  mutate(
    dgo = map(i, ~ dgnpop(.,
      pop = "pop", factors = "rate",
      id_vars = c("age", "sex"),
      crossclassified = "size"
    ))
  )

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.