GSCUSUM charts

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

GSCUSUM charts

Lena Hubig

2019-10-02

Overview

This vignette describes how to use GSCUSUM charts, an extension to standard CUSUM charts for binary performance data grouped in samples of unequal size.

Data preparation

Following information have to be available:

patient-individual outcomes
block-identifier in continuous sequence (can be obtain for example with dplyr::group_indices())
(patient-individual risk scores / risk of adverse event/failure)

These information are collected in a numeric matrix.

Non-risk-adjusted example data:

head(gscusum_example_data)
#> # A tibble: 6 x 4
#>       t y      year block_identifier
#>   <int> <lgl> <dbl>            <int>
#> 1     1 FALSE  2016                1
#> 2     2 FALSE  2016                1
#> 3     3 FALSE  2016                1
#> 4     4 FALSE  2016                1
#> 5     5 FALSE  2016                1
#> 6     6 FALSE  2016                1

Risk-adjusted example data:

head(ragscusum_example_data)
#> # A tibble: 6 x 5
#>       t y       score  year block_identifier
#>   <int> <lgl>   <dbl> <dbl>            <int>
#> 1     1 FALSE 0.00829  2016                1
#> 2     2 FALSE 0.00237  2016                1
#> 3     3 FALSE 0.00926  2016                1
#> 4     4 FALSE 0.00394  2016                1
#> 5     5 FALSE 0.0241   2016                1
#> 6     6 FALSE 0.00557  2016                1

Non-risk-adjusted GSCUSUM chart

Like in the standard CUSUM chart (see vignette for CUSUM charts), parameters have to be estimated in order to set up the charts,

failure_probability <- mean(gscusum_example_data$y[gscusum_example_data$year == 2016])

n_patients <- nrow(gscusum_example_data[gscusum_example_data$year == 2016,])

and control limits have to be estimated:


cusum_limit <- cusum_limit_sim(failure_probability,
                            n_patients,
                            odds_multiplier = 2,
                            n_simulation = 1000,
                            alpha = 0.05,
                            seed = 2046)


print(cusum_limit)
#> [1] 4.91128

GSCUSUM charts are constructed on performance data from 2017.


gscusum_data <- gscusum_example_data[gscusum_example_data$year == 2017,]

input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)


gcs <- gscusum(input_outcomes = input_outcomes,
              failure_probability = failure_probability,
              odds_multiplier = 2,
              limit = cusum_limit,
              max_num_shuffles = 1000,
              quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))

This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.

gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#>   sig_prob        avg min q05 q25     median       q75       q95       max
#> 1        0 0.08151062   0   0   0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2        0 0.13900012   0   0   0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3        0 0.15609100   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4        0 0.17359918   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 5        0 0.19099323   0   0   0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6        0 0.19285629   0   0   0 0.08679199 0.3757019 0.5778198 0.9820557

gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))

col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)

ggplot() +
  geom_line(data = gcs, aes(x = t, y = sig_prob)) +
  geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
  scale_color_manual(guide=FALSE, values = palette) +
  scale_y_continuous(name = "Signal Probability", limits = c(0,1))+
  theme_bw()

The complete run can be plotted with:

nblock <- max(gcs$block_identifier)

p <- ggplot(gcs)

for ( i in 1: nblock){
  dblock <- gcs[gcs$block_identifier == i,]
  col <- ifelse(i %% 2 == 0,col2,col1)
  dblock_before <- dblock[1,]
  dblock_before$t <- dblock_before$t - .5
  dblock_after <- dblock[nrow(dblock),]
  dblock_after$t <- dblock_after$t + .5
  dblock_n <- rbind(dblock, dblock_before, dblock_after)

  p <- p +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)

}

p <- p +
  geom_line(data = gcs, aes(x = t, y = median)) +
  geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
  geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
  theme_bw() +
  scale_y_continuous(name = "CUSUM Distribution") +
  scale_x_continuous(name = "Sequence of Observations") +
  scale_fill_manual(values = palette, guide = FALSE) +
  labs(subtitle = "GSCUSUM")
p

Risk-adjusted GSCUSUM chart

Like in the standard RA-CUSUM chart (see vignette for CUSUM charts), parameters are estimated in order to set up the charts,

n_patients <- nrow(ragscusum_example_data[ragscusum_example_data$year == 2016,])

and control limits are set:


racusum_limit <- racusum_limit_sim(patient_risks = ragscusum_example_data$score[ragscusum_example_data$year == 2016],
                            odds_multiplier = 2,
                            n_simulation = 1000,
                            alpha = 0.05,
                            seed = 2046)


print(racusum_limit)
#> [1] 2.403465

GSCUSUM charts are constructed on performance data from 2017.


ragscusum_data <- ragscusum_example_data[ragscusum_example_data$year == 2017,]

input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)


gcs <- gscusum(input_outcomes = input_outcomes,
              failure_probability = failure_probability,
              odds_multiplier = 2,
              limit = cusum_limit,
              max_num_shuffles = 1000,
              quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))

This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.

gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#>   sig_prob        avg min q05 q25     median       q75       q95       max
#> 1        0 0.07905548   0   0   0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2        0 0.12831284   0   0   0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3        0 0.16331018   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4        0 0.17408902   0   0   0 0.00000000 0.2889099 0.5778198 0.9820557
#> 5        0 0.19183142   0   0   0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6        0 0.19487628   0   0   0 0.08679199 0.3757019 0.5778198 0.9820557

gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))

col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)

ggplot() +
  geom_line(data = gcs, aes(x = t, y = sig_prob)) +
  geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
  scale_color_manual(guide=FALSE, values = palette) +
  scale_y_continuous(name = "Signal Probability", limit = c(0,1))+
  theme_bw()

The complete run can be plotted with:

nblock <- max(gcs$block_identifier)

p <- ggplot(gcs)

for ( i in 1: nblock){
  dblock <- gcs[gcs$block_identifier == i,]
  col <- ifelse(i %% 2 == 0,col2,col1)
  dblock_before <- dblock[1,]
  dblock_before$t <- dblock_before$t - .5
  dblock_after <- dblock[nrow(dblock),]
  dblock_after$t <- dblock_after$t + .5
  dblock_n <- rbind(dblock, dblock_before, dblock_after)

  p <- p +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)

}

p <- p +
  geom_line(data = gcs, aes(x = t, y = median)) +
  geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
  geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
  theme_bw() +
  scale_y_continuous(name = "RACUSUM Distribution") +
  scale_x_continuous(name = "Sequence of Observations") +
  scale_fill_manual(values = palette, guide = FALSE) +
  labs(subtitle = "RA-GSCUSUM")
p

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.