The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This vignette describes how to use GSCUSUM charts, an extension to standard CUSUM charts for binary performance data grouped in samples of unequal size.
Following information have to be available:
dplyr::group_indices()
)These information are collected in a numeric matrix.
Non-risk-adjusted example data:
head(gscusum_example_data)
#> # A tibble: 6 x 4
#> t y year block_identifier
#> <int> <lgl> <dbl> <int>
#> 1 1 FALSE 2016 1
#> 2 2 FALSE 2016 1
#> 3 3 FALSE 2016 1
#> 4 4 FALSE 2016 1
#> 5 5 FALSE 2016 1
#> 6 6 FALSE 2016 1
Risk-adjusted example data:
Like in the standard CUSUM chart (see vignette for CUSUM charts), parameters have to be estimated in order to set up the charts,
failure_probability <- mean(gscusum_example_data$y[gscusum_example_data$year == 2016])
n_patients <- nrow(gscusum_example_data[gscusum_example_data$year == 2016,])
and control limits have to be estimated:
cusum_limit <- cusum_limit_sim(failure_probability,
n_patients,
odds_multiplier = 2,
n_simulation = 1000,
alpha = 0.05,
seed = 2046)
print(cusum_limit)
#> [1] 4.91128
GSCUSUM charts are constructed on performance data from 2017.
gscusum_data <- gscusum_example_data[gscusum_example_data$year == 2017,]
input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)
gcs <- gscusum(input_outcomes = input_outcomes,
failure_probability = failure_probability,
odds_multiplier = 2,
limit = cusum_limit,
max_num_shuffles = 1000,
quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))
This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#> sig_prob avg min q05 q25 median q75 q95 max
#> 1 0 0.08151062 0 0 0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2 0 0.13900012 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3 0 0.15609100 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4 0 0.17359918 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557
#> 5 0 0.19099323 0 0 0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6 0 0.19285629 0 0 0 0.08679199 0.3757019 0.5778198 0.9820557
gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))
col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)
ggplot() +
geom_line(data = gcs, aes(x = t, y = sig_prob)) +
geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
scale_color_manual(guide=FALSE, values = palette) +
scale_y_continuous(name = "Signal Probability", limits = c(0,1))+
theme_bw()
The complete run can be plotted with:
nblock <- max(gcs$block_identifier)
p <- ggplot(gcs)
for ( i in 1: nblock){
dblock <- gcs[gcs$block_identifier == i,]
col <- ifelse(i %% 2 == 0,col2,col1)
dblock_before <- dblock[1,]
dblock_before$t <- dblock_before$t - .5
dblock_after <- dblock[nrow(dblock),]
dblock_after$t <- dblock_after$t + .5
dblock_n <- rbind(dblock, dblock_before, dblock_after)
p <- p +
geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)
}
p <- p +
geom_line(data = gcs, aes(x = t, y = median)) +
geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
theme_bw() +
scale_y_continuous(name = "CUSUM Distribution") +
scale_x_continuous(name = "Sequence of Observations") +
scale_fill_manual(values = palette, guide = FALSE) +
labs(subtitle = "GSCUSUM")
p
Like in the standard RA-CUSUM chart (see vignette for CUSUM charts), parameters are estimated in order to set up the charts,
and control limits are set:
racusum_limit <- racusum_limit_sim(patient_risks = ragscusum_example_data$score[ragscusum_example_data$year == 2016],
odds_multiplier = 2,
n_simulation = 1000,
alpha = 0.05,
seed = 2046)
print(racusum_limit)
#> [1] 2.403465
GSCUSUM charts are constructed on performance data from 2017.
ragscusum_data <- ragscusum_example_data[ragscusum_example_data$year == 2017,]
input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)
gcs <- gscusum(input_outcomes = input_outcomes,
failure_probability = failure_probability,
odds_multiplier = 2,
limit = cusum_limit,
max_num_shuffles = 1000,
quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))
This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#> sig_prob avg min q05 q25 median q75 q95 max
#> 1 0 0.07905548 0 0 0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2 0 0.12831284 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3 0 0.16331018 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4 0 0.17408902 0 0 0 0.00000000 0.2889099 0.5778198 0.9820557
#> 5 0 0.19183142 0 0 0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6 0 0.19487628 0 0 0 0.08679199 0.3757019 0.5778198 0.9820557
gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))
col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)
ggplot() +
geom_line(data = gcs, aes(x = t, y = sig_prob)) +
geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
scale_color_manual(guide=FALSE, values = palette) +
scale_y_continuous(name = "Signal Probability", limit = c(0,1))+
theme_bw()
The complete run can be plotted with:
nblock <- max(gcs$block_identifier)
p <- ggplot(gcs)
for ( i in 1: nblock){
dblock <- gcs[gcs$block_identifier == i,]
col <- ifelse(i %% 2 == 0,col2,col1)
dblock_before <- dblock[1,]
dblock_before$t <- dblock_before$t - .5
dblock_after <- dblock[nrow(dblock),]
dblock_after$t <- dblock_after$t + .5
dblock_n <- rbind(dblock, dblock_before, dblock_after)
p <- p +
geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)
}
p <- p +
geom_line(data = gcs, aes(x = t, y = median)) +
geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
theme_bw() +
scale_y_continuous(name = "RACUSUM Distribution") +
scale_x_continuous(name = "Sequence of Observations") +
scale_fill_manual(values = palette, guide = FALSE) +
labs(subtitle = "RA-GSCUSUM")
p
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.