The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This vignette presents a simulation study that evaluates how different item selection criteria and stopping rules affect classification accuracy and test efficiency in CD-CAT.
We use the k3_GDINA_HDLV dataset from
cdcat_sim: a 3-attribute, 70-item GDINA bank with
high-discrimination / low-variability item parameters, and 160 examinees
covering all possible attribute profiles (20 per profile).
| Factor | Levels |
|---|---|
| Item selection criterion | KL, PWKL, MPWKL, SHE |
| Stopping rule | Single threshold (\(\tau = 0.80\)), Dual threshold (\(\tau_1 = 0.70,\ \tau_2 = 0.10\)) |
All conditions share the same settings:
library(cdCAT)
# Dataset: K = 3, GDINA model, high-discrimination / low-variability
d <- cdcat_sim[["k3_GDINA_HDLV"]]
Q <- d$Q # 70 x 3
alpha <- d$alpha # 160 x 3 (true profiles)
responses <- d$responses # 160 x 70
K <- ncol(Q)
N <- nrow(alpha)
J <- nrow(Q)
# Build the item bank
items <- cdcat_items(Q, "GDINA", gdina_params = d$parameters)
items## cdCAT Item Bank
## Model : GDINA
## Items : 70
## Attrs : 3
# Draw one start item, fixed for all conditions and examinees
set.seed(42)
start_item <- sample(J, 1L)
cat("Fixed start item:", start_item, "\n")## Fixed start item: 49
conditions <- expand.grid(
criterion = c("KL", "PWKL", "MPWKL", "SHE"),
stop_rule = c("single", "dual"),
stringsAsFactors = FALSE
)run_condition <- function(criterion, stop_rule) {
threshold <- if (stop_rule == "single") 0.80 else c(0.70, 0.10)
results <- vector("list", N)
for (i in seq_len(N)) {
session <- CdcatSession$new(
items,
criterion = criterion,
method = "MAP",
prior = NULL, # uniform prior
threshold = threshold,
min_items = 1L,
max_items = J, # effectively no maximum: bounded by bank size
start_item = start_item
)
repeat {
item <- session$next_item()
if (item == 0L) break
session$update(item, responses[i, item])
}
res <- session$result()
results[[i]] <- list(
alpha_hat = res$alpha_hat,
n_items = res$n_items
)
}
results
}
set.seed(2025)
sim_results <- lapply(seq_len(nrow(conditions)), function(ci) {
run_condition(conditions$criterion[ci], conditions$stop_rule[ci])
})
names(sim_results) <- paste(conditions$criterion, conditions$stop_rule, sep = "_")compute_metrics <- function(results, alpha) {
N <- length(results)
K <- ncol(alpha)
alpha_hat_mat <- do.call(rbind, lapply(results, `[[`, "alpha_hat"))
n_items_vec <- sapply(results, `[[`, "n_items")
# PCCR: all K attributes correct
pccr <- mean(apply(alpha_hat_mat == alpha, 1, all))
# ACCR per attribute
accr <- colMeans(alpha_hat_mat == alpha)
names(accr) <- paste0("ACCR_A", seq_len(K))
# ATL
atl <- mean(n_items_vec)
c(PCCR = round(pccr, 4),
round(accr, 4),
ATL = round(atl, 2))
}
metrics_list <- lapply(sim_results, compute_metrics, alpha = alpha)
metrics_df <- as.data.frame(do.call(rbind, metrics_list))
metrics_df <- cbind(conditions, metrics_df)
rownames(metrics_df) <- NULLknitr::kable(
metrics_df,
col.names = c("Criterion", "Stop rule", "PCCR",
"ACCR A1", "ACCR A2", "ACCR A3", "ATL"),
caption = "Classification accuracy and average test length by condition.",
digits = 4,
align = "llccccc"
)| Criterion | Stop rule | PCCR | ACCR A1 | ACCR A2 | ACCR A3 | ATL |
|---|---|---|---|---|---|---|
| KL | single | 0.8188 | 0.9750 | 0.9312 | 0.8812 | 5.66 |
| PWKL | single | 0.7688 | 0.9188 | 0.9188 | 0.8812 | 3.67 |
| MPWKL | single | 0.7812 | 0.9188 | 0.9188 | 0.8938 | 3.67 |
| SHE | single | 0.7812 | 0.9188 | 0.9188 | 0.8938 | 3.67 |
| KL | dual | 0.9312 | 1.0000 | 0.9500 | 0.9812 | 7.68 |
| PWKL | dual | 0.8438 | 0.9250 | 0.9188 | 0.9625 | 4.31 |
| MPWKL | dual | 0.8438 | 0.9250 | 0.9188 | 0.9625 | 4.28 |
| SHE | dual | 0.8375 | 0.9312 | 0.9188 | 0.9562 | 4.24 |
agg_crit <- aggregate(
cbind(PCCR, ATL) ~ criterion,
data = metrics_df,
FUN = mean
)
knitr::kable(
agg_crit,
col.names = c("Criterion", "Mean PCCR", "Mean ATL"),
caption = "Average PCCR and ATL across stopping rules, by criterion.",
digits = 4
)| Criterion | Mean PCCR | Mean ATL |
|---|---|---|
| KL | 0.8750 | 6.670 |
| MPWKL | 0.8125 | 3.975 |
| PWKL | 0.8063 | 3.990 |
| SHE | 0.8094 | 3.955 |
agg_stop <- aggregate(
cbind(PCCR, ATL) ~ stop_rule,
data = metrics_df,
FUN = mean
)
knitr::kable(
agg_stop,
col.names = c("Stop rule", "Mean PCCR", "Mean ATL"),
caption = "Average PCCR and ATL across criteria, by stopping rule.",
digits = 4
)| Stop rule | Mean PCCR | Mean ATL |
|---|---|---|
| dual | 0.8641 | 5.1275 |
| single | 0.7875 | 4.1675 |
pccr_mat <- matrix(
metrics_df$PCCR,
nrow = 4,
dimnames = list(
Criterion = c("KL", "PWKL", "MPWKL", "SHE"),
Stop = c("single", "dual")
)
)
image(
t(pccr_mat),
col = hcl.colors(20, "Blues", rev = TRUE),
xaxt = "n", yaxt = "n",
zlim = c(min(pccr_mat) - 0.02, 1),
main = "PCCR",
xlab = "Stopping rule",
ylab = "Criterion"
)
axis(1, at = c(0, 1), labels = c("single", "dual"))
axis(2, at = seq(0, 1, length.out = 4),
labels = c("KL", "PWKL", "MPWKL", "SHE"), las = 2)
for (i in 1:2) for (j in 1:4) {
text((i - 1), (j - 1) / 3,
labels = sprintf("%.3f", pccr_mat[j, i]),
cex = 0.85)
}The table below recaps the best-performing condition per metric.
best_pccr <- metrics_df[which.max(metrics_df$PCCR), c("criterion","stop_rule","PCCR","ATL")]
best_atl <- metrics_df[which.min(metrics_df$ATL), c("criterion","stop_rule","PCCR","ATL")]
knitr::kable(
rbind(
cbind(Optimises = "PCCR (max)", best_pccr),
cbind(Optimises = "ATL (min)", best_atl)
),
row.names = FALSE,
caption = "Best condition per optimisation target."
)| Optimises | criterion | stop_rule | PCCR | ATL |
|---|---|---|---|---|
| PCCR (max) | KL | dual | 0.9312 | 7.68 |
| ATL (min) | PWKL | single | 0.7688 | 3.67 |
Note. All results are based on a single replication. For publication-quality conclusions, increase
Nor run multiple replications with different seeds.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.