## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", echo = FALSE)
library(knitr)

# Load a scenario's $summary from simulations/results/<name>.rds if present
# (so the tables reflect the most recent run); otherwise fall back to the cached
# values below. The simulations/ tree is not shipped with the package, so on a
# clean install the cached values are shown.
load_summary <- function(name) {
  candidates <- c(
    file.path("..", "simulations", "results", paste0(name, ".rds")),
    file.path("simulations", "results", paste0(name, ".rds"))
  )
  for (p in candidates) {
    if (file.exists(p)) {
      s <- tryCatch(readRDS(p)$summary, error = function(e) NULL)
      if (!is.null(s)) return(s)
    }
  }
  NULL
}

show_cols <- function(df, cols, ...) {
  cols <- intersect(cols, names(df))
  kable(df[, cols, drop = FALSE], row.names = FALSE, ...)
}

## ----design-table-------------------------------------------------------------
design <- data.frame(
  Regime = c("R1: perfect prediction (F=Y)", "R2: same-DGP draw",
             "R3: independent noise", "R4: LLM shift"),
  `Predicted F` = c(
    "predicted = observed (exact)",
    "fresh binary draw from the true model",
    "binary draw from scrambled item parameters",
    "binary draw from attenuated/shifted parameters"),
  `Role` = c("perfect predictor", "modest real signal",
             "uninformative LLM", "biased but informative LLM"),
  check.names = FALSE
)
kable(design, caption = "Four predictor regimes (all binar responses).")

## ----lambda-table-------------------------------------------------------------
lam <- load_summary("lambda_selection")
if (is.null(lam)) {
  lam <- data.frame(
    label       = c("R1: perfect (F=Y)", "R2: same-DGP draw",
                    "R3: independent noise", "R4: LLM shift"),
    mean_risk   = c(0.750, 0.119, 0.063, 0.105),
    median_risk = c(0.750, 0.108, 0.040, 0.104),
    prop_zero   = c(0.00, 0.07, 0.35, 0.16),
    mean_ppi    = c(0.750, 0.004, 0.000, 0.002)
  )
}
show_cols(lam, c("label", "mean_risk", "median_risk", "prop_zero", "mean_ppi"),
          caption = "Selected lambda by regime (ability-risk tuning).")

## ----coverage-table-----------------------------------------------------------
cov <- load_summary("coverage")
if (is.null(cov)) {
  cov <- data.frame(
    label        = c("R1 perfect (F=Y)", "R2 same-DGP draw",
                     "R3 independent noise", "R4 LLM shift"),
    louis_cov_90 = c(0.909, 0.916, 0.914, 0.905),
    louis_cov_95 = c(0.955, 0.957, 0.961, 0.954),
    em_cov_90    = c(0.713, 0.727, 0.721, 0.726),
    em_cov_95    = c(0.787, 0.797, 0.795, 0.792),
    mean_se_ratio = c(1.626, 1.616, 1.651, 1.622)
  )
}
show_cols(cov, c("label", "louis_cov_90", "louis_cov_95",
                 "em_cov_90", "em_cov_95", "mean_se_ratio"),
          caption = paste("Item-parameter CI coverage, all four regimes",
            "(200 reps). Nominal targets 0.90 and 0.95."))

## ----downstream-table---------------------------------------------------------
dwn <- load_summary("downstream")
if (is.null(dwn)) {
  dwn <- data.frame(
    label       = c("R1: perfect (F=Y)", "R2: same-DGP draw",
                    "R3: independent noise", "R4: LLM shift"),
    mean_lambda = c(0.750, 0.119, 0.063, 0.105),
    rmse_human  = c(1.4961, 1.5023, 1.5037, 1.5064),
    rmse_tuned  = c(1.4923, 1.5005, 1.5030, 1.5046),
    mean_delta  = c(-0.0038, -0.0019, -0.0006, -0.0018),
    delta_lo    = c(-0.0073, -0.0027, -0.0009, -0.0030),
    delta_hi    = c(-0.0003, -0.0011, -0.0003, -0.0007),
    prop_improve = c(0.48, 0.59, 0.50, 0.50),
    bias_a      = c(0.010, 0.025, 0.035, 0.008)
  )
}
show_cols(dwn, c("label", "mean_lambda", "rmse_human", "rmse_tuned",
                 "mean_delta", "delta_lo", "delta_hi", "prop_improve", "bias_a"),
          caption = "Downstream ability-score RMSE.")

## ----crossfit-table-----------------------------------------------------------
cf <- load_summary("crossfit")
if (is.null(cf)) {
  cf <- data.frame(
    label       = c("R1 perfect (F=Y)", "R2 same-DGP draw", "R4 LLM shift"),
    lambda_nocf = c(0.750, 0.119, 0.100),
    lambda_cf   = c(0.858, 0.135, 0.115),
    bias_a_nocf = c(0.0104, 0.0250, 0.0374),
    bias_a_cf   = c(0.0100, 0.0258, 0.0372),
    rmse_nocf   = c(1.4923, 1.5005, 1.5027),
    rmse_cf     = c(1.4927, 1.5005, 1.5029),
    cover_nocf  = c(0.954, 0.954, 0.953),
    cover_cf    = c(0.949, 0.956, 0.956)
  )
}
show_cols(cf, c("label", "lambda_nocf", "lambda_cf", "bias_a_nocf", "bias_a_cf",
                "rmse_nocf", "rmse_cf", "cover_nocf", "cover_cf"),
          caption = "Cross-fitted vs non-cross-fitted tuning (100 reps, R1/R2/R4).")

## ----coverage-tuned-table-----------------------------------------------------
ct <- load_summary("coverage_tuned")
if (is.null(ct)) {
  ct <- data.frame(
    label        = c("R1 perfect (F=Y)", "R2 same-DGP draw",
                     "R3 independent noise", "R4 LLM shift"),
    lambda_sd    = c(0.751, 0.116, 0.053, 0.103),
    lambda_xf    = c(0.859, 0.135, 0.074, 0.130),
    fixed_95     = c(0.955, 0.957, 0.961, 0.954),
    samedata_95  = c(0.955, 0.956, 0.958, 0.958),
    crossfit_95  = c(0.953, 0.956, 0.958, 0.958),
    bias_a_fixed    = c(0.0097, 0.0280, 0.0365, 0.0207),
    bias_a_samedata = c(0.0063, 0.0224, 0.0286, 0.0159),
    bias_a_crossfit = c(0.0051, 0.0222, 0.0289, 0.0166)
  )
}
show_cols(ct, c("label", "lambda_sd", "lambda_xf",
                "fixed_95", "samedata_95", "crossfit_95"),
          caption = paste("95% coverage rates of the true item parameters at the",
            "fixed, same-data-tuned, and cross-fit-tuned λ (200 reps).",
            "lambda_sd / lambda_xf are the mean selected λ for each tuner."))

## ----coverage-tuned-bias------------------------------------------------------
show_cols(ct, c("label", "bias_a_fixed", "bias_a_samedata", "bias_a_crossfit"),
          caption = paste("Mean discrimination bias E[a-hat - a] at each operating",
            "point. Same-data vs cross-fit differ by <= 0.001 except R1."))

