diagFDR: generic diagnostics from Spectronaut exports

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

This vignette demonstrates how to compute diagFDR diagnostics from a Spectronaut export.

Runnable toy example (no external files required)

This section provides a small toy dataset that mimics the elution group-level universe returned by read_spectronaut_efficient().

library(diagFDR)

set.seed(3)

n <- 7000
pi_decoy <- 0.03

# Decoy indicator
is_decoy <- sample(c(FALSE, TRUE), n, replace = TRUE, prob = c(1 - pi_decoy, pi_decoy))

# Targets are a mixture: some null-like (close to decoys), some true (higher score)
# This yields realistic separation and non-trivial discoveries at 1% FDR.
is_true_target <- (!is_decoy) & (runif(n) < 0.30)  # 30% of targets are "true"
is_null_target <- (!is_decoy) & (!is_true_target)

score <- numeric(n)
score[is_decoy]       <- rnorm(sum(is_decoy), mean = 0.0, sd = 1.0)
score[is_null_target] <- rnorm(sum(is_null_target), mean = 0.2, sd = 1.0)
score[is_true_target] <- rnorm(sum(is_true_target), mean = 3.0, sd = 1.0)

toy <- data.frame(
  id = paste0("psm", seq_len(n)),
  is_decoy = is_decoy,
  run = sample(paste0("run", 1:4), n, replace = TRUE),
  score = score,
  pep = NA_real_
)

# Score-based TDC q-values (higher score is better)
toy <- toy[order(toy$score, decreasing = TRUE), ]
toy$D_cum <- cumsum(toy$is_decoy)
toy$T_cum <- cumsum(!toy$is_decoy)
toy$FDR_hat <- (toy$D_cum + 1) / pmax(toy$T_cum, 1)
toy$q <- rev(cummin(rev(toy$FDR_hat)))
toy <- toy[, c("id","is_decoy","q","pep","run","score")]

x_toy <- as_dfdr_tbl(
  toy,
  unit = "psm",
  scope = "global",
  q_source = "toy TDC from score",
  q_max_export = 1,
  provenance = list(tool = "toy")
)

diag <- dfdr_run_all(
  xs = list(univ = x_toy),
  alpha_main = 0.01,
  alphas = c(1e-4, 5e-4, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 1e-1, 2e-1),
  eps = 0.2,
  win_rel = 0.2,
  truncation = "warn_drop",
  low_conf = c(0.2, 0.5)
)

Headline stability at 1%

diag$tables$headline
#> # A tibble: 1 × 24
#>   alpha T_alpha D_alpha FDR_hat CV_hat FDR_minus1 FDR_plus1 FDR_minusK FDR_plusK
#>   <dbl>   <int>   <int>   <dbl>  <dbl>      <dbl>     <dbl>      <dbl>     <dbl>
#> 1  0.01    2819      27 0.00958  0.192    0.00922   0.00993    0.00603    0.0131
#> # ℹ 15 more variables: k2sqrtD <int>, FDR_minus2sqrtD <dbl>,
#> #   FDR_plus2sqrtD <dbl>, list <chr>, D_alpha_win <int>, effect_abs <dbl>,
#> #   IPE <dbl>, flag_Dalpha <chr>, flag_CV <chr>, flag_Dwin <chr>,
#> #   flag_IPE <chr>, flag_FDR <chr>, flag_equalchance <chr>, status <chr>,
#> #   interpretation <chr>

if (nrow(diag$tables$headline) > 0 && diag$tables$headline$T_alpha[1] == 0) {
  cat("\nNote: No discoveries at alpha_main for this toy run. ",
      "For demonstration, consider using alpha_main = 0.02.\n", sep = "")
}

Tail support and stability versus threshold

diag$plots$dalpha

diag$plots$cv

Local boundary support and elasticity

diag$plots$dwin

diag$plots$elasticity
#> Warning: Removed 3 rows containing missing values or values outside the scale range
#> (`geom_line()`).
#> Warning: Removed 3 rows containing missing values or values outside the scale range
#> (`geom_point()`).

Equal-chance plausibility by q-band

diag$tables$equal_chance_pooled
#> # A tibble: 1 × 12
#>   qmax_export low_lo low_hi N_test N_D_test pi_D_hat effect_abs ci95_lo ci95_hi
#>         <dbl>  <dbl>  <dbl>  <int>    <int>    <dbl>      <dbl>   <dbl>   <dbl>
#> 1           1    0.2    0.5      0        0       NA         NA      NA      NA
#> # ℹ 3 more variables: p_value_binom <dbl>, pass_minN <lgl>, list <chr>
diag$plots$equal_chance__mzid_PSM
#> NULL

Application to Spectronaut

The code below shows how to run diagFDR directly from an export of Spectronaut at the “elution group” level. The export is assumed to be a “normal” report (not “pivot”) that contains peptides characterized by “elution groups”. It has the columns “EG.Qvalue”, “EG.Cscore” and “EG.IsDecoy”.

library(diagFDR)

# Read efficiently (when facing big datasets)
rep <- read_spectronaut_efficient("path/to/search_results.Report-Peptide normal.tsv",
                                  minimal = TRUE, dec = ",")

univ_runwise <- spectronaut_runxprecursor(
  rep,
  q_col = "EG.Qvalue",
  score_col = "EG.Cscore"
)

# Run diagnostics
diag <- dfdr_run_all(
      list(runwise = univ_runwise),
      compute_pseudo_pvalues=TRUE
 )

# Export outputs
dfdr_write_report(
  diag,
  out_dir = "diagFDR_spectronaut_out",
  formats = c("csv", "png", "manifest", "readme", "summary")
)

# Optional: render a single HTML report 
dfdr_render_report(diag, out_dir = "diagFDR_spectronaut_out")

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.