dicepro - Real Data Workflow (BlueCode + CellMixtures)

dicepro Team

2026-06-24

Note: All code chunks have eval = FALSE and are shown for illustration only. To run them interactively:

library(dicepro)
# copy-paste the chunks below into your R session

1 Overview

This vignette demonstrates a complete dicepro deconvolution workflow on real RNA-seq data using:


2 Data Loading

2.1 BlueCode Reference Signature Matrix

library(dicepro)

data(BlueCode)

cat("BlueCode dimensions     :", dim(BlueCode), "\n")
cat("Number of cell types    :", ncol(BlueCode), "\n")
cat("Number of genes         :", nrow(BlueCode), "\n")
print(head(colnames(BlueCode), 5L))
compartments <- list(
  Immune      = colnames(BlueCode)[1:9],
  Stromal     = colnames(BlueCode)[10:17],
  Endothelial = colnames(BlueCode)[18:20],
  Epithelial  = colnames(BlueCode)[21:25],
  Muscle      = colnames(BlueCode)[26:34]
)
for (comp in names(compartments)) {
  cat(sprintf("  %s (%d): %s\n",
              comp,
              length(compartments[[comp]]),
              paste(compartments[[comp]], collapse = ", ")))
}

2.2 CellMixtures Bulk Dataset

data(CellMixtures)

cat("CellMixtures dimensions :", dim(CellMixtures), "\n")
cat("Sample names            :", colnames(CellMixtures), "\n")
cat("First 5 gene names      :", head(rownames(CellMixtures), 5L), "\n")

3 Data Inspection

3.1 Gene Overlap

n_ref    <- nrow(BlueCode)
n_bulk   <- nrow(CellMixtures)
n_common <- length(intersect(rownames(BlueCode), rownames(CellMixtures)))

cat(sprintf(
  "Reference genes  : %d\nBulk genes       : %d\nCommon genes     : %d (%.1f%% of reference)\n",
  n_ref, n_bulk, n_common, 100 * n_common / n_ref
))

3.2 Expression Distribution

log2_bulk <- log2(as.matrix(CellMixtures) + 1)
boxplot(
  log2_bulk,
  las  = 2,
  col  = "#2c7bb680",
  ylab = expression(log[2](counts + 1)),
  main = "CellMixtures: expression distribution per sample"
)

4 Deconvolution with dicepro()

out <- dicepro(
  reference             = as.matrix(BlueCode),
  bulk                  = as.matrix(CellMixtures),
  methodDeconv          = "FARDEEP",
  W_prime               = 0,
  bulkName              = "CellMixtures",
  refName               = "BlueCode",
  hp_max_evals          = 150L,
  algo_select           = "random",
  output_path           = tempdir(),
  hspaceTechniqueChoose = "all"
)

5 Results

5.1 Optimal Hyperparameters

cat("Best lambda :", out$hyperparameters$lambda, "\n")
cat("Best gamma  :", out$hyperparameters$gamma,  "\n")
cat("Loss        :", out$metrics$loss,            "\n")
cat("Constraint  :", out$metrics$constraint,      "\n")

5.2 Hyperparameter Optimization Report

out$plot_hyperopt

5.3 Pareto Frontier

out$plot

5.4 Estimated Cell-Type Proportions

prop_mat    <- as.matrix(out$H)
prop_sorted <- prop_mat[, order(colMeans(prop_mat), decreasing = TRUE)]

heatmap(
  prop_sorted,
  Rowv    = NA,
  Colv    = NA,
  col     = colorRampPalette(c("white", "#2c7bb6", "#d7191c"))(50L),
  scale   = "none",
  margins = c(12, 6),
  main    = "Estimated cell-type proportions -- CellMixtures",
  xlab    = "Cell type",
  ylab    = "Sample"
)

5.5 Top Contributing Cell Types

mean_props <- sort(colMeans(prop_mat), decreasing = TRUE)

par(mar = c(10, 4, 3, 1))
barplot(
  mean_props,
  las       = 2,
  col       = "#2c7bb6",
  ylab      = "Mean proportion",
  main      = "Mean estimated proportions across samples",
  cex.names = 0.65
)

5.6 Per-Sample Composition

top10      <- names(sort(colMeans(prop_mat), decreasing = TRUE))[seq_len(10L)]
prop_top10 <- prop_mat[, top10, drop = FALSE]

cols <- colorRampPalette(
  c("#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c",
    "#1a9641", "#a6d96a", "#762a83", "#c2a5cf", "#e7d4e8")
)(10L)

barplot(
  t(prop_top10),
  col         = cols,
  legend      = colnames(prop_top10),
  args.legend = list(x = "topright", cex = 0.55, ncol = 2L),
  las         = 1,
  ylab        = "Proportion",
  xlab        = "Sample",
  main        = "Per-sample cell composition (top 10 cell types)",
  border      = NA
)

5.7 Compartment-Level Summary

ct_to_comp <- c(
  setNames(rep("Immune",      9L), compartments$Immune),
  setNames(rep("Stromal",     8L), compartments$Stromal),
  setNames(rep("Endothelial", 3L), compartments$Endothelial),
  setNames(rep("Epithelial",  5L), compartments$Epithelial),
  setNames(rep("Muscle",      9L), compartments$Muscle)
)

shared_ct  <- intersect(colnames(prop_mat), names(ct_to_comp))
comp_props <- vapply(
  unique(ct_to_comp),
  function(comp) {
    cts <- names(ct_to_comp)[ct_to_comp == comp & names(ct_to_comp) %in% shared_ct]
    if (length(cts) == 0L) return(NA_real_)
    mean(rowSums(prop_mat[, cts, drop = FALSE]))
  },
  numeric(1L)
)

comp_cols <- c(
  Immune      = "#2c7bb6",
  Stromal     = "#fdae61",
  Endothelial = "#1a9641",
  Epithelial  = "#d7191c",
  Muscle      = "#762a83"
)

barplot(
  sort(comp_props, decreasing = TRUE),
  col    = comp_cols[names(sort(comp_props, decreasing = TRUE))],
  ylab   = "Mean proportion",
  las    = 1,
  main   = "Mean estimated proportion by tissue compartment",
  border = NA
)

6 Session Info

sessionInfo()