Note: All code chunks have
eval = FALSEand are shown for illustration only. To run them interactively:
This vignette demonstrates a complete dicepro deconvolution workflow on real RNA-seq data using:
library(dicepro)
data(BlueCode)
cat("BlueCode dimensions :", dim(BlueCode), "\n")
cat("Number of cell types :", ncol(BlueCode), "\n")
cat("Number of genes :", nrow(BlueCode), "\n")
print(head(colnames(BlueCode), 5L))compartments <- list(
Immune = colnames(BlueCode)[1:9],
Stromal = colnames(BlueCode)[10:17],
Endothelial = colnames(BlueCode)[18:20],
Epithelial = colnames(BlueCode)[21:25],
Muscle = colnames(BlueCode)[26:34]
)
for (comp in names(compartments)) {
cat(sprintf(" %s (%d): %s\n",
comp,
length(compartments[[comp]]),
paste(compartments[[comp]], collapse = ", ")))
}dicepro()out <- dicepro(
reference = as.matrix(BlueCode),
bulk = as.matrix(CellMixtures),
methodDeconv = "FARDEEP",
W_prime = 0,
bulkName = "CellMixtures",
refName = "BlueCode",
hp_max_evals = 150L,
algo_select = "random",
output_path = tempdir(),
hspaceTechniqueChoose = "all"
)prop_mat <- as.matrix(out$H)
prop_sorted <- prop_mat[, order(colMeans(prop_mat), decreasing = TRUE)]
heatmap(
prop_sorted,
Rowv = NA,
Colv = NA,
col = colorRampPalette(c("white", "#2c7bb6", "#d7191c"))(50L),
scale = "none",
margins = c(12, 6),
main = "Estimated cell-type proportions -- CellMixtures",
xlab = "Cell type",
ylab = "Sample"
)top10 <- names(sort(colMeans(prop_mat), decreasing = TRUE))[seq_len(10L)]
prop_top10 <- prop_mat[, top10, drop = FALSE]
cols <- colorRampPalette(
c("#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c",
"#1a9641", "#a6d96a", "#762a83", "#c2a5cf", "#e7d4e8")
)(10L)
barplot(
t(prop_top10),
col = cols,
legend = colnames(prop_top10),
args.legend = list(x = "topright", cex = 0.55, ncol = 2L),
las = 1,
ylab = "Proportion",
xlab = "Sample",
main = "Per-sample cell composition (top 10 cell types)",
border = NA
)ct_to_comp <- c(
setNames(rep("Immune", 9L), compartments$Immune),
setNames(rep("Stromal", 8L), compartments$Stromal),
setNames(rep("Endothelial", 3L), compartments$Endothelial),
setNames(rep("Epithelial", 5L), compartments$Epithelial),
setNames(rep("Muscle", 9L), compartments$Muscle)
)
shared_ct <- intersect(colnames(prop_mat), names(ct_to_comp))
comp_props <- vapply(
unique(ct_to_comp),
function(comp) {
cts <- names(ct_to_comp)[ct_to_comp == comp & names(ct_to_comp) %in% shared_ct]
if (length(cts) == 0L) return(NA_real_)
mean(rowSums(prop_mat[, cts, drop = FALSE]))
},
numeric(1L)
)
comp_cols <- c(
Immune = "#2c7bb6",
Stromal = "#fdae61",
Endothelial = "#1a9641",
Epithelial = "#d7191c",
Muscle = "#762a83"
)
barplot(
sort(comp_props, decreasing = TRUE),
col = comp_cols[names(sort(comp_props, decreasing = TRUE))],
ylab = "Mean proportion",
las = 1,
main = "Mean estimated proportion by tissue compartment",
border = NA
)