The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This vignette shows how to use SignacX with Seurat and SPRING to learn a new cell type category from single cell data.
We start with CITE-seq data that were already classified with SignacX using the SPRING pipeline.
library(Seurat)
library(SignacX)
Load CITE-seq data from 10X Genomics processed with SPRING and classified with SignacX already.
# Load the CITE-seq data set (already processed with SPRING and classified
# with SignacX) from its SPRING directory.
data.dir <- './CITESEQ_EXPLORATORY_CITESEQ_5K_PBMCS/FullDataset_v1_protein'
E <- CID.LoadData(data.dir = data.dir)

# Load the categorical labels stored alongside the SPRING data.
json_data <- rjson::fromJSON(file = paste0(data.dir, '/categorical_coloring_data.json'))
Create a Seurat object for the protein expression data; we will use this as a reference.
# Separate protein and gene expression data: rows whose names contain
# "Total" go to the protein matrix P, the remaining rows stay in E.
logik <- grepl("Total", rownames(E))
P <- E[logik, ]
E <- E[!logik, ]

# CLR normalization in Seurat.
# Both matrices get the same positional column names (1..n) so that the
# columns of the RNA and ADT assays line up in the Seurat object.
colnames(P) <- seq_len(ncol(P))
colnames(E) <- seq_len(ncol(E))
reference <- CreateSeuratObject(E)
reference[["ADT"]] <- CreateAssayObject(counts = P)
reference <- NormalizeData(reference, assay = "ADT", normalization.method = "CLR")
Identify CD56 bright NK cells based on protein expression data.
# Generate labels: keep "NK" as-is and collapse every other cell state
# into "Unclassified".
lbls <- json_data$CellStates$label_list
lbls[lbls != "NK"] <- "Unclassified"

# Pull the CD16 and CD56 rows out of the ADT count matrix.
adt_counts <- reference@assays$ADT@counts
CD16 <- adt_counts[rownames(adt_counts) == "CD16-TotalSeqB-CD16", ]
CD56 <- adt_counts[rownames(adt_counts) == "CD56-TotalSeqB-CD56", ]

# Flag NK cells with log2(CD56) > 10 and log2(CD16) < 7.5, report how many
# were found, and relabel them as CD56 bright NK cells.
logik <- log2(CD56) > 10 & log2(CD16) < 7.5 & lbls == "NK"
sum(logik)
lbls[logik] <- "NK.CD56bright"
Generate a training data set from the reference data and save it for later use. Note:
# Generate bootstrapped single cell data for the two classes of interest
# ("NK" and "NK.CD56bright") from the labeled reference.
R_learned <- SignacBoot(
  E = E,
  spring.dir = data.dir,
  L = c("NK", "NK.CD56bright"),
  labels = lbls,
  logfc.threshold = 1
)

# Save the training data for later use.
save(R_learned, file = "training_NKBright_v207.rda")
Load expression data for a different data set (this was also previously processed through SPRING and SignacX)
# Classify another data set with new model
# Load the new data; note that this overwrites E with the new expression
# matrix and json_data with the new data set's labels.
new.data.dir <- "./PBMCs_5k_10X/FullDataset_v1"
E <- CID.LoadData(data.dir = new.data.dir)

# Load cell types identified with Signac.
json_data <- rjson::fromJSON(file = paste0(new.data.dir, '/categorical_coloring_data.json'))
Generate new labels. Note:
# Generate new labels for the new data set using the learned training data.
cr_learned <- Signac(E = E, R = R_learned, spring.dir = new.data.dir)
Now we amend the existing labels (classified previously with SignacX); we add the new labels and generate a new SPRING layout. Note:
# Modify the existing labels: extract the label list of every annotation
# layer, then replace cells labeled 'NK' with the newly learned labels.
cr <- lapply(json_data, function(x) x$label_list)

logik <- cr$CellStates == 'NK'
cr$CellStates[logik] <- cr_learned[logik]

logik <- cr$CellStates_novel == 'NK'
cr$CellStates_novel[logik] <- cr_learned[logik]

# Point at a separate "_Learned" directory for the amended results.
new.data.dir <- paste0(new.data.dir, "_Learned")
Save results
# Save the amended labels to the SPRING directory, registering the new
# population 'NK.CD56bright' with the color 'red'.
dat <- CID.writeJSON(
  cr,
  spring.dir = new.data.dir,
  new_colors = c('red'),
  new_populations = c('NK.CD56bright')
)
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_4.0.3 magrittr_2.0.1 formatR_1.7 htmltools_0.5.1.1
## [5] tools_4.0.3 yaml_2.2.1 stringi_1.5.3 rmarkdown_2.6
## [9] highr_0.8 knitr_1.30 stringr_1.4.0 digest_0.6.27
## [13] xfun_0.20 rlang_0.4.10 evaluate_0.14
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.