The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

scGOclust_vignette

Load packages

scGOclust is a package that leverages Gene Ontology to analyse the functional profile of cells with scRNA-seq data.

# load required libraries

library(Seurat)
#> Loading required package: SeuratObject
#> Loading required package: sp
#> 'SeuratObject' was built under R 4.3.1 but the current version is
#> 4.3.2; it is recomended that you reinstall 'SeuratObject' as the ABI
#> for R may have changed
#> 
#> Attaching package: 'SeuratObject'
#> The following object is masked from 'package:base':
#> 
#>     intersect
library(pheatmap)
library(httr)

## if (!require("devtools")) install.packages("devtools")

## install latest from source
## for reproducibility we do not update dependencies
# devtools::install_github("Papatheodorou-Group/scGOclust", upgrade = FALSE)

library(scGOclust)

2. Load input data

# get a gene to GO BP terms mapping table
# remove electronically inferred records

# sometimes ensembl complains about ssh certificate has expired, this is a known issue, run this code
httr::set_config(httr::config(ssl_verifypeer = FALSE)) 

#mmu_tbl = ensemblToGo(species = 'mmusculus', GO_linkage_type = c('experimental', 'phylogenetic', 'computational', 'author', 'curator' ))
#dme_tbl = ensemblToGo(species = 'dmelanogaster', GO_linkage_type = c('experimental', 'phylogenetic', 'computational', 'author', 'curator' ))

# here we load the example data for convenience
data(mmu_tbl)
data(dme_tbl)
# load the gene expression raw count objects
data(mmu_subset)
data(dme_subset)
ls()
#> [1] "dme_subset" "dme_tbl"    "mmu_subset" "mmu_tbl"

3. Build GO BP profile

## construct a Seurat object with GO BP as features

mmu_go_obj <- makeGOSeurat(ensembl_to_GO = mmu_tbl, feature_type = 'external_gene_name', seurat_obj = mmu_subset)
#> collect data
#> compute GO to cell matrix, might take a few secs
#> Warning: Data is of class matrix. Coercing to dgCMatrix.
#> time used: 0.66 secs
#> removing 0 all zero terms
#> returning GO Seurat object

dme_go_obj <- makeGOSeurat(ensembl_to_GO = dme_tbl, feature_type = 'external_gene_name', seurat_obj = dme_subset)
#> collect data
#> compute GO to cell matrix, might take a few secs
#> Warning: Data is of class matrix. Coercing to dgCMatrix.
#> time used: 0.2 secs
#> removing 0 all zero terms
#> returning GO Seurat object

4. Calculate cell type average GO BP profile

# specify the column with cell type annotation in seurat_obj@meta.data

mmu_ct_go <- getCellTypeGO(go_seurat_obj = mmu_go_obj, cell_type_col = 'cell_type_annotation')
#> perform normalization and log1p for mmu_go_obj
#> Normalizing layer: counts
#> Centering and scaling data matrix
#> As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.
#> Names of identity class contain underscores ('_'), replacing with dashes ('-')
#> This message is displayed once per session.
dme_ct_go <- getCellTypeGO(go_seurat_obj = dme_go_obj, cell_type_col = 'annotation')
#> perform normalization and log1p for dme_go_obj
#> Normalizing layer: counts
#> Centering and scaling data matrix

5. Calculate within-species cell type functional similariy

# heatmap of Pearson's correlation coefficient of cell type average BP profiles within species

mmu_corr = cellTypeGOCorr(cell_type_go = mmu_ct_go, corr_method = 'pearson')
pheatmap(mmu_corr)

dme_corr = cellTypeGOCorr(cell_type_go = dme_ct_go, corr_method = 'pearson')
pheatmap(dme_corr)

5. Calculate cross-species cell type functional similariy


# calculate Pearson's correlation coefficient of cell type average BP profiles across species

corr = crossSpeciesCellTypeGOCorr(species_1 = 'mmusculus', species_2 = 'dmelanogaster', cell_type_go_sp1 = mmu_ct_go, cell_type_go_sp2 = dme_ct_go, corr_method = 'pearson')

# tries to put cells with higher values on the diagonal
# helpful when cross-species cell type similarity signal is less clear

plotCellTypeCorrHeatmap(corr, width = 9, height = 10)


# scale per column or row to see the relative similarity
plotCellTypeCorrHeatmap(corr, scale = 'column', width = 9, height = 10)

6. Dimensional reduction and UMAP visualization of cells with GO profile


# analyze the cell-by-GO BP profile as a count matrix
# Note that the example data has very few cells (for reducing data size), so I am using a small UMAP min_dist and small spread here. Default min_dist is 0.3 and spread is 1.

mmu_go_analyzed = analyzeGOSeurat(go_seurat_obj = mmu_go_obj, cell_type_col = 'cell_type_annotation', min.dist = 0.1, spread = 0.5)
#> perform normalization and log1p for mmu_go_obj
#> Normalizing layer: counts
#> Warning: The following arguments are not used: spread
#> Finding variable features for layer counts
#> Warning: The following arguments are not used: spread

#> Warning: The following arguments are not used: spread

#> Warning: The following arguments are not used: spread
#> Computing nearest neighbor graph
#> Computing SNN
#> Warning: The following arguments are not used: spread

#> Warning: The following arguments are not used: spread
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 219
#> Number of edges: 9790
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.4765
#> Number of communities: 4
#> Elapsed time: 0 seconds
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
#> 17:05:29 UMAP embedding parameters a = 5.069 b = 1.003
#> Found more than one class "dist" in cache; using the first, from namespace 'spam'
#> Also defined by 'BiocGenerics'
#> 17:05:29 Read 219 rows and found 50 numeric columns
#> 17:05:29 Using Annoy for neighbor search, n_neighbors = 30
#> Found more than one class "dist" in cache; using the first, from namespace 'spam'
#> Also defined by 'BiocGenerics'
#> 17:05:29 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 17:05:29 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpDbhzmO/file123c6576dc9b7
#> 17:05:29 Searching Annoy index using 1 thread, search_k = 3000
#> 17:05:29 Annoy recall = 100%
#> 17:05:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
#> 17:05:30 Initializing from normalized Laplacian + noise (using RSpectra)
#> 17:05:30 Commencing optimization for 500 epochs, with 7794 positive edges
#> 17:05:30 Optimization finished
# UMAP plot of the analyzed cell-by-GO BP profile
# labeled by previously specified cell annotation column in meta.data

DimPlot(mmu_go_analyzed, label = TRUE) + NoLegend()

dme_go_analyzed = analyzeGOSeurat(go_seurat_obj = dme_go_obj, cell_type_col = 'annotation', min_dist=0.1, spread=0.5)
#> perform normalization and log1p for dme_go_obj
#> Normalizing layer: counts
#> Warning: The following arguments are not used: min_dist, spread
#> Finding variable features for layer counts
#> Warning: The following arguments are not used: min_dist, spread

#> Warning: The following arguments are not used: min_dist, spread

#> Warning: The following arguments are not used: min_dist, spread
#> Computing nearest neighbor graph
#> Computing SNN
#> Warning: The following arguments are not used: min_dist, spread

#> Warning: The following arguments are not used: min_dist, spread
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 180
#> Number of edges: 6265
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.5638
#> Number of communities: 4
#> Elapsed time: 0 seconds
#> Warning: The following arguments are not used: min_dist
#> 17:05:31 UMAP embedding parameters a = 3.244 b = 1.447
#> Found more than one class "dist" in cache; using the first, from namespace 'spam'
#> Also defined by 'BiocGenerics'
#> 17:05:31 Read 180 rows and found 50 numeric columns
#> 17:05:31 Using Annoy for neighbor search, n_neighbors = 30
#> Found more than one class "dist" in cache; using the first, from namespace 'spam'
#> Also defined by 'BiocGenerics'
#> 17:05:31 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 17:05:31 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpDbhzmO/file123c67377bc6e
#> 17:05:31 Searching Annoy index using 1 thread, search_k = 3000
#> 17:05:31 Annoy recall = 100%
#> 17:05:31 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
#> 17:05:32 Initializing from normalized Laplacian + noise (using RSpectra)
#> 17:05:32 Commencing optimization for 500 epochs, with 6618 positive edges
#> 17:05:32 Optimization finished
DimPlot(dme_go_analyzed, label = TRUE) + NoLegend()

7. Get co-up and co-down regulated terms between pairs of cell types


## calculation takes a few minutes due to the Wilcox signed rank test

ct_shared_go = scGOclust::getCellTypeSharedGO(species_1 = 'mmusculus', species_2 = 'dmelanogaster', analyzed_go_seurat_sp1 = mmu_go_analyzed, analyzed_go_seurat_sp2 = dme_go_analyzed, cell_type_col_sp1 = 'cell_type_annotation', cell_type_col_sp2 = 'annotation', layer_use = 'data')
head(ct_shared_go$shared_sig_markers)

# query shared GO terms for specific cell type pairs

getCellTypeSharedTerms(shared_go = ct_shared_go,
                       cell_type_sp1 = 'intestine_Enteroendocrine cell', 
                       cell_type_sp2 = 'enteroendocrine cell',
                       return_full = FALSE)

# viola
sessionInfo()
#> R version 4.3.2 (2023-10-31)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Sonoma 14.2.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
#> 
#> locale:
#> [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> time zone: Europe/London
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] scGOclust_0.2.1    httr_1.4.7         pheatmap_1.0.12    Seurat_5.0.1      
#> [5] SeuratObject_5.0.1 sp_2.1-1          
#> 
#> loaded via a namespace (and not attached):
#>   [1] RcppAnnoy_0.0.21        splines_4.3.2           later_1.3.1            
#>   [4] bitops_1.0-7            filelock_1.0.2          tibble_3.2.1           
#>   [7] polyclip_1.10-6         XML_3.99-0.14           fastDummies_1.7.3      
#>  [10] lifecycle_1.0.4         globals_0.16.2          lattice_0.21-9         
#>  [13] MASS_7.3-60             magrittr_2.0.3          limma_3.56.2           
#>  [16] plotly_4.10.3           sass_0.4.7              rmarkdown_2.25         
#>  [19] jquerylib_0.1.4         yaml_2.3.7              httpuv_1.6.12          
#>  [22] sctransform_0.4.1       spam_2.10-0             spatstat.sparse_3.0-3  
#>  [25] reticulate_1.34.0       cowplot_1.1.1           pbapply_1.7-2          
#>  [28] DBI_1.1.3               RColorBrewer_1.1-3      abind_1.4-5            
#>  [31] zlibbioc_1.46.0         Rtsne_0.16              purrr_1.0.2            
#>  [34] BiocGenerics_0.46.0     RCurl_1.98-1.12         pracma_2.4.2           
#>  [37] rappdirs_0.3.3          GenomeInfoDbData_1.2.10 IRanges_2.34.1         
#>  [40] S4Vectors_0.38.2        ggrepel_0.9.4           irlba_2.3.5.1          
#>  [43] listenv_0.9.0           spatstat.utils_3.0-4    goftest_1.2-3          
#>  [46] RSpectra_0.16-1         spatstat.random_3.2-1   fitdistrplus_1.1-11    
#>  [49] parallelly_1.36.0       leiden_0.4.3.1          codetools_0.2-19       
#>  [52] xml2_1.3.5              tidyselect_1.2.0        farver_2.1.1           
#>  [55] matrixStats_1.1.0       stats4_4.3.2            BiocFileCache_2.8.0    
#>  [58] spatstat.explore_3.2-5  jsonlite_1.8.7          ellipsis_0.3.2         
#>  [61] progressr_0.14.0        ggridges_0.5.4          survival_3.5-7         
#>  [64] tools_4.3.2             progress_1.2.2          ica_1.0-3              
#>  [67] Rcpp_1.0.11             glue_1.6.2              gridExtra_2.3          
#>  [70] xfun_0.41               GenomeInfoDb_1.36.4     dplyr_1.1.4            
#>  [73] withr_2.5.2             fastmap_1.1.1           fansi_1.0.5            
#>  [76] digest_0.6.33           R6_2.5.1                mime_0.12              
#>  [79] colorspace_2.1-0        networkD3_0.4           scattermore_1.2        
#>  [82] tensor_1.5              spatstat.data_3.0-3     biomaRt_2.56.1         
#>  [85] RSQLite_2.3.1           utf8_1.2.4              tidyr_1.3.0            
#>  [88] generics_0.1.3          data.table_1.14.8       prettyunits_1.2.0      
#>  [91] htmlwidgets_1.6.2       slanter_0.2-0           uwot_0.1.16            
#>  [94] pkgconfig_2.0.3         gtable_0.3.4            blob_1.2.4             
#>  [97] lmtest_0.9-40           XVector_0.40.0          htmltools_0.5.7        
#> [100] dotCall64_1.1-0         scales_1.2.1            Biobase_2.60.0         
#> [103] png_0.1-8               knitr_1.45              rstudioapi_0.15.0      
#> [106] reshape2_1.4.4          nlme_3.1-163            curl_5.1.0             
#> [109] cachem_1.0.8            zoo_1.8-12              stringr_1.5.1          
#> [112] KernSmooth_2.23-22      parallel_4.3.2          miniUI_0.1.1.1         
#> [115] AnnotationDbi_1.62.2    pillar_1.9.0            grid_4.3.2             
#> [118] vctrs_0.6.4             RANN_2.6.1              promises_1.2.1         
#> [121] dbplyr_2.3.4            xtable_1.8-4            cluster_2.1.4          
#> [124] evaluate_0.23           cli_3.6.1               compiler_4.3.2         
#> [127] rlang_1.1.2             crayon_1.5.2            future.apply_1.11.0    
#> [130] labeling_0.4.3          plyr_1.8.9              stringi_1.8.1          
#> [133] viridisLite_0.4.2       deldir_1.0-9            munsell_0.5.0          
#> [136] Biostrings_2.68.1       lazyeval_0.2.2          spatstat.geom_3.2-7    
#> [139] Matrix_1.6-3            RcppHNSW_0.5.0          hms_1.1.3              
#> [142] patchwork_1.1.3         bit64_4.0.5             future_1.33.0          
#> [145] ggplot2_3.4.4           KEGGREST_1.40.1         shiny_1.7.5.1          
#> [148] highr_0.10              ROCR_1.0-11             igraph_1.5.1           
#> [151] memoise_2.0.1           bslib_0.5.1             bit_4.0.5

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.