The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
Analyze snATAC-seq data of basal cell carcinoma sample SU008_Tumor_Pre in GEO (GSE129785).
library(scPloidy)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
library(gplots)
#>
#> Attaching package: 'gplots'
#> The following object is masked from 'package:stats':
#>
#> lowess
You can skip the preprocessing and start from section CNV.
Download GSE129785_scATAC-TME-All.cell_barcodes.txt.gz from below and gunzip https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE129785&format=file&file=GSE129785%5FscATAC%2DTME%2DAll%2Ecell%5Fbarcodes%2Etxt%2Egz
Download GSM3722064_SU008_Tumor_Pre_fragments.tsv.gz from https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSM3722064&format=file&file=GSM3722064%5FSU008%5FTumor%5FPre%5Ffragments%2Etsv%2Egz
The input window file window.hg37.20MB.bed
and resultant
peak file multi_tissue_peaks.hg37.20MB.bed
can be
downloaded from https://doi.org/10.6084/m9.figshare.23574066
To reproduce by yourself, download chromatin accessibility DHS_Index_and_Vocabulary_hg19_WM20190703.txt.gz from https://doi.org/10.5281/zenodo.3838751
Generate peaks for 20MB windows using peak_sum.R by yardimcilab in https://github.com/yardimcilab/RIDDLER/blob/main/util/peak_sum.R
See vignette of R package scPloidy. Load setting for hg19 genome.
simpleRepeat = readr::read_tsv(
"~/human/publichuman/hg37_ucsc/simpleRepeat.chrom_chromStart_chromEnd.txt.gz",
col_names = c("chrom", "chromStart", "chromEnd"))
rmsk = readr::read_tsv(
"~/human/publichuman/hg37_ucsc/rmsk.Simple_repeat.genoName_genoStart_genoEnd.txt.gz",
col_names = c("chrom", "chromStart", "chromEnd"))
simpleRepeat = rbind(simpleRepeat, rmsk)
rm(rmsk)
# convert from 0-based position to 1-based
simpleRepeat[, 2] = simpleRepeat[, 2] + 1
simpleRepeat = GenomicRanges::makeGRangesFromDataFrame(
as.data.frame(simpleRepeat),
seqnames.field = "chrom",
start.field = "chromStart",
end.field = "chromEnd")
# remove duplicates
simpleRepeat = GenomicRanges::union(simpleRepeat, GenomicRanges::GRanges())
window = read.table("window.hg37.20MB.bed", header = FALSE)
colnames(window) = c("chr", "start", "end", "window")
at = GenomicRanges::makeGRangesFromDataFrame(window[, 1:3])
barcodesuffix = paste0(".", window$window)
Compute and save fragmentoverlap.
sample = "GSM3722064"
tissue = "SU008_Tumor_Pre"
bc = sc$Barcodes[sc$Group == tissue]
SU008_Tumor_Pre_fragmentoverlap =
fragmentoverlapcount(
paste0("SRX5679934/", sample, "_", tissue, "_fragments.tsv.gz"),
at,
excluderegions = simpleRepeat,
targetbarcodes = bc,
Tn5offset = c(1, 0),
barcodesuffix = barcodesuffix
)
You can skip above and load preprocessed data attached to the package. The data file GSE129785_SU008_Tumor_Pre.RData is also available from https://doi.org/10.6084/m9.figshare.23574066
Infer CNVs.
levels = c(2, 4)
result = cnv(SU008_Tumor_Pre_fragmentoverlap,
SU008_Tumor_Pre_windowcovariates,
levels = levels,
deltaBICthreshold = -600)
#> [1] "Computing span = 2"
#> [1] "Computing span = 3"
#> [1] "Computing span = 4"
#> [1] "Computing span = 5"
#> [1] "Computing span = 6"
#> [1] "Computing span = 7"
#> [1] "Computing span = 8"
#> [1] "Computing span = 9"
#> [1] "Computing span = 10"
#> [1] "Computing span = 2"
#> [1] "Computing span = 3"
#> [1] "Computing span = 4"
#> [1] "Computing span = 5"
#> [1] "Computing span = 6"
#> [1] "Computing span = 7"
#> [1] "Computing span = 8"
#> [1] "Computing span = 9"
#> [1] "Computing span = 10"
Attach the result to fragmentoverlap
.
windowcovariates = SU008_Tumor_Pre_windowcovariates
windowcovariates$w =
as.numeric(sub("window_", "", windowcovariates$window))
fragmentoverlap = SU008_Tumor_Pre_fragmentoverlap
fragmentoverlap$cell =
sub(".window.*", "", fragmentoverlap$barcode)
fragmentoverlap$window =
sub(".*window", "window", fragmentoverlap$barcode)
fragmentoverlap$w =
as.numeric(sub("window_", "", fragmentoverlap$window))
x = match(fragmentoverlap$barcode,
result$cellwindowCN$barcode)
fragmentoverlap$CN = result$cellwindowCN$CN[x]
fragmentoverlap$ploidy.moment.cell = result$cellwindowCN$ploidy.moment.cell[x]
fragmentoverlap = fragmentoverlap[!is.na(fragmentoverlap$CN), ]
# For better hierarchical clustering
fragmentoverlap$pwindownormalizedcleanedceiled =
pmin(fragmentoverlap$CN, min(levels) * 2)
Make dataframe for plotting.
dataplot =
fragmentoverlap %>%
dplyr::select("w", "cell", "pwindownormalizedcleanedceiled") %>%
tidyr::pivot_wider(names_from = "w", values_from = "pwindownormalizedcleanedceiled")
dataplot = as.data.frame(dataplot)
rownames(dataplot) = dataplot$cell
dataplot = dataplot[, colnames(dataplot) != "cell"]
dataplot = as.matrix(dataplot)
n = max(as.numeric(colnames(dataplot)))
dataplot = dataplot[, match(as.character(1:n), colnames(dataplot))]
colnames(dataplot) = as.character(1:n)
Plot.
breaks = c(0, min(levels) - 1, min(levels) + 1, min(levels) * 2)
x = windowcovariates
x$chr[duplicated(windowcovariates$chr)] = NA
x = x$chr[match(colnames(dataplot), x$w)]
RowSideColors =
unlist(
lapply(
fragmentoverlap$ploidy.moment.cell[
match(rownames(dataplot), fragmentoverlap$cell)],
function (x) { which(sort(levels) == x)}))
RowSideColors = topo.colors(length(levels))[RowSideColors]
gplots::heatmap.2(
dataplot,
Colv = FALSE,
dendrogram = "none",
breaks = breaks,
col = c("blue", "gray80", "red"),
trace = "none", labRow = FALSE, na.color = "white",
labCol = x,
RowSideColors= RowSideColors)
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.