R/qtl2ggplot Vignette

Brian S. Yandell

2021-05-25

Setup

Load example DO data from web

library(qtl2)
library(qtl2ggplot)
library(ggplot2)
# Read the example DO cross from the zipped data bundle in the
# rqtl/qtl2data GitHub repository. The URL is built inside local() so no
# extra variable is left behind in the session.
DOex <- local({
  doex_zip_url <- file.path(
    "https://raw.githubusercontent.com/rqtl",
    "qtl2data/master/DOex",
    "DOex.zip"
  )
  read_cross2(doex_zip_url)
})

Create an artificial second phenotype as the arcsine square root of the first one.

# Append an "asin" phenotype column: the arcsine square root of the first
# phenotype (divided by 100), rescaled so that its standard deviation
# matches that of OF_immobile_pct. Built inside local() so no helper
# variables are left behind.
DOex$pheno <- local({
  pheno <- DOex$pheno
  asin_raw <- asin(sqrt(pheno[, 1] / 100))
  sd_target <- sd(pheno[, "OF_immobile_pct"], na.rm = TRUE)
  sd_asin <- sd(asin_raw, na.rm = TRUE)
  cbind(pheno, asin = asin_raw * sd_target / sd_asin)
})

Genome scan

# Genotype probabilities for the whole cross.
pr <- calc_genoprob(DOex, error_prob=0.002)
# Reduce the genotype probabilities to allele probabilities.
apr <- genoprob_to_alleleprob(pr)
# Scan every phenotype column (the original trait and the artificial asin trait).
scan_apr <- scan1(apr, DOex$pheno)
# Report LOD peaks, with positions looked up in DOex$pmap.
find_peaks(scan_apr, DOex$pmap)
##   lodindex       lodcolumn chr      pos       lod
## 1        1 OF_immobile_pct   2 96.84223 10.173529
## 2        1 OF_immobile_pct   3 15.02006  5.969786
## 3        1 OF_immobile_pct   X 74.57257  6.938781
## 4        2            asin   2 96.84223  9.423097
## 5        2            asin   3 15.02006  6.188887
## 6        2            asin   X 74.57257  6.382851

The basic plot of genome scan

# Base-graphics plot of the genome scan along DOex$pmap.
plot(scan_apr, DOex$pmap)

and the grammar of graphics (ggplot2) version

# ggplot2-based plot of the same genome scan.
autoplot(scan_apr, DOex$pmap)

Focus on one chromosome

Subset to chr 2.

# Overwrites DOex in place: every later use of DOex sees chr 2 only.
DOex <- DOex[,"2"]

Calculate genotype probabilities and convert to allele probabilities

# Recompute on the chr 2 subset; pr and apr from the full-genome scan are replaced.
pr <- calc_genoprob(DOex, error_prob=0.002)
apr <- genoprob_to_alleleprob(pr)

Genome Scan

# Repeat the scan with the chr 2 allele probabilities (replaces scan_apr).
scan_apr <- scan1(apr, DOex$pheno)
# Peaks for both phenotype columns on chr 2.
find_peaks(scan_apr, DOex$pmap)
##   lodindex       lodcolumn chr      pos       lod
## 1        1 OF_immobile_pct   2 96.84223 10.173529
## 2        2            asin   2 96.84223  9.423097
# Base-graphics version of the chr 2 scan.
plot(scan_apr, DOex$pmap)

# ggplot2 version of the chr 2 scan.
autoplot(scan_apr, DOex$pmap)

# scan1coef() fits one phenotype at a time. Passing the whole two-column
# phenotype matrix only raised "Considering only the first phenotype" and
# used column 1 anyway, so select that phenotype explicitly (as a named
# vector) to get the same coefficients without the warning.
coefs <- scan1coef(apr, DOex$pheno[, "OF_immobile_pct"])
# Base-graphics plot of coefficient columns 1:8, coloured with CCcolors.
plot(coefs, DOex$pmap, 1:8, col = CCcolors)

# ggplot2 version of the allele-effect plot.
autoplot(coefs, DOex$pmap)

Plot allele effects over LOD scan.

# Supplying scan1_output adds the LOD scan to the allele-effect plot.
plot(coefs, DOex$pmap, 1:8, col = CCcolors, scan1_output = scan_apr)

# ggplot2 version, with the legend suppressed.
autoplot(coefs, DOex$pmap, scan1_output = scan_apr,
         legend.position = "none")

Examine just some of the founder effects, without centering.

# Only coefficient columns 5 and 8, using the matching CCcolors entries.
plot(coefs, DOex$pmap, c(5,8), col = CCcolors[c(5,8)])

# ggplot2 version of the same two columns.
autoplot(coefs, DOex$pmap, c(5,8))

# Same two columns, faceted with facet = "geno".
autoplot(coefs, DOex$pmap, c(5,8), facet = "geno")

# Columns 4 and 5, with the LOD scan added through scan1_output.
plot(coefs, DOex$pmap, 4:5, col = CCcolors[4:5], scan1_output = scan_apr)

# ggplot2 version of columns 4 and 5 with the LOD scan; legend suppressed.
autoplot(coefs, DOex$pmap, 4:5, scan1_output = scan_apr, legend.position = "none")

SNP Association Mapping

Download snp info from web

# URL of the SNP info file (c2_snpinfo.rds) in the rqtl/qtl2data repository.
filename <- file.path("https://raw.githubusercontent.com/rqtl",
                      "qtl2data/master/DOex",
                      "c2_snpinfo.rds")
# Download to a temporary file. An .rds file is binary, so request a binary
# transfer explicitly instead of relying on download.file()'s platform- and
# version-dependent default (a text-mode transfer on Windows corrupts it).
tmpfile <- tempfile()
download.file(filename, tmpfile, quiet = TRUE, mode = "wb")
snpinfo <- readRDS(tmpfile)
# The temporary copy is no longer needed once the object is in memory.
unlink(tmpfile)

Convert to snp probabilities

# Index the SNP table against DOex$pmap (replaces snpinfo with the indexed version).
snpinfo <- index_snps(DOex$pmap, snpinfo)
# Convert the allele probabilities to SNP probabilities using the indexed table.
snppr <- genoprob_to_snpprob(apr, snpinfo)

Perform SNP association analysis (here, ignoring residual kinship)

# SNP association scan of every phenotype column; no kinship argument is supplied.
scan_snppr <- scan1(snppr, DOex$pheno)

Plot results

# Base-graphics SNP association plot; drop_hilit = 1.5 highlights SNPs
# with LOD within 1.5 of the maximum.
plot(scan_snppr, snpinfo, drop_hilit = 1.5)

# ggplot2 version, faceted with facet = "pheno".
autoplot(scan_snppr, snpinfo, facet = "pheno", drop_hilit = 1.5)

Plot just subset of distinct SNPs

# show_all_snps = FALSE restricts the plot to the subset of distinct SNPs.
plot(scan_snppr, snpinfo, show_all_snps=FALSE, drop_hilit = 1.5)

# ggplot2 version of the distinct-SNP plot.
autoplot(scan_snppr, snpinfo, show_all_snps=FALSE, drop_hilit = 1.5)

Highlight the top snps (with LOD within 1.5 of max). Show as open circles of size 1.

# pch = 1 and cex = 1 draw the points as open circles of size 1.
plot(scan_snppr, snpinfo, drop_hilit=1.5, cex=1, pch=1)

# ggplot2 version with the same point settings.
autoplot(scan_snppr, snpinfo, drop_hilit=1.5, cex=1, pch=1)

Highlight SDP patterns in SNPs within 3 of max; connect with lines.

# Show SDP patterns (patterns = "all") for SNPs within 3 LOD of the
# maximum, drawn with point size 2.
autoplot(
  scan_snppr, snpinfo,
  patterns = "all",
  drop_hilit = 3,
  cex = 2
)

Highlight only top SDP patterns in SNPs.

# patterns = "hilit" keeps only the highlighted SDP patterns; ylim
# restricts the y-axis to 3.6-6.6.
autoplot(scan_snppr, snpinfo, patterns="hilit",drop_hilit=3,cex=2,
     ylim = c(3.6,6.6))

# NOTE(review): this plots allele effects (coefs) with the LOD scan added,
# not the SNP scan; it repeats the earlier coefs/scan_apr overlay without
# legend.position. Confirm it is meant to be in the SNP section.
autoplot(coefs, scan1_output = scan_apr, DOex$pmap)

Multiple phenotypes

Plot routines (except scan patterns for now) can accommodate multiple phenotypes. At present, it is best to stick to fewer than 10. In the preamble of this document, a second phenotype, asin, was artificially created for illustration purposes.

Genome Scans

# Base graphics needs two calls: draw LOD column 1, then overlay LOD
# column 2 in red with add = TRUE.
plot(scan_apr, DOex$pmap, 1)
plot(scan_apr, DOex$pmap, 2, add = TRUE, col = "red")

# autoplot accepts both LOD columns in a single call.
autoplot(scan_apr, DOex$pmap, 1:2)

# Same two columns, faceted with facet = "pheno".
autoplot(scan_apr, DOex$pmap, 1:2, facet="pheno")

SNP Scans

Plot results.

# Base-graphics SNP scans, one LOD column per call: open circles of
# size 1, highlighting SNPs within 1.5 LOD of the maximum.
plot(scan_snppr, snpinfo,
     lodcolumn = 1, cex = 1, pch = 1, drop_hilit = 1.5)

plot(scan_snppr, snpinfo,
     lodcolumn = 2, cex = 1, pch = 1, drop_hilit = 1.5)

# Both LOD columns in one autoplot call, faceted by phenotype.
autoplot(scan_snppr, snpinfo, 1:2,
         facet = "pheno", cex = 1, pch = 1, drop_hilit = 1.5)

# The same base-graphics plots with show_all_snps = FALSE.
plot(scan_snppr, snpinfo,
     lodcolumn = 1, cex = 1, pch = 1,
     show_all_snps = FALSE, drop_hilit = 1.5)

plot(scan_snppr, snpinfo,
     lodcolumn = 2, cex = 1, pch = 1,
     show_all_snps = FALSE, drop_hilit = 1.5)

# Faceted autoplot with show_all_snps = FALSE; note the larger points
# (cex = 2) in this call.
autoplot(scan_snppr, snpinfo, 1:2,
         show_all_snps = FALSE, facet = "pheno",
         cex = 2, pch = 1, drop_hilit = 1.5)

Note that in the autoplot (using qtl2ggplot), the hilit points for the second trait are fewer than with the plot (using qtl2). This is because the maxlod for the faceted autoplot is across both traits, and the other points for the second trait are too low.

# LOD column 2 on its own, so highlighting is no longer computed across
# both traits.
autoplot(scan_snppr, snpinfo, 2,
         show_all_snps = FALSE, facet = "pheno",
         cex = 1, pch = 1, drop_hilit = 1.5)

# Both LOD columns without faceting; col and col_hilit each supply one
# colour per LOD column.
autoplot(scan_snppr, snpinfo, 1:2,
         show_all_snps = FALSE,
         drop_hilit = 2, col = 1:2, col_hilit = 3:4,
         cex = 2, pch = 1)

Play with the colors.

# Swap the highlight colours relative to the base colours.
# NOTE(review): this call passes facet_var, whereas the other calls in
# this document pass facet -- confirm which argument name is intended.
autoplot(scan_snppr, snpinfo, 1:2,
         show_all_snps = FALSE, facet_var = "pheno",
         drop_hilit = 2, col = 1:2, col_hilit = 2:1,
         cex = 2, pch = 1)

# All SDP patterns for LOD column 2.
autoplot(scan_snppr, snpinfo, 2,
         patterns = "all",
         cex = 2, pch = 1, drop_hilit = 2)

# All SDP patterns for both LOD columns, faceted by phenotype.
autoplot(scan_snppr, snpinfo, 1:2,
         patterns = "all", cex = 2, pch = 1,
         facet = "pheno", drop_hilit = 3)

# Highlighted patterns only, faceted by phenotype, y-axis limited to
# 3.6-6.6.
autoplot(scan_snppr, snpinfo, 1:2,
         patterns = "hilit", cex = 2, pch = 1,
         drop_hilit = 3, ylim = c(3.6, 6.6), facet = "pheno")

# Highlighted patterns with all SNPs shown, faceted by pattern, y-axis
# limited to 3-7.
autoplot(scan_snppr, snpinfo, 1:2,
         patterns = "hilit", show_all_snps = TRUE, cex = 2, pch = 1,
         drop_hilit = 3, ylim = c(3, 7), facet = "pattern")

LOD Peaks

# drop = 1.5 adds interval columns (ci_lo, ci_hi) to the peak table; the
# outer parentheses print the assigned value.
(peaks <- find_peaks(scan_apr, DOex$pmap, drop = 1.5))
##   lodindex       lodcolumn chr      pos       lod    ci_lo    ci_hi
## 1        1 OF_immobile_pct   2 96.84223 10.173529 91.60175 103.8778
## 2        2            asin   2 96.84223  9.423097 91.60175 103.8778
# Plot the peak table along DOex$pmap.
plot_peaks(peaks, DOex$pmap)

# ggplot-style counterpart of the peak plot.
ggplot_peaks(peaks, DOex$pmap)

Coefficients for 36 allele pairs

QTL effects for 36 allele pair model. Note that they are quite unstable, and the 36 allele pair max LOD is far from the peak for the additive (haplotype) model. Only showing effects with at least one E allele. Plots are truncated at +/-100 for viewability. Note also that qtl2ggplot routines have some centering built in.

36 allele pair genome scan.

# Genome scan on the genotype probabilities (pr) rather than the allele
# probabilities.
scan_pr <- scan1(pr, DOex$pheno)
# scan1coef() fits one phenotype at a time. Passing the two-column matrix
# only raised "Considering only the first phenotype" and used column 1, so
# select that phenotype explicitly to get the same fit without the warning.
coefs36 <- scan1coef(pr, DOex$pheno[, "OF_immobile_pct"])

All 36 allele pair QTL effects.

# All 36 coefficient columns, one colour index per column, with the
# y-axis truncated to +/-100.
plot(coefs36, DOex$pmap, 1:36, col = 1:36, ylim=c(-100,100))

# ggplot2 version with the same y-axis limits and no legend.
autoplot(coefs36, DOex$pmap, ylim=c(-100,100), colors = NULL, legend.position = "none")

The autoplot is centered by default (so mean across all alleles is mean of trait) to make coefficient plots easier to view. This can be turned off with the hidden center option.

# center = FALSE switches off autoplot's default centering of the coefficients.
autoplot(coefs36, DOex$pmap, ylim=c(-100,100), center = FALSE, 
         colors = NULL, legend.position = "none")

Only 8 allele pair QTL effects that contain E.

# Keep only the coefficient columns whose name contains "E".
# modify_object() (an internal qtl2ggplot helper) is applied to the
# original object and the column subset before plotting.
tmp <- local({
  has_E <- stringr::str_detect(dimnames(coefs36)[[2]], "E")
  qtl2ggplot:::modify_object(coefs36, coefs36[, has_E])
})
autoplot(tmp, DOex$pmap, ylim = c(-100, 100))