Introduction to BioInstaller

Jianfeng Li

2017-06-24

Introduction

Conda and BioContainer have made it easy to install many packages and bioinformatics software. Yet learning how to install and compile bioinformatics software is still necessary, because the experience will help you improve your debugging skills.

Especially when starting NGS analysis work on a new computer or system, you need to spend a great deal of time and energy setting up the complete set of software and dependencies of an analysis pipeline and writing the corresponding configuration file.

BioInstaller can be used to download/install bioinformatics tools, dependencies and databases in R relatively easily, and information about the installed software is saved so that it can be used to generate a configuration file.

Moreover, BioInstaller provides a different way to offer software downloads/installation to others.

Feature:

Core function in BioInstaller

library(BioInstaller)

# Show all available softwares/dependencies in default inst/extdata/github.toml
# and inst/extdata/nongithub.toml
install.bioinfo(show.all.names = TRUE)
#>   [1] "abyss"                 "asap"                 
#>   [3] "backspin"              "bamtools"             
#>   [5] "bamutil"               "bcftools"             
#>   [7] "bearscc"               "bedtools"             
#>   [9] "bowtie"                "bowtie2"              
#>  [11] "breakdancer"           "brie"                 
#>  [13] "bwa"                   "cnvkit"               
#>  [15] "cnvnator"              "delly"                
#>  [17] "fastx_toolkit"         "freebayes"            
#>  [19] "fsclvm"                "github_demo"          
#>  [21] "hisat2"                "htseq"                
#>  [23] "igraph"                "isop"                 
#>  [25] "jvarkit"               "libgtextutils"        
#>  [27] "lofreq"                "macs"                 
#>  [29] "mdseq"                 "mimosca"              
#>  [31] "oases"                 "outrigger"            
#>  [33] "picard"                "pindel"               
#>  [35] "pxz"                   "raceid"               
#>  [37] "rca"                   "rum"                  
#>  [39] "samtools_old"          "sclvm"                
#>  [41] "scnorm"                "seqtk"                
#>  [43] "seurat"                "singlesplice"         
#>  [45] "sleuth"                "somaticsniper"        
#>  [47] "sparsehash"            "speedseq"             
#>  [49] "star"                  "tmap"                 
#>  [51] "tophat2"               "tracer"               
#>  [53] "trinityrnaseq"         "varscan2"             
#>  [55] "vcflib"                "vcftools"             
#>  [57] "zifa"                  "annovar"              
#>  [59] "armadillo"             "bcl2fastq2"           
#>  [61] "blat"                  "bzip2"                
#>  [63] "cesa"                  "cnvnator_samtools"    
#>  [65] "curl"                  "edena"                
#>  [67] "ensemble_grch37_reffa" "ensemble_grch38_reffa"
#>  [69] "fastqc"                "fatotwobit"           
#>  [71] "fusioncatcher"         "fusioncatcher_reffa"  
#>  [73] "gatk"                  "gatk_bundle"          
#>  [75] "gmap"                  "hisat2_reffa"         
#>  [77] "htslib"                "imagej"               
#>  [79] "liftover"              "lzo"                  
#>  [81] "lzop"                  "mapsplice2"           
#>  [83] "mutect"                "novoalign"            
#>  [85] "pcre"                  "pigz"                 
#>  [87] "prinseq"               "r"                    
#>  [89] "reditools"             "root"                 
#>  [91] "samstat"               "samtools"             
#>  [93] "snpeff"                "solexaqa"             
#>  [95] "sqlite"                "sratools"             
#>  [97] "ssaha2"                "svtoolkit"            
#>  [99] "tvc"                   "ucsc_reffa"           
#> [101] "ucsc_utils"            "velvet"               
#> [103] "xz"                    "zlib"

# Fetching versions of softwares
install.bioinfo('samtools', show.all.versions = TRUE)
#> INFO [2017-06-24 00:37:27] Fetching samtools versions....
#>  [1] "1.5"        "1.4.1"      "1.4"        "1.3.1"      "1.3"       
#>  [6] "1.2"        "1.1"        "1.0"        "0.2.0-rc12" "0.2.0-rc11"
#> [11] "0.2.0-rc10" "0.2.0-rc9"  "0.2.0-rc8"  "0.2.0-rc7"  "0.2.0-rc6" 
#> [16] "0.2.0-rc5"  "0.2.0-rc4"  "0.2.0-rc3"  "0.2.0-rc2"  "0.2.0-rc1" 
#> [21] "0.1.20"     "0.1.19"     "0.1.18"     "0.1.17"     "0.1.16"    
#> [26] "0.1.15"     "0.1.14"     "0.1.13"     "0.1.12"     "master"

# Install 'demo' quietly
download.dir <- sprintf('%s/demo_1', tempdir())
install.bioinfo('demo', download.dir = download.dir, verbose = FALSE)
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "demo"

# Install 'demo' with debug information
download.dir <- sprintf('%s/demo_2', tempdir())
install.bioinfo('demo', download.dir = download.dir, verbose = TRUE)
#> INFO [2017-06-24 00:37:33] Debug:name:demo
#> INFO [2017-06-24 00:37:33] Debug:destdir:
#> INFO [2017-06-24 00:37:33] Debug:db:~/.BioInstaller
#> INFO [2017-06-24 00:37:33] Debug:github.cfg:/tmp/Rtmp4fAUbp/Rinst137eb6da2ff95/BioInstaller/extdata/github.toml
#> INFO [2017-06-24 00:37:33] Debug:nongithub.cfg:/tmp/Rtmp4fAUbp/Rinst137eb6da2ff95/BioInstaller/extdata/nongithub.toml
#> INFO [2017-06-24 00:37:33] Fetching demo versions....
#> INFO [2017-06-24 00:37:34] Install versions:GRCh37
#> INFO [2017-06-24 00:37:34] Now start to install demo in /tmp/RtmpueRscR/demo_2.
#> INFO [2017-06-24 00:37:34] Running before install steps.
#> INFO [2017-06-24 00:37:34] Now start to download demo in /tmp/RtmpueRscR/demo_2.
#> INFO [2017-06-24 00:37:35] Running install steps.
#> INFO [2017-06-24 00:37:35] Running after install successful steps.
#> INFO [2017-06-24 00:37:35] Running CMD:echo 'successful!'
#> INFO [2017-06-24 00:37:35] Running change.info for demo and be saved to ~/.BioInstaller
#> INFO [2017-06-24 00:37:39] Debug:Install by Github configuration file: 
#> INFO [2017-06-24 00:37:39] Debug:Install by Non Github configuration file: demo
#> INFO [2017-06-24 00:37:39] Installed successful list: demo
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "demo"

# Download demo source code
download.dir <- sprintf('%s/demo_3', tempdir())
install.bioinfo('demo', download.dir = download.dir,
  download.only = TRUE, verbose = TRUE)
#> INFO [2017-06-24 00:37:39] Debug:name:demo
#> INFO [2017-06-24 00:37:39] Debug:destdir:
#> INFO [2017-06-24 00:37:39] Debug:db:~/.BioInstaller
#> INFO [2017-06-24 00:37:39] Debug:github.cfg:/tmp/Rtmp4fAUbp/Rinst137eb6da2ff95/BioInstaller/extdata/github.toml
#> INFO [2017-06-24 00:37:39] Debug:nongithub.cfg:/tmp/Rtmp4fAUbp/Rinst137eb6da2ff95/BioInstaller/extdata/nongithub.toml
#> INFO [2017-06-24 00:37:40] Fetching demo versions....
#> INFO [2017-06-24 00:37:40] Install versions:GRCh37
#> INFO [2017-06-24 00:37:40] Now start to download demo in /tmp/RtmpueRscR/demo_3.
#> INFO [2017-06-24 00:37:40] demo be downloaded in /tmp/RtmpueRscR/demo_3 successful
#> [1] TRUE

# Set download.dir and destdir (destdir is like /usr/local,
# including bin, lib, include and others).
# destdir takes effect only if {{destdir}} is used in the install steps
download.dir <- sprintf('%s/demo_source', tempdir())
destdir <- sprintf('%s/demo', tempdir())
install.bioinfo('demo', download.dir = download.dir, destdir = destdir)
#> INFO [2017-06-24 00:37:40] Debug:name:demo
#> INFO [2017-06-24 00:37:40] Debug:destdir:/tmp/RtmpueRscR/demo
#> INFO [2017-06-24 00:37:40] Debug:db:~/.BioInstaller
#> INFO [2017-06-24 00:37:40] Debug:github.cfg:/tmp/Rtmp4fAUbp/Rinst137eb6da2ff95/BioInstaller/extdata/github.toml
#> INFO [2017-06-24 00:37:40] Debug:nongithub.cfg:/tmp/Rtmp4fAUbp/Rinst137eb6da2ff95/BioInstaller/extdata/nongithub.toml
#> INFO [2017-06-24 00:37:41] Fetching demo versions....
#> INFO [2017-06-24 00:37:41] Install versions:GRCh37
#> INFO [2017-06-24 00:37:41] Now start to install demo in /tmp/RtmpueRscR/demo.
#> INFO [2017-06-24 00:37:41] Running before install steps.
#> INFO [2017-06-24 00:37:41] Now start to download demo in /tmp/RtmpueRscR/demo_source.
#> INFO [2017-06-24 00:37:41] Running install steps.
#> INFO [2017-06-24 00:37:41] Running after install successful steps.
#> INFO [2017-06-24 00:37:41] Running CMD:echo 'successful!'
#> INFO [2017-06-24 00:37:41] Running change.info for demo and be saved to ~/.BioInstaller
#> INFO [2017-06-24 00:37:46] Debug:Install by Github configuration file: 
#> INFO [2017-06-24 00:37:46] Debug:Install by Non Github configuration file: demo
#> INFO [2017-06-24 00:37:46] Installed successful list: demo
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "demo"

Storage meta information of databases and softwares

When you install and download a large number of software packages and databases, you face the problem of how to find them again. If the meta information is not saved when you download or install these software packages or databases, you will be in really dire straits.

In fact, the version, path, source code path and update time are saved if you use BioInstaller to install software. Moreover, you can use several functions in BioInstaller to modify the information in the BIO_SOFTWARES_DB_ACTIVE database, a TOML format file.

temp.db <- tempfile()
set.biosoftwares.db(temp.db)
is.biosoftwares.db.active(temp.db)
#> [1] TRUE
params <- list(name = 'demo', comments = 'This is a demo.')
do.call(change.info, params)
#> INFO [2017-06-24 00:37:46] Running change.info for demo and be saved to /tmp/RtmpueRscR/file137fb31374993
#> [1] TRUE
get.info('demo')
#> $installed
#> [1] TRUE
#> 
#> $source.dir
#> [1] ""
#> 
#> $bin_dir
#> [1] ""
#> 
#> $executable_files
#> [1] ""
#> 
#> $comments
#> [1] "This is a demo."
#> 
#> attr(,"config")
#> [1] "demo"
#> attr(,"configtype")
#> [1] "yaml"
#> attr(,"file")
#> [1] "/tmp/RtmpueRscR/file137fb31374993"
del.info('demo')
#> [1] TRUE