Introduction

BioInstaller is a downloader and installer of bioinformatics software and databases. The inspiration for this project comes from various convenient package managers, such as pip for Python packages, install.packages for R packages, biocLite for Bioconductor R packages, etc.

Why do we not have an integrated package manager for bioinformatics databases and software?

In fact, there are already some tools that can do part of the work:

Conda and BioConda have done a lot of work, and we can use them to conveniently install some bioinformatics software. But there are still many problems with these package managers, such as versions not being updated in a timely manner, incompatibility with some precompiled programs, and little support for databases and other non-software files.

Docker is another very promising tool for migrating an analysis environment. However, it requires root privileges, and it is difficult to always get root privileges.

Furthermore, learning how to install and compile bioinformatics software is still necessary, because these 'unpleasant' experiences will help you improve your ability to debug and modify programs.

As for me, when starting some NGS analysis work on a new computer or operating system, I have to spend a lot of time and energy setting up a complete set of software and dependent files and writing the corresponding configuration files.

BioInstaller can help us to download, install and manage a variety of bioinformatics tools and databases more easily and systematically.

What's more, BioInstaller provides a different way for others to download and install your files, software and databases. More details can be found in another vignette, Examples of Templet Configuration File.

Feature:

Core function in BioInstaller

library(BioInstaller)

# Show all available software/dependencies in the default inst/extdata/github.toml
# and inst/extdata/nongithub.toml
install.bioinfo(show.all.names = TRUE)
#>   [1] "abyss"                 "arnapipe"             
#>   [3] "asap"                  "backspin"             
#>   [5] "bamtools"              "bamutil"              
#>   [7] "bcftools"              "bearscc"              
#>   [9] "bedtools"              "bowtie"               
#>  [11] "bowtie2"               "breakdancer"          
#>  [13] "brie"                  "bwa"                  
#>  [15] "cnvkit"                "cnvnator"             
#>  [17] "dart"                  "delly"                
#>  [19] "fastq_tools"           "fastx_toolkit"        
#>  [21] "freebayes"             "fsclvm"               
#>  [23] "github_demo"           "hisat2"               
#>  [25] "htseq"                 "igraph"               
#>  [27] "isop"                  "jvarkit"              
#>  [29] "libgtextutils"         "lofreq"               
#>  [31] "macs"                  "mdseq"                
#>  [33] "mimosca"               "multiqc"              
#>  [35] "oases"                 "oncotator"            
#>  [37] "outrigger"             "picard"               
#>  [39] "pindel"                "pxz"                  
#>  [41] "raceid"                "rca"                  
#>  [43] "rum"                   "samtools_old"         
#>  [45] "sclvm"                 "scnorm"               
#>  [47] "seqtk"                 "seurat"               
#>  [49] "singlesplice"          "sleuth"               
#>  [51] "somaticsniper"         "sparsehash"           
#>  [53] "speedseq"              "star"                 
#>  [55] "tmap"                  "tophat2"              
#>  [57] "tracer"                "trimgalore"           
#>  [59] "trinityrnaseq"         "varscan2"             
#>  [61] "vcflib"                "vcftools"             
#>  [63] "vep"                   "zifa"                 
#>  [65] "annovar"               "armadillo"            
#>  [67] "bcl2fastq2"            "blat"                 
#>  [69] "bzip2"                 "cesa"                 
#>  [71] "cnvnator_samtools"     "curl"                 
#>  [73] "demo_2"                "edena"                
#>  [75] "ensemble_grch37_reffa" "ensemble_grch38_reffa"
#>  [77] "fastqc"                "fatotwobit"           
#>  [79] "fusioncatcher"         "fusioncatcher_reffa"  
#>  [81] "gatk"                  "gatk_bundle"          
#>  [83] "gmap"                  "hisat2_reffa"         
#>  [85] "htslib"                "imagej"               
#>  [87] "liftover"              "lzo"                  
#>  [89] "lzop"                  "mapsplice2"           
#>  [91] "miniconda2"            "miniconda3"           
#>  [93] "mutect"                "ngs_qc_toolkit"       
#>  [95] "novoalign"             "pcre"                 
#>  [97] "pigz"                  "prinseq"              
#>  [99] "r"                     "reditools"            
#> [101] "root"                  "samstat"              
#> [103] "samtools"              "snpeff"               
#> [105] "solexaqa"              "sqlite"               
#> [107] "sratools"              "ssaha2"               
#> [109] "strelka"               "svtoolkit"            
#> [111] "tvc"                   "ucsc_reffa"           
#> [113] "ucsc_utils"            "velvet"               
#> [115] "xz"                    "zlib"

# Fetching versions of softwares
install.bioinfo('samtools', show.all.versions = TRUE)
#> INFO [2017-10-31 16:11:47] Fetching samtools versions....
#>  [1] "1.6"        "1.5"        "1.4.1"      "1.4"        "1.3.1"     
#>  [6] "1.3"        "1.2"        "1.1"        "1.0"        "0.2.0-rc12"
#> [11] "0.2.0-rc11" "0.2.0-rc10" "0.2.0-rc9"  "0.2.0-rc8"  "0.2.0-rc7" 
#> [16] "0.2.0-rc6"  "0.2.0-rc5"  "0.2.0-rc4"  "0.2.0-rc3"  "0.2.0-rc2" 
#> [21] "0.2.0-rc1"  "0.1.20"     "0.1.19"     "0.1.18"     "0.1.17"    
#> [26] "0.1.16"     "0.1.15"     "0.1.14"     "0.1.13"     "master"

# Install 'demo' with debug information
download.dir <- sprintf('%s/demo_2', tempdir())
install.bioinfo('demo', download.dir = download.dir, verbose = TRUE)
#> INFO [2017-10-31 16:11:48] Debug:name:demo
#> INFO [2017-10-31 16:11:48] Debug:destdir:
#> INFO [2017-10-31 16:11:48] Debug:db:~/.BioInstaller
#> INFO [2017-10-31 16:11:48] Debug:github.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/github.toml
#> INFO [2017-10-31 16:11:48] Debug:nongithub.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/nongithub.toml
#> INFO [2017-10-31 16:11:49] Fetching demo versions....
#> INFO [2017-10-31 16:11:49] Install versions:GRCh37
#> INFO [2017-10-31 16:11:49] Now start to install demo in /tmp/Rtmphb4wYO/demo_2.
#> INFO [2017-10-31 16:11:49] Running before install steps.
#> INFO [2017-10-31 16:11:49] Now start to download demo in /tmp/Rtmphb4wYO/demo_2.
#> INFO [2017-10-31 16:11:51] Running install steps.
#> INFO [2017-10-31 16:11:52] Running after install successful steps.
#> INFO [2017-10-31 16:11:52] Running CMD:echo 'successful!'
#> INFO [2017-10-31 16:11:52] Running change.info for demo and be saved to ~/.BioInstaller
#> INFO [2017-10-31 16:11:56] Debug:Install by Github configuration file: 
#> INFO [2017-10-31 16:11:56] Debug:Install by Non Github configuration file: demo
#> INFO [2017-10-31 16:11:56] Installed successful list: demo
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "demo"

# Download demo source code
download.dir <- sprintf('%s/demo_3', tempdir())
install.bioinfo('demo', download.dir = download.dir,
  download.only = TRUE, verbose = TRUE)
#> INFO [2017-10-31 16:11:56] Debug:name:demo
#> INFO [2017-10-31 16:11:56] Debug:destdir:
#> INFO [2017-10-31 16:11:56] Debug:db:~/.BioInstaller
#> INFO [2017-10-31 16:11:56] Debug:github.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/github.toml
#> INFO [2017-10-31 16:11:56] Debug:nongithub.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/nongithub.toml
#> INFO [2017-10-31 16:11:57] Fetching demo versions....
#> INFO [2017-10-31 16:11:57] Install versions:GRCh37
#> INFO [2017-10-31 16:11:57] Now start to download demo in /tmp/Rtmphb4wYO/demo_3.
#> INFO [2017-10-31 16:11:58] demo be downloaded in /tmp/Rtmphb4wYO/demo_3 successful
#> [1] TRUE

# Set download.dir and destdir (destdir is like /usr/local,
# containing bin, lib, include and others).
# destdir takes effect if the install steps use {{destdir}}
download.dir <- sprintf('%s/demo_source', tempdir())
destdir <- sprintf('%s/demo', tempdir())
install.bioinfo('demo', download.dir = download.dir, destdir = destdir)
#> INFO [2017-10-31 16:11:59] Debug:name:demo
#> INFO [2017-10-31 16:11:59] Debug:destdir:/tmp/Rtmphb4wYO/demo
#> INFO [2017-10-31 16:11:59] Debug:db:~/.BioInstaller
#> INFO [2017-10-31 16:11:59] Debug:github.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/github.toml
#> INFO [2017-10-31 16:11:59] Debug:nongithub.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/nongithub.toml
#> INFO [2017-10-31 16:11:59] Fetching demo versions....
#> INFO [2017-10-31 16:11:59] Install versions:GRCh37
#> INFO [2017-10-31 16:11:59] Now start to install demo in /tmp/Rtmphb4wYO/demo.
#> INFO [2017-10-31 16:11:59] Running before install steps.
#> INFO [2017-10-31 16:11:59] Now start to download demo in /tmp/Rtmphb4wYO/demo_source.
#> INFO [2017-10-31 16:12:02] Running install steps.
#> INFO [2017-10-31 16:12:03] Running after install successful steps.
#> INFO [2017-10-31 16:12:03] Running CMD:echo 'successful!'
#> INFO [2017-10-31 16:12:03] Running change.info for demo and be saved to ~/.BioInstaller
#> INFO [2017-10-31 16:12:07] Debug:Install by Github configuration file: 
#> INFO [2017-10-31 16:12:07] Debug:Install by Non Github configuration file: demo
#> INFO [2017-10-31 16:12:07] Installed successful list: demo
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "demo"

Storing useful information of databases and softwares

It takes time to find the paths of the software and databases after downloading and installing them. Worse still, you would be in really dire straits if you didn't save the useful information.

Fortunately, if you did that work with BioInstaller, the version, path, source code path and update time will be saved in the BIO_SOFTWARES_DB_ACTIVE database, a YAML format file.

temp.db <- tempfile()
set.biosoftwares.db(temp.db)
is.biosoftwares.db.active(temp.db)
#> [1] TRUE

# Install 'demo' quietly
download.dir <- sprintf('%s/demo_1', tempdir())
install.bioinfo('demo', download.dir = download.dir, verbose = FALSE)
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "demo"
config <- get.info('demo')
config
#> $installed
#> [1] TRUE
#> 
#> $source.dir
#> [1] "/tmp/Rtmphb4wYO/demo_1"
#> 
#> $bin_dir
#> [1] "/tmp/Rtmphb4wYO/demo_1"
#> 
#> $executable_files
#> [1] ""
#> 
#> $install.dir
#> [1] "/tmp/Rtmphb4wYO/demo_1"
#> 
#> $version
#> [1] "GRCh37"
#> 
#> $last.update.time
#> [1] "2017-10-31 16:12:11"
#> 
#> attr(,"config")
#> [1] "demo"
#> attr(,"configtype")
#> [1] "yaml"
#> attr(,"file")
#> [1] "/tmp/Rtmphb4wYO/file1b94e339b60ba"

config <- configr::read.config(temp.db)
config$demo$comments <- 'This is a demo.'
params <- list(config.dat = config, file.path = temp.db)
do.call(configr::write.config, params)
#> [1] TRUE
get.info('demo')
#> $installed
#> [1] "TRUE"
#> 
#> $source.dir
#> [1] "/tmp/Rtmphb4wYO/demo_1"
#> 
#> $bin_dir
#> [1] "/tmp/Rtmphb4wYO/demo_1"
#> 
#> $executable_files
#> [1] ""
#> 
#> $install.dir
#> [1] "/tmp/Rtmphb4wYO/demo_1"
#> 
#> $version
#> [1] "GRCh37"
#> 
#> $last.update.time
#> [1] "2017-10-31 16:12:11"
#> 
#> $comments
#> [1] "This is a demo."
#> 
#> attr(,"config")
#> [1] "demo"
#> attr(,"configtype")
#> [1] "ini"
#> attr(,"file")
#> [1] "/tmp/Rtmphb4wYO/file1b94e339b60ba"
del.info('demo')
#> [1] TRUE

Install softwares from local source

BioInstaller can be used to install software from a local source. To install github software, a cloned directory is required; nongithub software can be installed from a decompressed directory or a compressed archive.

download.dir <- sprintf('%s/github_demo_local', tempdir())
install.bioinfo('github_demo', download.dir = download.dir, download.only = TRUE, verbose = FALSE)
#> cloning into '/tmp/Rtmphb4wYO/github_demo_local'...
#> Receiving objects:  16% (1/6),    0 kb
#> Receiving objects:  33% (2/6),    0 kb
#> Receiving objects:  50% (3/6),    0 kb
#> Receiving objects:  66% (4/6),    0 kb
#> Receiving objects:  83% (5/6),    0 kb
#> Receiving objects: 100% (6/6),    0 kb, done.
#> [1] TRUE
install.bioinfo('github_demo', local.source = download.dir)
#> INFO [2017-10-31 16:12:14] Debug:name:github_demo
#> INFO [2017-10-31 16:12:14] Debug:destdir:
#> INFO [2017-10-31 16:12:14] Debug:db:/tmp/Rtmphb4wYO/file1b94e339b60ba
#> INFO [2017-10-31 16:12:14] Debug:github.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/github.toml
#> INFO [2017-10-31 16:12:14] Debug:nongithub.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/nongithub.toml
#> INFO [2017-10-31 16:12:14] Fetching github_demo versions....
#> INFO [2017-10-31 16:12:15] Install versions:master
#> INFO [2017-10-31 16:12:15] Now start to install github_demo in /tmp/Rtmphb4wYO/github_demo.
#> INFO [2017-10-31 16:12:15] Running before install steps.
#> INFO [2017-10-31 16:12:16] Running install steps.
#> INFO [2017-10-31 16:12:16] Running after install successful steps.
#> INFO [2017-10-31 16:12:16] Running CMD:echo 'successful!'
#> INFO [2017-10-31 16:12:16] Running change.info for github_demo and be saved to /tmp/Rtmphb4wYO/file1b94e339b60ba
#> INFO [2017-10-31 16:12:16] Debug:Install by Github configuration file: github_demo
#> INFO [2017-10-31 16:12:16] Debug:Install by Non Github configuration file: 
#> INFO [2017-10-31 16:12:16] Installed successful list: github_demo
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "github_demo"

download.dir <- sprintf('%s/demo_local', tempdir())
install.bioinfo('demo_2', download.dir = download.dir, download.only = TRUE, verbose = FALSE)
#> [1] FALSE
install.bioinfo('demo_2', download.dir = download.dir, local.source = sprintf('%s/GRCh37_MT_ensGene.txt.gz', download.dir), decompress = TRUE)
#> INFO [2017-10-31 16:12:18] Debug:name:demo_2
#> INFO [2017-10-31 16:12:18] Debug:destdir:
#> INFO [2017-10-31 16:12:18] Debug:db:/tmp/Rtmphb4wYO/file1b94e339b60ba
#> INFO [2017-10-31 16:12:18] Debug:github.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/github.toml
#> INFO [2017-10-31 16:12:18] Debug:nongithub.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/nongithub.toml
#> INFO [2017-10-31 16:12:19] Fetching demo_2 versions....
#> INFO [2017-10-31 16:12:19] Install versions:GRCh37
#> INFO [2017-10-31 16:12:19] Now start to install demo_2 in /tmp/Rtmphb4wYO/demo_local.
#> INFO [2017-10-31 16:12:19] Running before install steps.
#> INFO [2017-10-31 16:12:21] Running install steps.
#> INFO [2017-10-31 16:12:21] Running after install successful steps.
#> INFO [2017-10-31 16:12:21] Running CMD:echo 'successful!'
#> INFO [2017-10-31 16:12:21] Running change.info for demo_2 and be saved to /tmp/Rtmphb4wYO/file1b94e339b60ba
#> INFO [2017-10-31 16:12:22] Debug:Install by Github configuration file: 
#> INFO [2017-10-31 16:12:22] Debug:Install by Non Github configuration file: demo_2
#> INFO [2017-10-31 16:12:22] Installed successful list: demo_2
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "demo_2"

Crawl all versions of software or databases

BioInstaller provides a craw.all.versions function that tries to download all available URL files in the nongithub part.

download.dir <- sprintf('%s/craw_all_versions', tempdir())
craw.all.versions('demo', download.dir = download.dir)
#> INFO [2017-10-31 16:12:22] Fetching demo versions....

Download ANNOVAR databases

download.dir <- sprintf('%s/ANNOVAR', tempdir())
config.toml <- system.file("extdata", "databases/ANNOVAR.toml", 
  package = "BioInstaller")
install.bioinfo('raw_ucsc_refgene', download.dir = download.dir, 
  nongithub.cfg = config.toml, extra.list = list(buildver = "hg19"))
#> INFO [2017-10-31 16:12:24] Debug:name:raw_ucsc_refgene
#> INFO [2017-10-31 16:12:24] Debug:destdir:
#> INFO [2017-10-31 16:12:24] Debug:db:/tmp/Rtmphb4wYO/file1b94e339b60ba
#> INFO [2017-10-31 16:12:24] Debug:github.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/github.toml
#> INFO [2017-10-31 16:12:24] Debug:nongithub.cfg:/tmp/RtmpTQQMYN/Rinst1b93d3af5b7bf/BioInstaller/extdata/databases/ANNOVAR.toml
#> INFO [2017-10-31 16:12:24] Fetching raw_ucsc_refgene versions....
#> INFO [2017-10-31 16:12:25] Install versions:refgene
#> INFO [2017-10-31 16:12:25] Now start to install raw_ucsc_refgene in /tmp/Rtmphb4wYO/ANNOVAR.
#> INFO [2017-10-31 16:12:25] Running before install steps.
#> INFO [2017-10-31 16:12:25] Now start to download raw_ucsc_refgene in /tmp/Rtmphb4wYO/ANNOVAR.
#> INFO [2017-10-31 16:12:33] Running install steps.
#> INFO [2017-10-31 16:12:33] Running after install successful steps.
#> INFO [2017-10-31 16:12:33] Running CMD:mv refGene.txt ucsc_hg19_refGene.txt
#> INFO [2017-10-31 16:12:34] Running change.info for raw_ucsc_refgene and be saved to /tmp/Rtmphb4wYO/file1b94e339b60ba
#> INFO [2017-10-31 16:12:34] Debug:Install by Github configuration file: 
#> INFO [2017-10-31 16:12:34] Debug:Install by Non Github configuration file: raw_ucsc_refgene
#> INFO [2017-10-31 16:12:34] Installed successful list: raw_ucsc_refgene
#> $fail.list
#> [1] ""
#> 
#> $success.list
#> [1] "raw_ucsc_refgene"