babette: Step by Step

Richèl J.C. Bilderbeek

2023-09-26

Introduction

This step-by-step demo shows how to run the babette pipeline in detail.

First, load babette:

library(babette)

All steps in this demo use the same inference model, with a short MCMC chain length of 10,000 (10K) and the trace and tree logs written to temporary files:

# Start from the default inference model and shorten the MCMC run.
inference_model <- create_inference_model()
inference_model$mcmc$chain_length <- 10000

# Trace log: use a file name from get_beautier_tempfilename().
# mustWork = FALSE makes normalizePath() accept a path that may not
# exist yet.
tracelog_tempfile <- get_beautier_tempfilename(
  pattern = "tracelog_", fileext = ".log"
)
inference_model$mcmc$tracelog$filename <- normalizePath(
  tracelog_tempfile,
  mustWork = FALSE
)

# Tree log: same approach, with a ".trees" extension.
treelog_tempfile <- get_beautier_tempfilename(
  pattern = "treelog_", fileext = ".trees"
)
inference_model$mcmc$treelog$filename <- normalizePath(
  treelog_tempfile,
  mustWork = FALSE
)

Create a ‘BEAST2’ input file

This step is commonly done using BEAUti. With babette, this can be done as follows:

# The 'BEAST2' input file is written to a temporary .xml file.
beast2_input_file <- tempfile(pattern = "beast2_", fileext = ".xml")

# Path of the example FASTA alignment, as located by get_babette_path().
fasta_filename <- get_babette_path("anthus_aco.fas")

# Combine the alignment and the inference model into the input file.
create_beast2_input_file_from_model(
  input_filename = fasta_filename,
  inference_model = inference_model,
  output_filename = beast2_input_file
)

Display (part of) the ‘BEAST2’ input file

# Read the generated XML and show its first lines.
beast2_xml_lines <- readLines(beast2_input_file)
print(head(beast2_xml_lines))
#> [1] "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><beast beautitemplate='Standard' beautistatus='' namespace=\"beast.core:beast.evolution.alignment:beast.evolution.tree.coalescent:beast.core.util:beast.evolution.nuc:beast.evolution.operators:beast.evolution.sitemodel:beast.evolution.substitutionmodel:beast.evolution.likelihood\" required=\"\" version=\"2.4\">"
#> [2] ""                                                                                                                                                                                                                                                                                                                                                                                   
#> [3] ""                                                                                                                                                                                                                                                                                                                                                                                   
#> [4] "    <data"                                                                                                                                                                                                                                                                                                                                                                          
#> [5] "id=\"anthus_aco\""                                                                                                                                                                                                                                                                                                                                                                  
#> [6] "name=\"alignment\">"

This file can both be loaded by BEAUti and be used by ‘BEAST2’.

You can check that the file is indeed a valid ‘BEAST2’ input file:

# The check only runs when is_beast2_installed() is TRUE.
# Its result is the value of the `if` expression, so it is printed
# without an explicit print() call.
if (is_beast2_installed()) {
  is_beast2_input_file(beast2_input_file)
}
#> [1] TRUE

Run MCMC

This step is commonly done using ‘BEAST2’ from the command-line or using its GUI. With babette, this can be done as follows:

if (is_beast2_installed()) {
  # Options for this run; only the input file name is set explicitly.
  beast2_options <- create_beast2_options(input_filename = beast2_input_file)
  state_filename <- beast2_options$output_state_filename

  # Check up front that the output files can be created.
  beastier::check_can_create_file(state_filename)
  beastier::check_can_create_treelog_file(beast2_options)

  # Run 'BEAST2', then confirm that the state file exists.
  run_beast2_from_options(beast2_options = beast2_options)
  testthat::expect_true(file.exists(state_filename))
}

Display (part of) the ‘BEAST2’ output files

The .log file contains the model parameters and parameter estimates:

if (is_beast2_installed()) {
  # Show the first and the last lines of the trace log.
  tracelog_path <- inference_model$mcmc$tracelog$filename
  print(head(readLines(tracelog_path)))
  print(tail(readLines(tracelog_path)))
}
#> [1] "#"                                                                                                                                  
#> [2] "#model:"                                                                                                                            
#> [3] "#"                                                                                                                                  
#> [4] "#<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><input id=\"posterior\" spec=\"beast.core.util.CompoundDistribution\">"
#> [5] "#    <distribution id=\"prior\" spec=\"beast.core.util.CompoundDistribution\">"                                                     
#> [6] "#        <distribution id=\"YuleModel.t:anthus_aco\" spec=\"beast.evolution.speciation.YuleModel\">"                                
#> [1] "5000\t-1761.3204192200221\t-1856.8658140026791\t95.54539478265697\t-1856.8658140026791\t0.01291079458323695\t95.54539478265697\t202.78110557637092"   
#> [2] "6000\t-1761.5654059574215\t-1858.337673928368\t96.7722679709466\t-1858.337673928368\t0.013370667326241429\t96.7722679709466\t229.7380725652953"       
#> [3] "7000\t-1758.7396712914406\t-1861.1877644644346\t102.44809317299416\t-1861.1877644644346\t0.009125245329688603\t102.44809317299416\t369.44907119208756"
#> [4] "8000\t-1766.2619613667187\t-1865.4277246825154\t99.16576331579684\t-1865.4277246825154\t0.010520208769015974\t99.16576331579684\t257.9294228628412"   
#> [5] "9000\t-1760.984336956429\t-1856.231684379699\t95.24734742327003\t-1856.231684379699\t0.014481905583304693\t95.24734742327003\t253.4630847697568"      
#> [6] "10000\t-1771.784990750627\t-1862.9187533221218\t91.1337625714948\t-1862.9187533221218\t0.01394547902874544\t91.1337625714948\t251.4836164360151"

The .trees file contains the alignment, taxa and posterior trees:

if (is_beast2_installed()) {
  # Show the first and the last lines of the tree log.
  # The file is read once per print() call, so any warning raised by
  # readLines() is reported once for each of the two reads.
  print(head(readLines(inference_model$mcmc$treelog$filename)))
  print(tail(readLines(inference_model$mcmc$treelog$filename)))
}
#> Warning in readLines(inference_model$mcmc$treelog$filename): incomplete final
#> line found on '/home/richel/.cache/beautier/treelog_12b0318a53e6d.trees'
#> [1] "#NEXUS"                ""                      "Begin taxa;"          
#> [4] "\tDimensions ntax=22;" "\t\tTaxlabels"         "\t\t\t61430_aco"
#> Warning in readLines(inference_model$mcmc$treelog$filename): incomplete final
#> line found on '/home/richel/.cache/beautier/treelog_12b0318a53e6d.trees'
#> [1] "tree STATE_6000 = (((((((1:3.6977342405987413E-4,17:3.6977342405987413E-4):0.0010569403214262837,(3:1.3040791405877659E-4,5:1.3040791405877659E-4):0.0012963058314273814):5.286138500541656E-4,11:0.0019553275955403234):0.0019919031496889615,(9:0.0017151950198212762,22:0.0017151950198212762):0.0022320357254080087):2.2110823663423242E-4,(14:0.0026496189916270815,20:0.0026496189916270815):0.0015187199902364358):9.332625790424709E-4,(((4:7.448016694816867E-5,21:7.448016694816867E-5):1.864990836889317E-4,15:2.6097925063710037E-4):0.003286982353322925,((8:3.299820011465127E-4,13:3.299820011465127E-4):0.0012108256072855222,(16:0.0013202806592410553,19:0.0013202806592410553):2.205269491909797E-4):0.00200715399552799):0.001553639956945963):0.00826906576533544,(((2:0.0019493179864674714,7:0.0019493179864674714):0.001105416628592499,10:0.0030547346150599703):0.00419761177544435,(6:0.006083013360759853,(12:0.0021340785147390346,18:0.0021340785147390346):0.003948934846020819):0.0011693330297444671):0.0061183209357371085):0.0;"
#> [2] "tree STATE_7000 = (((((((1:2.403637311778034E-4,17:2.403637311778034E-4):9.132095500987973E-5,5:3.316846861876831E-4):5.217517027291963E-5,3:3.8385985646060275E-4):6.152448799878216E-4,11:9.991047364484244E-4):0.004073426517631093,((4:2.063205553004262E-4,15:2.063205553004262E-4):0.0022711652325245408,((8:5.883919174126987E-4,13:5.883919174126987E-4):5.196373222685158E-4,16:0.0011080292396812145):0.0013694565481437524):0.002595045466254551):5.794218486548869E-4,(9:0.004498622484643184,((14:0.0018256400336360912,20:0.0018256400336360912):4.4317044850915744E-4,21:0.0022688104821452486):0.0022298120024979353):0.0011533306180912207):0.0034732922269541987,((((2:0.00118043964362626,7:0.00118043964362626):6.871632963106977E-4,(10:3.6259067401828686E-4,19:3.6259067401828686E-4):0.001505012265918671):0.002564073313607384,6:0.004431676253544342):5.237488852222194E-5,(12:0.0022732965551788592,(18:2.4446873285717183E-4,22:2.4446873285717183E-4):0.0020288278223216873):0.0022107545868877043):0.00464119418762204):0.0;"        
#> [3] "tree STATE_8000 = (((((((((1:8.785097155263515E-4,(3:4.0014769551964795E-4,17:4.0014769551964795E-4):4.783620200067035E-4):7.433980193647326E-6,5:8.859436957199988E-4):6.925691337040554E-4,19:0.0015785128294240542):1.3343465154504706E-4,11:0.0017119474809691013):0.0013374454184067236,(14:0.0018196989373712132,20:0.0018196989373712132):0.0012296939620046116):8.565789043614904E-4,((4:4.744296622603254E-4,15:4.744296622603254E-4):0.0026209542949752374,((8:3.373737349414038E-4,13:3.373737349414038E-4):0.001077036937384963,(16:2.865462129611004E-4,22:2.865462129611004E-4):0.0011278644593652664):0.0016809732849091957):8.105878465017526E-4):2.887970238207296E-5,9:0.003934851506119388):6.605826512717317E-4,21:0.00459543415739112):0.005924774611624854,(((2:0.003375250975568782,7:0.003375250975568782):9.058923381819262E-4,10:0.004281143313750708):0.0010573158918958181,(6:0.004282905520046269,(12:9.311553382134508E-4,18:9.311553382134508E-4):0.0033517501818328185):0.001055553685600257):0.005181749563369448):0.0;"          
#> [4] "tree STATE_9000 = ((((((((1:6.309086385713606E-5,5:6.309086385713606E-5):1.3995493204422893E-4,17:2.03045795901365E-4):1.2532403445296723E-5,3:2.1557819934666172E-4):0.0014825152465426583,11:0.00169809344588932):0.0019257231432257304,(19:0.0017424520708144882,20:0.0017424520708144882):0.0018813645183005621):0.0019826394575953414,9:0.005606456046710392):6.306269028914383E-4,(((4:7.98921799443248E-5,15:7.98921799443248E-5):0.0033835292535314486,(((8:1.5166408093018928E-4,13:1.5166408093018928E-4):2.107705991293986E-4,22:3.624346800595879E-4):2.0124209114908537E-4,16:5.636767712086732E-4):0.0028997446622671):0.0019510025043749687,(14:0.0025856436176120027,21:0.0025856436176120027):0.0028287803202387395):8.226590117510878E-4):0.008244822633702863,((((2:0.0024865855220345674,7:0.0024865855220345674):7.569495376194089E-4,10:0.0032435350596539763):0.002576254141314131,6:0.005819789200968107):3.8627506896289805E-4,(12:0.004091106623837715,18:0.004091106623837715):0.0021149576460932906):0.008275841313373687):0.0;"       
#> [5] "tree STATE_10000 = (((((((1:5.640509028850157E-4,5:5.640509028850157E-4):2.6680946098870555E-4,3:8.308603638737213E-4):7.295543716622958E-4,17:0.001560414735536017):0.0015705220234042607,11:0.0031309367589402778):0.004055419525075088,((((4:0.001052267323852714,15:0.001052267323852714):0.002870029885961618,22:0.003922297209814332):0.0012624457300269842,(((8:0.0014439209497470666,13:0.0014439209497470666):4.3642168197745215E-5,20:0.0014875631179448118):0.0010772639161133134,16:0.0025648270340581252):0.0026199159057831906):0.0017129867452574978,9:0.006897729685098814):2.8862659891655193E-4):9.966759454736558E-4,(14:8.193630644722192E-4,21:8.193630644722192E-4):0.007363669165016802):0.005762446799256419,((((2:0.001639597360341705,7:0.001639597360341705):4.6727057160475213E-4,19:0.002106867931946457):0.0010334622297815798,10:0.003140330161728037):0.005555889071961653,(6:0.008655743375149239,(12:0.0022180525383988115,18:0.0022180525383988115):0.006437690836750427):4.047585854045216E-5):0.00524925979505575):0.0;"      
#> [6] "End;"

The .xml.state file contains the final state of the MCMC run and the MCMC operator acceptances thus far:

if (is_beast2_installed()) {
  # Show the first and the last lines of the state file.
  state_path <- beast2_options$output_state_filename
  print(head(readLines(state_path)))
  print(tail(readLines(state_path)))
}
#> [1] "<itsabeastystatewerein version='2.0' sample='10000'>"
#> [2] "<statenode id='Tree.t:anthus_aco'>(((((((0:5.640509028850157E-4,4:5.640509028850157E-4)24:2.6680946098870555E-4,2:8.308603638737213E-4)36:7.295543716622958E-4,16:0.001560414735536017)34:0.0015705220234042607,10:0.0031309367589402778)31:0.004055419525075088,((((3:0.001052267323852714,14:0.001052267323852714)30:0.002870029885961618,21:0.003922297209814332)22:0.0012624457300269842,(((7:0.0014439209497470666,12:0.0014439209497470666)32:4.3642168197745215E-5,19:0.0014875631179448118)35:0.0010772639161133134,15:0.0025648270340581252)33:0.0026199159057831906)39:0.0017129867452574978,8:0.006897729685098814)28:2.8862659891655193E-4)23:9.966759454736558E-4,(13:8.193630644722192E-4,20:8.193630644722192E-4)38:0.007363669165016802)41:0.005762446799256419,((((1:0.001639597360341705,6:0.001639597360341705)25:4.6727057160475213E-4,18:0.002106867931946457)40:0.0010334622297815798,9:0.003140330161728037)29:0.005555889071961653,(5:0.008655743375149239,(11:0.0022180525383988115,17:0.0022180525383988115)27:0.006437690836750427)37:4.047585854045216E-5)26:0.00524925979505575)42:0.0</statenode>"
#> [3] "<statenode id='birthRate.t:anthus_aco'>birthRate.t:anthus_aco[1 1] (-Infinity,Infinity): 251.4836164360151 </statenode>"
#> [4] "</itsabeastystatewerein>"
#> [5] "<!--"
#> [6] "{\"operators\":["                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
#> [1] "{\"id\":\"YuleModelSubtreeSlide.t:anthus_aco\",\"p\":1,\"accept\":5,\"reject\":2011,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":949,\"rejectOp\":949},"    
#> [2] "{\"id\":\"YuleModelNarrow.t:anthus_aco\",\"p\":\"NaN\",\"accept\":905,\"reject\":1080,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":0,\"rejectOp\":0},"      
#> [3] "{\"id\":\"YuleModelWide.t:anthus_aco\",\"p\":\"NaN\",\"accept\":14,\"reject\":366,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":206,\"rejectOp\":206},"      
#> [4] "{\"id\":\"YuleModelWilsonBalding.t:anthus_aco\",\"p\":\"NaN\",\"accept\":50,\"reject\":386,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":67,\"rejectOp\":67}"
#> [5] "]}"                                                                                                                                                       
#> [6] "-->"

Parse output

This step is commonly done using Tracer. With babette, this can be done as follows.

Parsing .log file to obtain the parameter estimates:

if (is_beast2_installed()) {
  # Parse the trace log, then show its first rows as a table.
  estimates <- parse_beast_tracelog_file(
    inference_model$mcmc$tracelog$filename
  )
  knitr::kable(head(estimates))
}
Sample posterior likelihood prior treeLikelihood TreeHeight YuleModel birthRate
0 -6879.867 -6869.726 -10.14191 -6869.726 3.3665920 -10.14191 1.00000
1000 -1918.589 -1991.985 73.39653 -1991.985 0.0268112 73.39653 47.27918
2000 -1767.270 -1857.064 89.79335 -1857.064 0.0122694 89.79335 140.42977
3000 -1763.245 -1861.037 97.79241 -1861.037 0.0106426 97.79241 234.51126
4000 -1758.597 -1858.961 100.36453 -1858.961 0.0131173 100.36453 297.03691
5000 -1761.320 -1856.866 95.54539 -1856.866 0.0129108 95.54539 202.78111

Parsing .trees file to obtain the posterior phylogenies:

if (is_beast2_installed()) {
  # Parse the tree log, then plot the trees with plot_densitree().
  posterior_trees <- parse_beast_trees(
    inference_model$mcmc$treelog$filename
  )
  plot_densitree(posterior_trees)
}

Parsing .xml.state file to obtain the MCMC operator acceptances:

if (is_beast2_installed()) {
  # Parse the operators from the state file, then show the first rows
  # as a table.
  operators <- parse_beast_state_operators(
    beast2_options$output_state_filename
  )
  knitr::kable(head(operators))
}
operator p accept reject acceptFC rejectFC rejectIv rejectOp
YuleBirthRateScaler.t 0.75 275 110 0 0 0 0
YuleModelTreeScaler.t 0.50 83 275 0 0 0 0
YuleModelTreeRootScaler.t 0.50 92 314 0 0 48 48
YuleModelUniformOperator.t NaN 2360 1675 0 0 0 0
YuleModelSubtreeSlide.t 1.00 5 2011 0 0 949 949
YuleModelNarrow.t NaN 905 1080 0 0 0 0