The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

RScelestial Vignette

Mohammad-Hadi Foroughmand-Araabi

2023-11-30

library(RScelestial)
# We load igraph for drawing trees. If you do not want to draw,
# there is no need to import igraph.
library(igraph)
#> 
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#> 
#>     decompose, spectrum
#> The following object is masked from 'package:base':
#> 
#>     union

Installing RScelestial

The RScelestial package could be installed easily as follows

install.packages("RScelestial")

Simulation

Here we show a simulation. We build a data set with following command.

# Following command generates ten samples with 20 loci. 
# Rate of mutations on each edge of the evolutionary tree is 1.5. 
D = synthesis(10, 20, 5, seed = 7)
D
#> $seqeunce
#>     C1 C2 C3 C4 C5 C6 C7 C8 C9 C10
#> L1   0  0  0  3  0  1  0  3  0   0
#> L2   3  3  3  3  3  3  3  3  0   3
#> L3   0  3  3  0  3  1  0  0  0   3
#> L4   1  0  3  3  1  3  0  1  0   3
#> L5   0  3  3  1  3  3  0  0  0   0
#> L6   3  3  0  3  0  3  3  3  3   3
#> L7   0  0  0  0  3  0  3  0  3   0
#> L8   3  1  0  1  0  3  1  3  3   1
#> L9   0  0  3  3  3  3  3  0  0   3
#> L10  3  3  0  1  3  0  3  3  1   0
#> L11  0  3  0  0  0  0  3  1  3   0
#> L12  3  3  3  0  0  0  3  0  3   3
#> L13  3  1  1  3  0  3  0  0  0   3
#> L14  3  0  0  0  1  3  0  3  3   0
#> L15  0  3  0  3  0  0  3  3  1   0
#> L16  3  0  3  0  3  0  3  0  3   0
#> L17  3  3  0  3  3  3  0  3  1   3
#> L18  0  0  3  3  3  0  1  3  0   3
#> L19  3  3  0  1  3  0  0  3  0   3
#> L20  3  1  3  0  1  3  3  3  3   0
#> 
#> $true.sequence
#>     C1 C2 C3 C4 C5 C6 C7 C8 C9 C10
#> L1   0  0  0  0  0  0  0  0  0   0
#> L2   0  0  0  0  0  0  0  0  0   0
#> L3   0  0  0  0  0  0  0  0  0   0
#> L4   0  0  0  0  0  0  0  0  0   0
#> L5   0  0  0  0  0  0  0  0  0   0
#> L6   0  0  0  0  0  0  0  0  0   0
#> L7   0  0  0  0  0  0  0  0  0   0
#> L8   0  0  0  1  0  0  1  0  1   1
#> L9   0  0  0  0  0  0  0  0  0   0
#> L10  0  0  0  0  0  0  0  0  0   0
#> L11  0  0  0  0  0  0  0  0  0   0
#> L12  0  0  0  0  0  0  0  0  0   0
#> L13  0  0  0  0  0  0  0  0  0   0
#> L14  0  0  0  0  0  0  0  0  0   0
#> L15  0  0  0  0  0  0  0  0  0   0
#> L16  0  0  0  0  0  0  0  0  0   0
#> L17  0  0  0  0  0  0  0  0  0   0
#> L18  0  0  0  0  0  0  0  0  0   0
#> L19  0  0  0  0  0  0  0  0  0   0
#> L20  0  0  0  0  0  0  0  0  0   0
#> 
#> $true.clone
#> $true.clone$N1
#> [1] "C2" "C6"
#> 
#> $true.clone$N2
#> [1] "C5"
#> 
#> $true.clone$N3
#> [1] "C1" "C8"
#> 
#> $true.clone$N4
#> [1] "C3"
#> 
#> $true.clone$N6
#> [1] "C4"  "C7"  "C9"  "C10"
#> 
#> 
#> $true.tree
#>   src dest len
#> 1  N2   N1   1
#> 2  N3   N2   1
#> 3  N4   N2   1
#> 4  N6   N3   1

Inferring the phylogenetic tree

seq = as.ten.state.matrix(D$seqeunce)
SP = scelestial(seq, return.graph = TRUE)
SP
#> $input
#>      V1  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
#> C1  A/A ./. A/A C/C A/A ./. A/A ./. A/A ./. A/A ./. ./. ./. A/A ./. ./. A/A ./.
#> C10 A/A ./. ./. ./. A/A ./. A/A C/C ./. A/A A/A ./. ./. A/A A/A A/A ./. ./. ./.
#> C2  A/A ./. ./. A/A ./. ./. A/A C/C A/A ./. ./. ./. C/C A/A ./. A/A ./. A/A ./.
#> C3  A/A ./. ./. ./. ./. A/A A/A A/A ./. A/A A/A ./. C/C A/A A/A ./. A/A ./. A/A
#> C4  ./. ./. A/A ./. C/C ./. A/A C/C ./. C/C A/A A/A ./. A/A ./. A/A ./. ./. C/C
#> C5  A/A ./. ./. C/C ./. A/A ./. A/A ./. ./. A/A A/A A/A C/C A/A ./. ./. ./. ./.
#> C6  C/C ./. C/C ./. ./. ./. A/A ./. ./. A/A A/A A/A ./. ./. A/A A/A ./. A/A A/A
#> C7  A/A ./. A/A A/A A/A ./. ./. C/C ./. ./. ./. ./. A/A A/A ./. ./. A/A C/C A/A
#> C8  ./. ./. A/A C/C A/A ./. A/A ./. A/A ./. C/C A/A A/A ./. ./. A/A ./. ./. ./.
#> C9  A/A A/A A/A A/A A/A ./. ./. ./. A/A C/C ./. ./. A/A ./. C/C ./. C/C A/A A/A
#>     V20
#> C1  ./.
#> C10 A/A
#> C2  C/C
#> C3  ./.
#> C4  A/A
#> C5  C/C
#> C6  ./.
#> C7  ./.
#> C8  ./.
#> C9  ./.
#> 
#> $sequence
#>      V1  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
#> C1  A/A A/A A/A C/C A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A
#> C10 A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A
#> C2  A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A
#> C3  A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A
#> C4  A/A A/A A/A A/A C/C A/A A/A C/C A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A C/C
#> C5  A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A
#> C6  C/C A/A C/C A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A
#> C7  A/A A/A A/A A/A A/A A/A A/A C/C A/A C/C A/A A/A A/A A/A C/C A/A A/A C/C A/A
#> C8  A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A
#> C9  A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A C/C A/A C/C A/A A/A
#>     V20
#> C1  A/A
#> C10 A/A
#> C2  C/C
#> C3  A/A
#> C4  A/A
#> C5  C/C
#> C6  A/A
#> C7  A/A
#> C8  A/A
#> C9  A/A
#> 
#> $tree
#>   src dest     len
#> 1  C3  C10 6.00013
#> 2  C1   C8 6.00014
#> 3  C1  C10 6.00015
#> 4  C4  C10 6.25012
#> 5  C7   C9 6.50012
#> 6  C2  C10 6.50014
#> 7  C6  C10 6.50014
#> 8  C1   C5 6.75016
#> 9  C1   C9 7.00013
#> 
#> $graph
#> IGRAPH fd2a474 UNW- 10 9 -- 
#> + attr: name (v/c), weight (e/n)
#> + edges from fd2a474 (vertex names):
#> [1] C3 --C10 C1 --C8  C10--C1  C10--C4  C7 --C9  C10--C2  C10--C6  C1 --C5 
#> [9] C1 --C9

You can draw the graph with following command

tree.plot(SP, vertex.size = 30)

Also, we can make a rooted tree with cell “C8” as the root of the tree as follows:

SP = scelestial(seq, root.assign.method = "fix", root = "C8", return.graph = TRUE)
#> [1] "C8 -1"
#> [1] "C1 C8"
#> [1] "C10 C1"
#> [1] "C3 C10"
#> [1] "C4 C10"
#> [1] "C2 C10"
#> [1] "C6 C10"
#> [1] "C5 C1"
#> [1] "C9 C1"
#> [1] "C7 C9"
tree.plot(SP, vertex.size = 30)

Setting root.assign.method to “balance” lets the algorithm decide for a root that produces minimum height tree.

SP = scelestial(seq, root.assign.method = "balance", return.graph = TRUE)
#> [1] "C1 -1"
#> [1] "C8 C1"
#> [1] "C10 C1"
#> [1] "C3 C10"
#> [1] "C4 C10"
#> [1] "C2 C10"
#> [1] "C6 C10"
#> [1] "C5 C1"
#> [1] "C9 C1"
#> [1] "C7 C9"
#> [1] "C1 -1"
#> [1] "C8 C1"
#> [1] "C10 C1"
#> [1] "C3 C10"
#> [1] "C4 C10"
#> [1] "C2 C10"
#> [1] "C6 C10"
#> [1] "C5 C1"
#> [1] "C9 C1"
#> [1] "C7 C9"
tree.plot(SP, vertex.size = 30)

Evaluating results

Following command calculates the distance array between pairs of samples.

D.distance.matrix <- distance.matrix.true.tree(D)
D.distance.matrix
#>              C2          C6          C5          C1          C8          C3
#> C2  0.000000000 0.000000000 0.006849315 0.013698630 0.013698630 0.013698630
#> C6  0.000000000 0.000000000 0.006849315 0.013698630 0.013698630 0.013698630
#> C5  0.006849315 0.006849315 0.000000000 0.006849315 0.006849315 0.006849315
#> C1  0.013698630 0.013698630 0.006849315 0.000000000 0.000000000 0.013698630
#> C8  0.013698630 0.013698630 0.006849315 0.000000000 0.000000000 0.013698630
#> C3  0.013698630 0.013698630 0.006849315 0.013698630 0.013698630 0.000000000
#> C4  0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#> C7  0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#> C9  0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#> C10 0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#>              C4          C7          C9         C10
#> C2  0.020547945 0.020547945 0.020547945 0.020547945
#> C6  0.020547945 0.020547945 0.020547945 0.020547945
#> C5  0.013698630 0.013698630 0.013698630 0.013698630
#> C1  0.006849315 0.006849315 0.006849315 0.006849315
#> C8  0.006849315 0.006849315 0.006849315 0.006849315
#> C3  0.020547945 0.020547945 0.020547945 0.020547945
#> C4  0.000000000 0.000000000 0.000000000 0.000000000
#> C7  0.000000000 0.000000000 0.000000000 0.000000000
#> C9  0.000000000 0.000000000 0.000000000 0.000000000
#> C10 0.000000000 0.000000000 0.000000000 0.000000000
SP.distance.matrix <- distance.matrix.scelestial(SP)
SP.distance.matrix
#>              C1         C10          C2          C3          C4          C5
#> C1  0.000000000 0.004528317 0.009433976 0.009056619 0.009245286 0.005094350
#> C10 0.004528317 0.000000000 0.004905660 0.004528302 0.004716969 0.009622667
#> C2  0.009433976 0.004905660 0.000000000 0.009433961 0.009622629 0.014528326
#> C3  0.009056619 0.004528302 0.009433961 0.000000000 0.009245271 0.014150968
#> C4  0.009245286 0.004716969 0.009622629 0.009245271 0.000000000 0.014339636
#> C5  0.005094350 0.009622667 0.014528326 0.014150968 0.014339636 0.000000000
#> C6  0.009433976 0.004905660 0.009811319 0.009433961 0.009622629 0.014528326
#> C7  0.010188647 0.014716964 0.019622623 0.019245265 0.019433933 0.015282997
#> C8  0.004528309 0.009056626 0.013962286 0.013584928 0.013773595 0.009622659
#> C9  0.005283002 0.009811319 0.014716979 0.014339621 0.014528288 0.010377352
#>              C6          C7          C8          C9
#> C1  0.009433976 0.010188647 0.004528309 0.005283002
#> C10 0.004905660 0.014716964 0.009056626 0.009811319
#> C2  0.009811319 0.019622623 0.013962286 0.014716979
#> C3  0.009433961 0.019245265 0.013584928 0.014339621
#> C4  0.009622629 0.019433933 0.013773595 0.014528288
#> C5  0.014528326 0.015282997 0.009622659 0.010377352
#> C6  0.000000000 0.019622623 0.013962286 0.014716979
#> C7  0.019622623 0.000000000 0.014716956 0.004905644
#> C8  0.013962286 0.014716956 0.000000000 0.009811312
#> C9  0.014716979 0.004905644 0.009811312 0.000000000
## Difference between normalized distance matrices
vertices <- rownames(SP.distance.matrix)
sum(abs(D.distance.matrix[vertices,vertices] - SP.distance.matrix))
#> [1] 0.5453399

Running Scelestial on multiple sequence alignment

Given a multiple sequence alignment, Scelestial infers the phylogeny of them. Here we present a simple example. First we load libraries to load a multiple alignment.

library(stringr)
if (!require("seqinr")) install.packages("seqinr")
#> Loading required package: seqinr
library(seqinr)

In this example, we load a multiple alignment from seqinr package.

data(phylip, package = "seqinr")

Then we clean the data and build a zero-one matrix representing taxa and characters. Note that Scelestial accept matrices with taxa as its columns and characters as its rows.

# Removing non-informative columns and duplicate rows.
mcb <-  toupper(t(sapply(seq(phylip$seq), function(i) unlist(strsplit(phylip$seq[[i]], '')))))
ccb <- as.character(phylip$seq)
occb <- order(ccb)
cbColMask <- sapply(seq(ncol(mcb)), function(j) length(levels(as.factor(mcb[,j]))) == 1)
cbRowMask <- rep(TRUE, length(ccb))
for (i in seq(length(ccb))) {
    if (i == 1 || ccb[occb[i]] != ccb[occb[i-1]]) {
        cbRowMask[occb[i]] <- FALSE
    }
}
mcbRows <- apply(mcb[!cbRowMask, !cbColMask], MARGIN = 1, FUN = function(a) paste0(str_replace(a, "-", "X"), collapse = ""))

Executing Scelestial on the input matrix.

n.seq <- data.frame(nodes = phylip$nam[!cbRowMask], seq = mcbRows)
seq2 <- data.frame(t(as.ten.state.matrix.from.node.seq(n.seq)), stringsAsFactors = TRUE)
# Running Scelestial
SP = scelestial(seq2, return.graph = TRUE)
tree.plot(SP, vertex.size=20, vertex.label.dist=0, asp = 0, vertex.label.cex = 1)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.