Nested analyses

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Nested analyses

Dewey Dunnington

2023-01-17

Creating nested data

Preparing the data:

alta_lake_geochem
#> # A tibble: 192 × 9
#>    location param depth   age value stdev units     n zone  
#>    <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <chr> <int> <chr> 
#>  1 ALGC2    Cu     0.25 2015.  76   NA    ppm       1 Zone 3
#>  2 ALGC2    Cu     0.75 2011. 108.   4.50 ppm       3 Zone 3
#>  3 ALGC2    Cu     1.25 2008. 158   NA    ppm       1 Zone 3
#>  4 ALGC2    Cu     1.75 2003. 169   NA    ppm       1 Zone 3
#>  5 ALGC2    Cu     2.5  1998. 161   NA    ppm       1 Zone 3
#>  6 ALGC2    Cu     3.5  1982. 129   NA    ppm       1 Zone 3
#>  7 ALGC2    Cu     4.5  1966.  88.7  3.86 ppm       3 Zone 2
#>  8 ALGC2    Cu     5.5  1947.  65   NA    ppm       1 Zone 2
#>  9 ALGC2    Cu     6.5  1922.  62.3  9.53 ppm       3 Zone 2
#> 10 ALGC2    Cu     7.5  1896.  48   NA    ppm       1 Zone 2
#> # … with 182 more rows

alta_nested <- nested_data(
  alta_lake_geochem,
  qualifiers = c(age, depth, zone),
  key = param,
  value = value,
  trans = scale
)

alta_nested
#> # A tibble: 1 × 4
#>   discarded_columns discarded_rows   qualifiers        data             
#> * <list>            <list>           <list>            <list>           
#> 1 <tibble [32 × 0]> <tibble [0 × 9]> <tibble [32 × 4]> <tibble [32 × 6]>

alta_nested %>% unnested_data(data)
#> # A tibble: 32 × 6
#>     C[,1] `C/N`[,1] Cu[,1] Ti[,1] d13C[,1] d15N[,1]
#>     <dbl>     <dbl>  <dbl>  <dbl>    <dbl>    <dbl>
#>  1 -1.54      1.26  -0.794  0.807    1.03   -0.670 
#>  2 -1.59      1.36  -0.559  1.33     1.19    0.0499
#>  3 -1.98      0.960 -0.721  0.682    1.10   -0.511 
#>  4 -0.189     1.61  -0.749  0.233    0.836  -2.37  
#>  5  0.993     2.48  -0.694  0.908    1.06   -2.55  
#>  6 -0.157     1.76  -0.712  0.941    1.17   -1.52  
#>  7 -0.642     1.36  -0.667  1.14     1.07   -1.39  
#>  8 -1.07      0.924 -0.559  1.16     0.820  -0.439 
#>  9 -0.722     0.696 -0.830  0.932    0.765  -0.929 
#> 10 -0.631     0.309 -0.504  0.882    0.409  -0.166 
#> # … with 22 more rows
alta_nested %>% unnested_data(qualifiers, data)
#> # A tibble: 32 × 10
#>      age depth zone   row_number  C[,1] `C/N`[,1] Cu[,1] Ti[,1] d13C[,1] d15N[…¹
#>    <dbl> <dbl> <chr>       <int>  <dbl>     <dbl>  <dbl>  <dbl>    <dbl>   <dbl>
#>  1 1550   29.5 Zone 1          1 -1.54      1.26  -0.794  0.807    1.03  -0.670 
#>  2 1566.  28.5 Zone 1          2 -1.59      1.36  -0.559  1.33     1.19   0.0499
#>  3 1581.  27.5 Zone 1          3 -1.98      0.960 -0.721  0.682    1.10  -0.511 
#>  4 1597.  26.5 Zone 1          4 -0.189     1.61  -0.749  0.233    0.836 -2.37  
#>  5 1613.  25.5 Zone 1          5  0.993     2.48  -0.694  0.908    1.06  -2.55  
#>  6 1629.  24.5 Zone 1          6 -0.157     1.76  -0.712  0.941    1.17  -1.52  
#>  7 1644.  23.5 Zone 1          7 -0.642     1.36  -0.667  1.14     1.07  -1.39  
#>  8 1660.  22.5 Zone 1          8 -1.07      0.924 -0.559  1.16     0.820 -0.439 
#>  9 1676.  21.5 Zone 1          9 -0.722     0.696 -0.830  0.932    0.765 -0.929 
#> 10 1692.  20.5 Zone 1         10 -0.631     0.309 -0.504  0.882    0.409 -0.166 
#> # … with 22 more rows, and abbreviated variable name ¹d15N[,1]

Principal components analysis

pca <- alta_nested %>% nested_prcomp()
pca
#> # A tibble: 1 × 8
#>   discarded_col…¹ discar…² qualif…³ data     model    variance loadings scores  
#> * <list>          <list>   <list>   <list>   <list>   <list>   <list>   <list>  
#> 1 <tibble>        <tibble> <tibble> <tibble> <prcomp> <tibble> <tibble> <tibble>
#> # … with abbreviated variable names ¹discarded_columns, ²discarded_rows,
#> #   ³qualifiers

plot(pca)

pca %>% unnested_data(qualifiers, scores)
#> # A tibble: 32 × 10
#>      age depth zone   row_number   PC1     PC2     PC3     PC4     PC5     PC6
#>    <dbl> <dbl> <chr>       <int> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#>  1 1550   29.5 Zone 1          1 -2.48 -0.273   0.409   0.518  -0.316   0.0538
#>  2 1566.  28.5 Zone 1          2 -2.48 -0.675   0.902   0.561   0.364   0.190 
#>  3 1581.  27.5 Zone 1          3 -2.41 -0.721   0.576   0.465  -0.619   0.184 
#>  4 1597.  26.5 Zone 1          4 -2.39  1.73   -0.527   0.197  -0.742  -0.0656
#>  5 1613.  25.5 Zone 1          5 -2.73  2.82   -0.666   0.212   0.432  -0.0715
#>  6 1629.  24.5 Zone 1          6 -2.56  1.22   -0.155   0.235   0.208   0.105 
#>  7 1644.  23.5 Zone 1          7 -2.57  0.708   0.123  -0.0210  0.0114 -0.0286
#>  8 1660.  22.5 Zone 1          8 -2.04 -0.243   0.490   0.101   0.157  -0.0834
#>  9 1676.  21.5 Zone 1          9 -1.98  0.0702 -0.0657 -0.147  -0.0813 -0.186 
#> 10 1692.  20.5 Zone 1         10 -1.20 -0.376   0.202  -0.132   0.202  -0.220 
#> # … with 22 more rows
pca %>% unnested_data(variance)
#> # A tibble: 6 × 6
#>   component component_text standard_deviation variance variance_propor…¹ varia…²
#>       <int> <chr>                       <dbl>    <dbl>             <dbl>   <dbl>
#> 1         1 PC1                         2.15    4.61             0.768     0.768
#> 2         2 PC2                         0.884   0.781            0.130     0.899
#> 3         3 PC3                         0.603   0.364            0.0607    0.959
#> 4         4 PC4                         0.381   0.145            0.0242    0.984
#> 5         5 PC5                         0.276   0.0761           0.0127    0.996
#> 6         6 PC6                         0.151   0.0228           0.00380   1    
#> # … with abbreviated variable names ¹variance_proportion,
#> #   ²variance_proportion_cumulative
pca %>% unnested_data(loadings)
#> # A tibble: 6 × 7
#>   variable    PC1     PC2    PC3    PC4     PC5     PC6
#>   <chr>     <dbl>   <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
#> 1 C         0.380  0.540  -0.460 -0.162  0.567   0.0718
#> 2 C/N      -0.401  0.451   0.207  0.735  0.223  -0.0408
#> 3 Cu        0.387  0.340   0.760 -0.173 -0.0609  0.352 
#> 4 Ti       -0.439 -0.0783  0.356 -0.483  0.565  -0.350 
#> 5 d13C     -0.458 -0.0890 -0.144 -0.145  0.126   0.851 
#> 6 d15N      0.377 -0.613   0.144  0.386  0.539   0.149

Constrained hierarchical clustering

keji_nested <- keji_lakes_plottable %>%
  group_by(location) %>%
  nested_data(qualifiers = depth, key = taxon, value = rel_abund)

keji_nested %>% unnested_data(qualifiers, data)
#> # A tibble: 37 × 9
#>    location        depth row_num…¹ Aster…² Aulac…³ Aulac…⁴ Cyclo…⁵ Tabel…⁶ Other
#>    <chr>           <dbl>     <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
#>  1 Beaverskin Lake 0.125         1   0        3.02    1.72    3.45   0      91.8
#>  2 Beaverskin Lake 0.375         2   0        3.25    2.03    5.69   1.63   87.4
#>  3 Beaverskin Lake 0.825         3   1.32     0       2.32    4.30   1.32   90.7
#>  4 Beaverskin Lake 2.12          4   0.333    0       2.67    3      1.67   92.3
#>  5 Beaverskin Lake 3.12          5   0        0       5.32    6.98   1.33   86.4
#>  6 Beaverskin Lake 4.12          6   0        0       3.54   13.2    0.643  82.6
#>  7 Beaverskin Lake 5.38          7   0.987    0      10.2    12.8    2.63   73.4
#>  8 Beaverskin Lake 6.38          8   0.993    0       8.94   17.5    3.97   68.5
#>  9 Beaverskin Lake 7.62          9   1.63     0       8.82   20.3    2.29   67.0
#> 10 Beaverskin Lake 9.12         10   0.328    0      10.8    23.9    2.95   62.0
#> # … with 27 more rows, and abbreviated variable names ¹row_number,
#> #   ²`Asterionella ralfsii var. americana (large)`, ³`Aulacoseira distans`,
#> #   ⁴`Aulacoseira lirata`, ⁵`Cyclotella stelligera`,
#> #   ⁶`Tabellaria flocculosa (strain III)`

coniss <- keji_nested %>% 
  nested_chclust_coniss()

plot(coniss, main = location)

plot(coniss, main = location, xvar = qualifiers$depth, labels = "")

coniss %>% select(location, zone_info) %>% unnest(zone_info)
#> # A tibble: 4 × 12
#>   location       hclus…¹ min_d…² max_d…³ first…⁴ last_…⁵ min_r…⁶ max_r…⁷ first…⁸
#>   <chr>            <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 Beaverskin La…       1   0.125    4.12   0.125    4.12       1       6       1
#> 2 Beaverskin La…       2   5.38    23.4    5.38    23.4        7      17       7
#> 3 Peskawa Lake         1   0.125    5.12   0.125    5.12       1       5       1
#> 4 Peskawa Lake         2   6.38    35.1    6.38    35.1        6      20       6
#> # … with 3 more variables: last_row_number <dbl>, boundary_depth <dbl>,
#> #   boundary_row_number <dbl>, and abbreviated variable names ¹hclust_zone,
#> #   ²min_depth, ³max_depth, ⁴first_depth, ⁵last_depth, ⁶min_row_number,
#> #   ⁷max_row_number, ⁸first_row_number

keji_nested %>%
  nested_chclust_coniss(n_groups = c(3, 2)) %>%
  select(location, zone_info) %>% 
  unnested_data(zone_info)
#> # A tibble: 5 × 12
#>   location       hclus…¹ min_d…² max_d…³ first…⁴ last_…⁵ min_r…⁶ max_r…⁷ first…⁸
#>   <chr>            <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 Beaverskin La…       1   0.125    4.12   0.125    4.12       1       6       1
#> 2 Beaverskin La…       2   5.38    13.6    5.38    13.6        7      13       7
#> 3 Beaverskin La…       3  15.1     23.4   15.1     23.4       14      17      14
#> 4 Peskawa Lake         1   0.125    5.12   0.125    5.12       1       5       1
#> 5 Peskawa Lake         2   6.38    35.1    6.38    35.1        6      20       6
#> # … with 3 more variables: last_row_number <dbl>, boundary_depth <dbl>,
#> #   boundary_row_number <dbl>, and abbreviated variable names ¹hclust_zone,
#> #   ²min_depth, ³max_depth, ⁴first_depth, ⁵last_depth, ⁶min_row_number,
#> #   ⁷max_row_number, ⁸first_row_number

Unconstrained hierarchical clustering

halifax_nested <- halifax_lakes_plottable %>%
  nested_data(c(location, sample_type), taxon, rel_abund, fill = 0)

halifax_nested %>% unnested_data(qualifiers, data)
#> # A tibble: 20 × 9
#>    location        sampl…¹ row_n…² Aulac…³ Eunot…⁴ Fragi…⁵ Tabel…⁶ Tabel…⁷ Other
#>    <chr>           <chr>     <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
#>  1 Anderson Lake   bottom        1   4.65    2.42    1.16    4.11    3.76   83.9
#>  2 Anderson Lake   top           2   1.87    0       0.330   5.71    5.71   86.4
#>  3 Bayers          bottom        3  11.6     5.71    4.50    4.84    0      73.4
#>  4 Bayers          top           4   0.993   6.81    3.40    3.40    0      85.4
#>  5 Bell Lake       bottom        5   0.476   2.62    0.833   2.14    6.90   87.0
#>  6 Bell Lake       top           6   9.22    0.432   2.59    6.20    1.01   80.5
#>  7 Cranberry Lake  bottom        7   0       9.17   11.6     3.06    9.39   66.8
#>  8 Cranberry Lake  top           8   0       7.72    8.94    0.203   8.54   74.6
#>  9 Frasers Lake    bottom        9   6.42    0.714   0.624   1.87    2.59   87.8
#> 10 Frasers Lake    top          10   4.85    0       0       8.58   10.6    76.0
#> 11 Kinsac lake     bottom       11  11.3     8.33    2.98   12.7     0.992  63.7
#> 12 Kinsac lake     top          12   0       3.85    2.75    3.30    0      90.1
#> 13 Little Albro L… bottom       13   2.34    2.34    1.91    1.70    6.69   85.0
#> 14 Little Albro L… top          14   5.78    2.61    1.90    0       5.78   83.9
#> 15 Little Springf… bottom       15  12.1     2.64    6.23   10       0      69.1
#> 16 Little Springf… top          16   0      19.8    14.1    11.4     0      54.6
#> 17 Maynard Lake    bottom       17   9.75    4       1.5    11.6     7.88   65.2
#> 18 Maynard Lake    top          18   2.98    1.23    2.26    7.51   10.1    75.9
#> 19 Miller Lake     bottom       19   1.79    2.19    2.19    0.299  15.6    77.9
#> 20 Miller Lake     top          20   0.816   4.35    2.90    1.27    1.18   89.5
#> # … with abbreviated variable names ¹sample_type, ²row_number,
#> #   ³`Aulacoseira distans`, ⁴`Eunotia exigua`, ⁵`Fragilariforma exigua`,
#> #   ⁶`Tabellaria fenestrata`, ⁷`Tabellaria flocculosa (strain IV)`

hclust <- halifax_nested %>%
  nested_hclust(method = "average")

plot(
  hclust, 
  labels = sprintf(
    "%s (%s)",
    qualifiers$location,
    qualifiers$sample_type
  )
)

Nested analysis of other functions

alta_nested %>%
  nested_analysis(vegan::rda, data) %>%
  plot()

biplot(pca)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.