The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Nested analyses

Dewey Dunnington

2023-01-17

Load the packages:

library(tidyverse)
library(tidypaleo)

Creating nested data

Preparing the data:

alta_lake_geochem
#> # A tibble: 192 × 9
#>    location param depth   age value stdev units     n zone  
#>    <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <chr> <int> <chr> 
#>  1 ALGC2    Cu     0.25 2015.  76   NA    ppm       1 Zone 3
#>  2 ALGC2    Cu     0.75 2011. 108.   4.50 ppm       3 Zone 3
#>  3 ALGC2    Cu     1.25 2008. 158   NA    ppm       1 Zone 3
#>  4 ALGC2    Cu     1.75 2003. 169   NA    ppm       1 Zone 3
#>  5 ALGC2    Cu     2.5  1998. 161   NA    ppm       1 Zone 3
#>  6 ALGC2    Cu     3.5  1982. 129   NA    ppm       1 Zone 3
#>  7 ALGC2    Cu     4.5  1966.  88.7  3.86 ppm       3 Zone 2
#>  8 ALGC2    Cu     5.5  1947.  65   NA    ppm       1 Zone 2
#>  9 ALGC2    Cu     6.5  1922.  62.3  9.53 ppm       3 Zone 2
#> 10 ALGC2    Cu     7.5  1896.  48   NA    ppm       1 Zone 2
#> # … with 182 more rows
alta_nested <- nested_data(
  alta_lake_geochem,
  qualifiers = c(age, depth, zone),
  key = param,
  value = value,
  trans = scale
)

alta_nested
#> # A tibble: 1 × 4
#>   discarded_columns discarded_rows   qualifiers        data             
#> * <list>            <list>           <list>            <list>           
#> 1 <tibble [32 × 0]> <tibble [0 × 9]> <tibble [32 × 4]> <tibble [32 × 6]>
alta_nested %>% unnested_data(data)
#> # A tibble: 32 × 6
#>     C[,1] `C/N`[,1] Cu[,1] Ti[,1] d13C[,1] d15N[,1]
#>     <dbl>     <dbl>  <dbl>  <dbl>    <dbl>    <dbl>
#>  1 -1.54      1.26  -0.794  0.807    1.03   -0.670 
#>  2 -1.59      1.36  -0.559  1.33     1.19    0.0499
#>  3 -1.98      0.960 -0.721  0.682    1.10   -0.511 
#>  4 -0.189     1.61  -0.749  0.233    0.836  -2.37  
#>  5  0.993     2.48  -0.694  0.908    1.06   -2.55  
#>  6 -0.157     1.76  -0.712  0.941    1.17   -1.52  
#>  7 -0.642     1.36  -0.667  1.14     1.07   -1.39  
#>  8 -1.07      0.924 -0.559  1.16     0.820  -0.439 
#>  9 -0.722     0.696 -0.830  0.932    0.765  -0.929 
#> 10 -0.631     0.309 -0.504  0.882    0.409  -0.166 
#> # … with 22 more rows
alta_nested %>% unnested_data(qualifiers, data)
#> # A tibble: 32 × 10
#>      age depth zone   row_number  C[,1] `C/N`[,1] Cu[,1] Ti[,1] d13C[,1] d15N[…¹
#>    <dbl> <dbl> <chr>       <int>  <dbl>     <dbl>  <dbl>  <dbl>    <dbl>   <dbl>
#>  1 1550   29.5 Zone 1          1 -1.54      1.26  -0.794  0.807    1.03  -0.670 
#>  2 1566.  28.5 Zone 1          2 -1.59      1.36  -0.559  1.33     1.19   0.0499
#>  3 1581.  27.5 Zone 1          3 -1.98      0.960 -0.721  0.682    1.10  -0.511 
#>  4 1597.  26.5 Zone 1          4 -0.189     1.61  -0.749  0.233    0.836 -2.37  
#>  5 1613.  25.5 Zone 1          5  0.993     2.48  -0.694  0.908    1.06  -2.55  
#>  6 1629.  24.5 Zone 1          6 -0.157     1.76  -0.712  0.941    1.17  -1.52  
#>  7 1644.  23.5 Zone 1          7 -0.642     1.36  -0.667  1.14     1.07  -1.39  
#>  8 1660.  22.5 Zone 1          8 -1.07      0.924 -0.559  1.16     0.820 -0.439 
#>  9 1676.  21.5 Zone 1          9 -0.722     0.696 -0.830  0.932    0.765 -0.929 
#> 10 1692.  20.5 Zone 1         10 -0.631     0.309 -0.504  0.882    0.409 -0.166 
#> # … with 22 more rows, and abbreviated variable name ¹​d15N[,1]

Principal components analysis

pca <- alta_nested %>% nested_prcomp()
pca
#> # A tibble: 1 × 8
#>   discarded_col…¹ discar…² qualif…³ data     model    variance loadings scores  
#> * <list>          <list>   <list>   <list>   <list>   <list>   <list>   <list>  
#> 1 <tibble>        <tibble> <tibble> <tibble> <prcomp> <tibble> <tibble> <tibble>
#> # … with abbreviated variable names ¹​discarded_columns, ²​discarded_rows,
#> #   ³​qualifiers
plot(pca)

pca %>% unnested_data(qualifiers, scores)
#> # A tibble: 32 × 10
#>      age depth zone   row_number   PC1     PC2     PC3     PC4     PC5     PC6
#>    <dbl> <dbl> <chr>       <int> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#>  1 1550   29.5 Zone 1          1 -2.48 -0.273   0.409   0.518  -0.316   0.0538
#>  2 1566.  28.5 Zone 1          2 -2.48 -0.675   0.902   0.561   0.364   0.190 
#>  3 1581.  27.5 Zone 1          3 -2.41 -0.721   0.576   0.465  -0.619   0.184 
#>  4 1597.  26.5 Zone 1          4 -2.39  1.73   -0.527   0.197  -0.742  -0.0656
#>  5 1613.  25.5 Zone 1          5 -2.73  2.82   -0.666   0.212   0.432  -0.0715
#>  6 1629.  24.5 Zone 1          6 -2.56  1.22   -0.155   0.235   0.208   0.105 
#>  7 1644.  23.5 Zone 1          7 -2.57  0.708   0.123  -0.0210  0.0114 -0.0286
#>  8 1660.  22.5 Zone 1          8 -2.04 -0.243   0.490   0.101   0.157  -0.0834
#>  9 1676.  21.5 Zone 1          9 -1.98  0.0702 -0.0657 -0.147  -0.0813 -0.186 
#> 10 1692.  20.5 Zone 1         10 -1.20 -0.376   0.202  -0.132   0.202  -0.220 
#> # … with 22 more rows
pca %>% unnested_data(variance)
#> # A tibble: 6 × 6
#>   component component_text standard_deviation variance variance_propor…¹ varia…²
#>       <int> <chr>                       <dbl>    <dbl>             <dbl>   <dbl>
#> 1         1 PC1                         2.15    4.61             0.768     0.768
#> 2         2 PC2                         0.884   0.781            0.130     0.899
#> 3         3 PC3                         0.603   0.364            0.0607    0.959
#> 4         4 PC4                         0.381   0.145            0.0242    0.984
#> 5         5 PC5                         0.276   0.0761           0.0127    0.996
#> 6         6 PC6                         0.151   0.0228           0.00380   1    
#> # … with abbreviated variable names ¹​variance_proportion,
#> #   ²​variance_proportion_cumulative
pca %>% unnested_data(loadings)
#> # A tibble: 6 × 7
#>   variable    PC1     PC2    PC3    PC4     PC5     PC6
#>   <chr>     <dbl>   <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
#> 1 C         0.380  0.540  -0.460 -0.162  0.567   0.0718
#> 2 C/N      -0.401  0.451   0.207  0.735  0.223  -0.0408
#> 3 Cu        0.387  0.340   0.760 -0.173 -0.0609  0.352 
#> 4 Ti       -0.439 -0.0783  0.356 -0.483  0.565  -0.350 
#> 5 d13C     -0.458 -0.0890 -0.144 -0.145  0.126   0.851 
#> 6 d15N      0.377 -0.613   0.144  0.386  0.539   0.149

Constrained hierarchical clustering

keji_nested <- keji_lakes_plottable %>%
  group_by(location) %>%
  nested_data(qualifiers = depth, key = taxon, value = rel_abund)

keji_nested %>% unnested_data(qualifiers, data)
#> # A tibble: 37 × 9
#>    location        depth row_num…¹ Aster…² Aulac…³ Aulac…⁴ Cyclo…⁵ Tabel…⁶ Other
#>    <chr>           <dbl>     <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
#>  1 Beaverskin Lake 0.125         1   0        3.02    1.72    3.45   0      91.8
#>  2 Beaverskin Lake 0.375         2   0        3.25    2.03    5.69   1.63   87.4
#>  3 Beaverskin Lake 0.825         3   1.32     0       2.32    4.30   1.32   90.7
#>  4 Beaverskin Lake 2.12          4   0.333    0       2.67    3      1.67   92.3
#>  5 Beaverskin Lake 3.12          5   0        0       5.32    6.98   1.33   86.4
#>  6 Beaverskin Lake 4.12          6   0        0       3.54   13.2    0.643  82.6
#>  7 Beaverskin Lake 5.38          7   0.987    0      10.2    12.8    2.63   73.4
#>  8 Beaverskin Lake 6.38          8   0.993    0       8.94   17.5    3.97   68.5
#>  9 Beaverskin Lake 7.62          9   1.63     0       8.82   20.3    2.29   67.0
#> 10 Beaverskin Lake 9.12         10   0.328    0      10.8    23.9    2.95   62.0
#> # … with 27 more rows, and abbreviated variable names ¹​row_number,
#> #   ²​`Asterionella ralfsii var. americana (large)`, ³​`Aulacoseira distans`,
#> #   ⁴​`Aulacoseira lirata`, ⁵​`Cyclotella stelligera`,
#> #   ⁶​`Tabellaria flocculosa (strain III)`
coniss <- keji_nested %>% 
  nested_chclust_coniss()

plot(coniss, main = location)

plot(coniss, main = location, xvar = qualifiers$depth, labels = "")

coniss %>% select(location, zone_info) %>% unnest(zone_info)
#> # A tibble: 4 × 12
#>   location       hclus…¹ min_d…² max_d…³ first…⁴ last_…⁵ min_r…⁶ max_r…⁷ first…⁸
#>   <chr>            <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 Beaverskin La…       1   0.125    4.12   0.125    4.12       1       6       1
#> 2 Beaverskin La…       2   5.38    23.4    5.38    23.4        7      17       7
#> 3 Peskawa Lake         1   0.125    5.12   0.125    5.12       1       5       1
#> 4 Peskawa Lake         2   6.38    35.1    6.38    35.1        6      20       6
#> # … with 3 more variables: last_row_number <dbl>, boundary_depth <dbl>,
#> #   boundary_row_number <dbl>, and abbreviated variable names ¹​hclust_zone,
#> #   ²​min_depth, ³​max_depth, ⁴​first_depth, ⁵​last_depth, ⁶​min_row_number,
#> #   ⁷​max_row_number, ⁸​first_row_number
keji_nested %>%
  nested_chclust_coniss(n_groups = c(3, 2)) %>%
  select(location, zone_info) %>% 
  unnested_data(zone_info)
#> # A tibble: 5 × 12
#>   location       hclus…¹ min_d…² max_d…³ first…⁴ last_…⁵ min_r…⁶ max_r…⁷ first…⁸
#>   <chr>            <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 Beaverskin La…       1   0.125    4.12   0.125    4.12       1       6       1
#> 2 Beaverskin La…       2   5.38    13.6    5.38    13.6        7      13       7
#> 3 Beaverskin La…       3  15.1     23.4   15.1     23.4       14      17      14
#> 4 Peskawa Lake         1   0.125    5.12   0.125    5.12       1       5       1
#> 5 Peskawa Lake         2   6.38    35.1    6.38    35.1        6      20       6
#> # … with 3 more variables: last_row_number <dbl>, boundary_depth <dbl>,
#> #   boundary_row_number <dbl>, and abbreviated variable names ¹​hclust_zone,
#> #   ²​min_depth, ³​max_depth, ⁴​first_depth, ⁵​last_depth, ⁶​min_row_number,
#> #   ⁷​max_row_number, ⁸​first_row_number

Unconstrained hierarchical clustering

halifax_nested <- halifax_lakes_plottable %>%
  nested_data(c(location, sample_type), taxon, rel_abund, fill = 0)

halifax_nested %>% unnested_data(qualifiers, data)
#> # A tibble: 20 × 9
#>    location        sampl…¹ row_n…² Aulac…³ Eunot…⁴ Fragi…⁵ Tabel…⁶ Tabel…⁷ Other
#>    <chr>           <chr>     <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
#>  1 Anderson Lake   bottom        1   4.65    2.42    1.16    4.11    3.76   83.9
#>  2 Anderson Lake   top           2   1.87    0       0.330   5.71    5.71   86.4
#>  3 Bayers          bottom        3  11.6     5.71    4.50    4.84    0      73.4
#>  4 Bayers          top           4   0.993   6.81    3.40    3.40    0      85.4
#>  5 Bell Lake       bottom        5   0.476   2.62    0.833   2.14    6.90   87.0
#>  6 Bell Lake       top           6   9.22    0.432   2.59    6.20    1.01   80.5
#>  7 Cranberry Lake  bottom        7   0       9.17   11.6     3.06    9.39   66.8
#>  8 Cranberry Lake  top           8   0       7.72    8.94    0.203   8.54   74.6
#>  9 Frasers Lake    bottom        9   6.42    0.714   0.624   1.87    2.59   87.8
#> 10 Frasers Lake    top          10   4.85    0       0       8.58   10.6    76.0
#> 11 Kinsac lake     bottom       11  11.3     8.33    2.98   12.7     0.992  63.7
#> 12 Kinsac lake     top          12   0       3.85    2.75    3.30    0      90.1
#> 13 Little Albro L… bottom       13   2.34    2.34    1.91    1.70    6.69   85.0
#> 14 Little Albro L… top          14   5.78    2.61    1.90    0       5.78   83.9
#> 15 Little Springf… bottom       15  12.1     2.64    6.23   10       0      69.1
#> 16 Little Springf… top          16   0      19.8    14.1    11.4     0      54.6
#> 17 Maynard Lake    bottom       17   9.75    4       1.5    11.6     7.88   65.2
#> 18 Maynard Lake    top          18   2.98    1.23    2.26    7.51   10.1    75.9
#> 19 Miller Lake     bottom       19   1.79    2.19    2.19    0.299  15.6    77.9
#> 20 Miller Lake     top          20   0.816   4.35    2.90    1.27    1.18   89.5
#> # … with abbreviated variable names ¹​sample_type, ²​row_number,
#> #   ³​`Aulacoseira distans`, ⁴​`Eunotia exigua`, ⁵​`Fragilariforma exigua`,
#> #   ⁶​`Tabellaria fenestrata`, ⁷​`Tabellaria flocculosa (strain IV)`
hclust <- halifax_nested %>%
  nested_hclust(method = "average")

plot(
  hclust, 
  labels = sprintf(
    "%s (%s)",
    qualifiers$location,
    qualifiers$sample_type
  )
)

Nested analysis of other functions

alta_nested %>%
  nested_analysis(vegan::rda, data) %>%
  plot()

biplot(pca)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.