The sapfluxnetr package offers a flexible and powerful system, based on the tidyverse and tibbletime packages, to aggregate and summarise the data of one or several sites through the sfn_metrics function. All the functions in the metrics family (?metrics) use sfn_metrics under the hood. If you want full control over the statistics returned and the aggregation periods, we recommend using this main function directly. This vignette shows you how.
daily_metrics
monthly_metrics
predawn_metrics
midday_metrics
nightly_metrics
daylight_metrics
See each function's help page for a detailed description and examples of use.
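For instance, assuming the ARG_TRE example site that ships with sapfluxnetr, the complete predefined set of daily metrics can be obtained with a single call (a quick sketch, output omitted; the object name is arbitrary):
library(sapfluxnetr)
data('ARG_TRE', package = 'sapfluxnetr')
# complete set of daily metrics, aggregating by solar time
ARG_TRE_daily <- daily_metrics(ARG_TRE, solar = TRUE)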
daily_metrics and the related functions return a complete set of metrics ready to use, but if you want simpler metrics, or want to avoid the computational burden of calculating all the predefined ones, you can supply your own summarising functions using the funs function from the dplyr package in the tidyverse suite:
# libraries
library(sapfluxnetr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
### only mean and sd at a daily scale
# data
data('ARG_TRE', package = 'sapfluxnetr')
# summarising funs (built with funs function from dplyr package)
custom_funs <- funs(mean = mean(., na.rm = TRUE), std_dev = sd(., na.rm = TRUE))
#> Warning: funs() is soft deprecated as of dplyr 0.8.0
#> please use list() instead
#>
#> # Before:
#> funs(name = f(.)
#>
#> # After:
#> list(name = ~f(.))
#> This warning is displayed once per session.
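As the warning notes, in recent dplyr versions (>= 0.8.0) the same summarising functions can be declared as a plain list of purrr-style lambdas instead of funs(), and supplied to .funs in the same way. An equivalent declaration, not evaluated in this vignette (the object name is arbitrary), would be:
# equivalent declaration with list() and purrr-style lambdas
custom_funs_list <- list(
  mean = ~ mean(.x, na.rm = TRUE),
  std_dev = ~ sd(.x, na.rm = TRUE)
)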
# metrics
foo_simpler_metrics <- sfn_metrics(
  ARG_TRE,
  period = 'daily',
  .funs = custom_funs,
  solar = TRUE,
  interval = 'general'
)
#> [1] "Crunching data for ARG_TRE. In large datasets this could take a while"
#> [1] "General data for ARG_TRE"
foo_simpler_metrics[['sapf']]
#> # A time tibble: 14 x 9
#> # Index: TIMESTAMP
#> TIMESTAMP ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_…
#> <dttm> <dbl> <dbl> <dbl>
#> 1 2009-11-17 00:00:00 308. 173. 303.
#> 2 2009-11-18 00:00:00 507. 376. 432.
#> 3 2009-11-19 00:00:00 541. 380. 391.
#> 4 2009-11-20 00:00:00 330. 218. 272.
#> 5 2009-11-21 00:00:00 338. 219. 278.
#> 6 2009-11-22 00:00:00 384. 243. 310.
#> 7 2009-11-23 00:00:00 492. 300. 390.
#> 8 2009-11-24 00:00:00 573. 389. 497.
#> 9 2009-11-25 00:00:00 601. 400. 484.
#> 10 2009-11-26 00:00:00 502. 360. 450.
#> 11 2009-11-27 00:00:00 544. 411. 506.
#> 12 2009-11-28 00:00:00 573. 451. 589.
#> 13 2009-11-29 00:00:00 371. 285. 357.
#> 14 2009-11-30 00:00:00 386. 293. 381.
#> # … with 5 more variables: ARG_TRE_Nan_Jt_4_mean <dbl>,
#> # ARG_TRE_Nan_Jt_1_std_dev <dbl>, ARG_TRE_Nan_Jt_2_std_dev <dbl>,
#> # ARG_TRE_Nan_Jt_3_std_dev <dbl>, ARG_TRE_Nan_Jt_4_std_dev <dbl>
You can also choose whether the “special interest” intervals (predawn, midday, nighttime or daylight) are calculated. For example, if you are only interested in the midday interval you can use:
foo_simpler_metrics_midday <- sfn_metrics(
  ARG_TRE,
  period = 'daily',
  .funs = custom_funs,
  solar = TRUE,
  interval = 'midday', int_start = 11, int_end = 13
)
#> [1] "Crunching data for ARG_TRE. In large datasets this could take a while"
#> [1] "midday data for ARG_TRE"
foo_simpler_metrics_midday[['sapf']]
#> # A tibble: 13 x 9
#> TIMESTAMP_md ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_…
#> <dttm> <dbl> <dbl> <dbl>
#> 1 2009-11-18 00:00:00 685. 665. 614.
#> 2 2009-11-19 00:00:00 879. 594. 626.
#> 3 2009-11-20 00:00:00 438. 272. 258.
#> 4 2009-11-21 00:00:00 631. 379. 533.
#> 5 2009-11-22 00:00:00 783. 535. 680.
#> 6 2009-11-23 00:00:00 841. 478. 618.
#> 7 2009-11-24 00:00:00 951. 636. 789.
#> 8 2009-11-25 00:00:00 907. 602. 789.
#> 9 2009-11-26 00:00:00 861. 697. 925.
#> 10 2009-11-27 00:00:00 806. 594. 706.
#> 11 2009-11-28 00:00:00 837. 730. 925.
#> 12 2009-11-29 00:00:00 638. 605. 666.
#> 13 2009-11-30 00:00:00 548. 371. 444.
#> # … with 5 more variables: ARG_TRE_Nan_Jt_4_mean_md <dbl>,
#> # ARG_TRE_Nan_Jt_1_std_dev_md <dbl>, ARG_TRE_Nan_Jt_2_std_dev_md <dbl>,
#> # ARG_TRE_Nan_Jt_3_std_dev_md <dbl>, ARG_TRE_Nan_Jt_4_std_dev_md <dbl>
Note that when only one summarising function is supplied, the variable names are not suffixed with the metric name, as the summary returns the same columns as the original data.
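A minimal sketch of this behaviour, with a single unnamed function (not evaluated here; object names are arbitrary):
# a single summarising function keeps the original tree column names
only_mean <- funs(mean(., na.rm = TRUE))
foo_only_mean <- sfn_metrics(
  ARG_TRE,
  period = 'daily',
  .funs = only_mean,
  solar = TRUE,
  interval = 'general'
)
# names(foo_only_mean[['sapf']]) should show the tree codes without metric suffixes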
The period argument in sfn_metrics is passed to the collapse_index function of the tibbletime package, so it accepts the same kinds of input:
# weekly
foo_weekly <- sfn_metrics(
  ARG_TRE,
  period = '7 days',
  .funs = custom_funs,
  solar = TRUE,
  interval = 'general'
)
#> [1] "Crunching data for ARG_TRE. In large datasets this could take a while"
#> [1] "General data for ARG_TRE"
foo_weekly[['env']]
#> # A time tibble: 3 x 19
#> # Index: TIMESTAMP
#> TIMESTAMP ta_mean rh_mean vpd_mean sw_in_mean ws_mean
#> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2009-11-15 00:00:00 4.81 35.3 0.598 280. 15.5
#> 2 2009-11-22 00:00:00 6.15 35.3 0.656 327. 24.5
#> 3 2009-11-29 00:00:00 2.55 40.9 0.453 261. 23.1
#> # … with 13 more variables: precip_mean <dbl>, swc_shallow_mean <dbl>,
#> # ppfd_in_mean <dbl>, ext_rad_mean <dbl>, ta_std_dev <dbl>,
#> # rh_std_dev <dbl>, vpd_std_dev <dbl>, sw_in_std_dev <dbl>,
#> # ws_std_dev <dbl>, precip_std_dev <dbl>, swc_shallow_std_dev <dbl>,
#> # ppfd_in_std_dev <dbl>, ext_rad_std_dev <dbl>
# custom
foo_custom <- sfn_metrics(
  ARG_TRE,
  period = as.POSIXct(
    c('2009-11-17 00:00:00', '2009-11-22 14:00:00', '2009-11-30 23:59:00'),
    tz = 'UTC'
  ),
  .funs = custom_funs,
  solar = TRUE,
  interval = 'general'
)
#> [1] "Crunching data for ARG_TRE. In large datasets this could take a while"
#> [1] "General data for ARG_TRE"
foo_custom[['sapf']]
#> # A time tibble: 2 x 9
#> # Index: TIMESTAMP
#> TIMESTAMP ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_…
#> <dttm> <dbl> <dbl> <dbl>
#> 1 2009-11-17 00:00:00 419. 290. 338.
#> 2 2009-11-22 14:00:00 501. 355. 450.
#> # … with 5 more variables: ARG_TRE_Nan_Jt_4_mean <dbl>,
#> # ARG_TRE_Nan_Jt_1_std_dev <dbl>, ARG_TRE_Nan_Jt_2_std_dev <dbl>,
#> # ARG_TRE_Nan_Jt_3_std_dev <dbl>, ARG_TRE_Nan_Jt_4_std_dev <dbl>
sfn_metrics has a ... parameter intended to supply additional arguments to the internal functions used:
tibbletime::collapse_index accepts the following extra arguments:
start_date
side
clean
dplyr::summarise_all accepts extra arguments, which are applied to all the summarising functions provided (so every function must accept the supplied argument, or an error will be raised). This is why we recommend building the summarising functions with funs from dplyr, where arguments can be specified for each individual function.
For example, if we want the TIMESTAMPs after aggregation to show the end of the period instead of the beginning (the default), we can do the following:
foo_simpler_metrics_end <- sfn_metrics(
  ARG_TRE,
  period = 'daily',
  .funs = custom_funs,
  solar = TRUE,
  interval = 'general',
  side = "end"
)
#> [1] "Crunching data for ARG_TRE. In large datasets this could take a while"
#> [1] "General data for ARG_TRE"
foo_simpler_metrics_end[['sapf']]
#> # A time tibble: 14 x 9
#> # Index: TIMESTAMP
#> TIMESTAMP ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_…
#> <dttm> <dbl> <dbl> <dbl>
#> 1 2009-11-18 00:00:00 308. 173. 303.
#> 2 2009-11-19 00:00:00 507. 376. 432.
#> 3 2009-11-20 00:00:00 541. 380. 391.
#> 4 2009-11-21 00:00:00 330. 218. 272.
#> 5 2009-11-22 00:00:00 338. 219. 278.
#> 6 2009-11-23 00:00:00 384. 243. 310.
#> 7 2009-11-24 00:00:00 492. 300. 390.
#> 8 2009-11-25 00:00:00 573. 389. 497.
#> 9 2009-11-26 00:00:00 601. 400. 484.
#> 10 2009-11-27 00:00:00 502. 360. 450.
#> 11 2009-11-28 00:00:00 544. 411. 506.
#> 12 2009-11-29 00:00:00 573. 451. 589.
#> 13 2009-11-30 00:00:00 371. 285. 357.
#> 14 2009-12-01 00:00:00 386. 293. 381.
#> # … with 5 more variables: ARG_TRE_Nan_Jt_4_mean <dbl>,
#> # ARG_TRE_Nan_Jt_1_std_dev <dbl>, ARG_TRE_Nan_Jt_2_std_dev <dbl>,
#> # ARG_TRE_Nan_Jt_3_std_dev <dbl>, ARG_TRE_Nan_Jt_4_std_dev <dbl>
Compared with the foo_simpler_metrics object calculated before, each period is now identified in the TIMESTAMP by its end instead of its start (daily periods in this case).
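The remaining collapse_index arguments can be forwarded in the same way. As a sketch (not evaluated here; the object name is arbitrary, and see ?tibbletime::collapse_index for the exact semantics), start_date anchors the sequence of periods on a chosen date:
foo_weekly_anchored <- sfn_metrics(
  ARG_TRE,
  period = '7 days',
  .funs = custom_funs,
  solar = TRUE,
  interval = 'general',
  start_date = as.POSIXct('2009-11-17', tz = 'UTC')
)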
The internal aggregation process in sfn_metrics generates some transitory columns which can be used in the summarising functions:
TIMESTAMP_coll
When aggregating by the declared period (e.g. "daily"), the TIMESTAMP column collapses to the period start/end value, meaning that all the TIMESTAMP values for the same day become identical.
This makes it impossible to use summarising functions that need the time of day at which an event happens (e.g. the time of day at which the maximum sap flow occurs), because all the collapsed TIMESTAMP values are the same. For that kind of summarising function, a transitory column called TIMESTAMP_coll is created. So, in this case, we can write a function that takes the variable values for the day and the TIMESTAMP_coll values for the day, returns the TIMESTAMP at which the maximum sap flow occurs, and use it with sfn_metrics:
max_time <- function(x, time) {
  # x: vector of values for one day
  # time: TIMESTAMP_coll values for the same day
  # If all the values in x are NA (e.g. a daily summary of a day with no
  # measurements), time[which.max(x)] would be a length-0 POSIXct vector,
  # which would crash the dplyr summarise step. So, check for that case and
  # return NA as POSIXct instead.
  if (all(is.na(x))) {
    return(as.POSIXct(NA, tz = attr(time, 'tz'), origin = lubridate::origin))
  } else {
    time[which.max(x)]
  }
}
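# Quick illustrative check of max_time() with toy data (names are arbitrary):
# the maximum of toy_values is the second element, so max_time() returns the
# corresponding 12:00 timestamp
toy_values <- c(1, 5, 3)
toy_times <- as.POSIXct(
  c('2009-11-17 08:00:00', '2009-11-17 12:00:00', '2009-11-17 16:00:00'),
  tz = 'UTC'
)
max_time(toy_values, toy_times)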
custom_funs <- funs(max = max(., na.rm = TRUE), max_time(., TIMESTAMP_coll))
max_time_metrics <- sfn_metrics(
  ARG_TRE,
  period = 'daily',
  .funs = custom_funs,
  solar = TRUE,
  interval = 'general'
)
#> [1] "Crunching data for ARG_TRE. In large datasets this could take a while"
#> [1] "General data for ARG_TRE"
max_time_metrics[['sapf']]
#> # A time tibble: 14 x 9
#> # Index: TIMESTAMP
#> TIMESTAMP ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_… ARG_TRE_Nan_Jt_…
#> <dttm> <dbl> <dbl> <dbl>
#> 1 2009-11-17 00:00:00 322. 190. 313.
#> 2 2009-11-18 00:00:00 778. 715. 679.
#> 3 2009-11-19 00:00:00 1015. 694. 633.
#> 4 2009-11-20 00:00:00 648. 401. 442.
#> 5 2009-11-21 00:00:00 664. 406. 539.
#> 6 2009-11-22 00:00:00 812. 564. 816.
#> 7 2009-11-23 00:00:00 1085. 676. 935.
#> 8 2009-11-24 00:00:00 992. 736. 1115.
#> 9 2009-11-25 00:00:00 976. 646. 951.
#> 10 2009-11-26 00:00:00 932. 766. 1087.
#> 11 2009-11-27 00:00:00 862. 704. 921.
#> 12 2009-11-28 00:00:00 845. 763. 1165.
#> 13 2009-11-29 00:00:00 714. 747. 701.
#> 14 2009-11-30 00:00:00 875. 646. 919.
#> # … with 5 more variables: ARG_TRE_Nan_Jt_4_max <dbl>,
#> # ARG_TRE_Nan_Jt_1_max_time <dttm>, ARG_TRE_Nan_Jt_2_max_time <dttm>,
#> # ARG_TRE_Nan_Jt_3_max_time <dttm>, ARG_TRE_Nan_Jt_4_max_time <dttm>
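The per-tree timing columns can then be isolated with standard dplyr verbs, for example (a quick sketch):
max_time_metrics[['sapf']] %>%
  select(TIMESTAMP, ends_with('max_time'))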