The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Get started

library(mintyr)

C:40A1p3ea03fe1bb5-started.R

split_cv

# Prepare example data: Convert first 3 columns of iris dataset to long format and split
dt_split <- w2l_split(data = iris, cols2l = 1:3)
# dt_split is now a list containing 3 data tables for Sepal.Length, Sepal.Width, and Petal.Length

# Example 1: Single cross-validation (no repeats)
split_cv(
  split_dt = dt_split,  # Input list of split data
  v = 3,                # Set 3-fold cross-validation
  repeats = 1           # Perform cross-validation once (no repeats)
)
#> $Sepal.Length
#>                         splits     id               train           validate
#>                         <list> <char>              <list>             <list>
#> 1: <vfold_split[100x50x150x3]>  Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]>  Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]>  Fold3 <data.table[100x3]> <data.table[50x3]>
#> 
#> $Sepal.Width
#>                         splits     id               train           validate
#>                         <list> <char>              <list>             <list>
#> 1: <vfold_split[100x50x150x3]>  Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]>  Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]>  Fold3 <data.table[100x3]> <data.table[50x3]>
#> 
#> $Petal.Length
#>                         splits     id               train           validate
#>                         <list> <char>              <list>             <list>
#> 1: <vfold_split[100x50x150x3]>  Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]>  Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]>  Fold3 <data.table[100x3]> <data.table[50x3]>
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data

# Example 2: Repeated cross-validation
split_cv(
  split_dt = dt_split,  # Input list of split data
  v = 3,                # Set 3-fold cross-validation
  repeats = 2           # Perform cross-validation twice
)
#> $Sepal.Length
#>                         splits      id    id2               train
#>                         <list>  <char> <char>              <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1  Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1  Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1  Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2  Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2  Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2  Fold3 <data.table[100x3]>
#>              validate
#>                <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
#> 
#> $Sepal.Width
#>                         splits      id    id2               train
#>                         <list>  <char> <char>              <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1  Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1  Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1  Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2  Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2  Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2  Fold3 <data.table[100x3]>
#>              validate
#>                <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
#> 
#> $Petal.Length
#>                         splits      id    id2               train
#>                         <list>  <char> <char>              <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1  Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1  Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1  Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2  Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2  Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2  Fold3 <data.table[100x3]>
#>              validate
#>                <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: repeat numbers (Repeat1, Repeat2)
# - id2: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data

C:40A1p3ea03fe1bb5-started.R

c2p_nest

# Example data preparation: Define column names for combination
col_names <- c("Sepal.Length", "Sepal.Width", "Petal.Length")

# Example 1: Basic column-to-pairs nesting with custom separator
c2p_nest(
  iris,                   # Input iris dataset
  cols2bind = col_names,  # Columns to be combined as pairs
  pairs_n = 2,            # Create pairs of 2 columns
  sep = "&"               # Custom separator for pair names
)
#>                        pairs                data
#>                       <char>              <list>
#> 1:  Sepal.Length&Sepal.Width <data.table[150x4]>
#> 2: Sepal.Length&Petal.Length <data.table[150x4]>
#> 3:  Sepal.Width&Petal.Length <data.table[150x4]>
# Returns a nested data.table where:
# - pairs: combined column names (e.g., "Sepal.Length&Sepal.Width")
# - data: list column containing data.tables with value1, value2 columns

# Example 2: Column-to-pairs nesting with numeric indices and grouping
c2p_nest(
  iris,                   # Input iris dataset
  cols2bind = 1:3,        # First 3 columns to be combined
  pairs_n = 2,            # Create pairs of 2 columns
  by = 5                  # Group by 5th column (Species)
)
#>                        pairs    Species               data
#>                       <char>     <fctr>             <list>
#> 1:  Sepal.Length-Sepal.Width     setosa <data.table[50x3]>
#> 2:  Sepal.Length-Sepal.Width versicolor <data.table[50x3]>
#> 3:  Sepal.Length-Sepal.Width  virginica <data.table[50x3]>
#> 4: Sepal.Length-Petal.Length     setosa <data.table[50x3]>
#> 5: Sepal.Length-Petal.Length versicolor <data.table[50x3]>
#> 6: Sepal.Length-Petal.Length  virginica <data.table[50x3]>
#> 7:  Sepal.Width-Petal.Length     setosa <data.table[50x3]>
#> 8:  Sepal.Width-Petal.Length versicolor <data.table[50x3]>
#> 9:  Sepal.Width-Petal.Length  virginica <data.table[50x3]>
# Returns a nested data.table where:
# - pairs: combined column names
# - Species: grouping variable
# - data: list column containing data.tables grouped by Species

C:40A1p3ea03fe1bb5-started.R

r2p_nest

# Example 1: Row-to-pairs nesting with column names
r2p_nest(
  mtcars,                     # Input mtcars dataset
  rows2bind = "cyl",          # Column to be used as row values
  by = c("hp", "drat", "wt")  # Columns to be transformed into pairs
)
#>      name                data
#>    <fctr>              <list>
#> 1:     hp <data.table[32x12]>
#> 2:   drat <data.table[32x12]>
#> 3:     wt <data.table[32x12]>
# Returns a nested data.table where:
# - name: variable names (hp, drat, wt)
# - data: list column containing data.tables with rows grouped by cyl values

# Example 2: Row-to-pairs nesting with numeric indices
r2p_nest(
  mtcars,                     # Input mtcars dataset
  rows2bind = 2,              # Use 2nd column (cyl) as row values
  by = 4:6                    # Use columns 4-6 (hp, drat, wt) for pairs
)
#>      name                data
#>    <fctr>              <list>
#> 1:     hp <data.table[32x12]>
#> 2:   drat <data.table[32x12]>
#> 3:     wt <data.table[32x12]>
# Returns a nested data.table where:
# - name: variable names from columns 4-6
# - data: list column containing data.tables with rows grouped by cyl values

C:40A1p3ea03fe1bb5-started.R

export_nest

# Example 1: Basic nested data export workflow
# Step 1: Create nested data structure
dt_nest <- w2l_nest(
  data = iris,              # Input iris dataset
  cols2l = 1:2,             # Columns to be nested
  by = "Species"            # Grouping variable
)

# Step 2: Export nested data to files
export_nest(
  nest_dt = dt_nest,        # Input nested data.table
  nest_col = "data",        # Column containing nested data
  group_cols = c("name", "Species")  # Columns to create directory structure
)
#> [1] 6
# Returns the number of files created
# Creates directory structure: tempdir()/name/Species/data.txt

# Check exported files
list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE          # Search in subdirectories
)
#> [1] "Sepal.Length/setosa/data.txt"     "Sepal.Length/versicolor/data.txt"
#> [3] "Sepal.Length/virginica/data.txt"  "Sepal.Width/setosa/data.txt"     
#> [5] "Sepal.Width/versicolor/data.txt"  "Sepal.Width/virginica/data.txt"
# Returns list of created files and their paths

# Clean up exported files
files <- list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE,         # Search in subdirectories
  full.names = TRUE         # Return full file paths
)
file.remove(files)          # Remove all exported files
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE

C:40A1p3ea03fe1bb5-started.R

export_list

# Example: Export split data to files

# Step 1: Create split data structure
dt_split <- w2l_split(
  data = iris,              # Input iris dataset
  cols2l = 1:2,             # Columns to be split
  by = "Species"            # Grouping variable
)

# Step 2: Export split data to files
export_list(
  split_dt = dt_split       # Input list of data.tables
)
#> [1] 6
# Returns the number of files created
# Files are saved in tempdir() with .txt extension

# Check exported files
list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE          # Search in subdirectories
)
#> [1] "Sepal.Length_setosa.txt"     "Sepal.Length_versicolor.txt"
#> [3] "Sepal.Length_virginica.txt"  "Sepal.Width_setosa.txt"     
#> [5] "Sepal.Width_versicolor.txt"  "Sepal.Width_virginica.txt"

# Clean up exported files
files <- list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE,         # Search in subdirectories
  full.names = TRUE         # Return full file paths
)
file.remove(files)          # Remove all exported files
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE

C:40A1p3ea03fe1bb5-started.R

fires

head(fires())
#>    Location   Tag       Date    Entry     Exit Ent Wt Ext Wt Consumed Weight
#>       <int> <int>     <char>   <char>   <char>  <num>  <num>    <num>  <num>
#> 1:      101 35877 2024-10-07 14:15:39 14:18:02  0.678  0.632    0.046   67.6
#> 2:      101 35873 2024-10-07 14:18:03 14:23:05  0.632  0.384    0.248   60.8
#> 3:      101 35878 2024-10-07 14:23:15 14:28:45  0.670  0.469    0.201   70.8
#> 4:      101 35855 2024-10-07 14:29:05 14:34:29  0.755  0.634    0.121   51.2
#> 5:      101 35877 2024-10-07 14:34:30 14:34:37  0.634  0.634    0.000    0.0
#> 6:      101 35853 2024-10-07 14:34:38 14:36:26  0.634  0.634    0.000   88.6
#>    Topup Amount
#>           <num>
#> 1:        0.286
#> 2:        0.000
#> 3:        0.286
#> 4:        0.286
#> 5:        0.000
#> 6:        0.000

C:40A1p3ea03fe1bb5-started.R

nedaps

head(nedaps())
#>    animal_number lifenumber responder location          visit_time duration
#>            <int>     <lgcl>     <int>    <int>              <POSc>    <int>
#> 1:      10115497         NA     15497      101 2024-09-06 20:22:51        3
#> 2:      10115967         NA     15967      101 2024-09-06 20:22:54       65
#> 3:      10115983         NA     15983      101 2024-09-06 20:23:59        2
#> 4:      10115967         NA     15967      101 2024-09-06 20:24:01       11
#> 5:      10115983         NA     15983      101 2024-09-06 20:24:12        2
#> 6:      10115967         NA     15967      101 2024-09-06 20:24:14       33
#>    state weight feed_intake
#>    <int>  <int>       <int>
#> 1:     0  46500           0
#> 2:     0  22000          17
#> 3:     0  33000           0
#> 4:     0  33500           0
#> 5:     0  35500           0
#> 6:     0  31000           0

C:40A1p3ea03fe1bb5-started.R

convert_nest

# Example 1: Create nested data structures
# Create single nested column
df_nest1 <- iris |> 
  dplyr::group_nest(Species)     # Group and nest by Species

# Create multiple nested columns
df_nest2 <- iris |>
  dplyr::group_nest(Species) |>  # Group and nest by Species
  dplyr::mutate(
    data2 = purrr::map(          # Create second nested column
      data,
      dplyr::mutate, 
      c = 2
    )
  )

# Example 2: Convert nested structures
# Convert data frame to data table
convert_nest(
  df_nest1,                      # Input nested data frame
  to = "dt"                      # Convert to data.table
)
#>       Species               data
#>        <fctr>             <list>
#> 1:     setosa <data.table[50x4]>
#> 2: versicolor <data.table[50x4]>
#> 3:  virginica <data.table[50x4]>

# Convert specific nested columns
convert_nest(
  df_nest2,                      # Input nested data frame
  to = "dt",                     # Convert to data.table
  nest_cols = "data"             # Only convert 'data' column
)
#>       Species               data          data2
#>        <fctr>             <list>         <list>
#> 1:     setosa <data.table[50x4]> <tbl_df[50x5]>
#> 2: versicolor <data.table[50x4]> <tbl_df[50x5]>
#> 3:  virginica <data.table[50x4]> <tbl_df[50x5]>

# Example 3: Convert data table to data frame
dt_nest <- mintyr::w2l_nest(
  data = iris,                   # Input dataset
  cols2l = 1:2                   # Columns to nest
)
convert_nest(
  dt_nest,                       # Input nested data table
  to = "df"                      # Convert to data frame
)
#> # A tibble: 2 × 2
#>   name         data              
#>   <fct>        <list>            
#> 1 Sepal.Length <tibble [150 × 4]>
#> 2 Sepal.Width  <tibble [150 × 4]>

C:40A1p3ea03fe1bb5-started.R

get_path_segment

# Example: Path segment extraction demonstrations

# Setup test paths
paths <- c(
  "C:/home/user/documents",   # Windows style path
  "/var/log/system",          # Unix system path
  "/usr/local/bin"            # Unix binary path
)

# Example 1: Extract first segment
get_path_segment(
  paths,                      # Input paths
  1                           # Get first segment
)
#> [1] "home" "var"  "usr"
# Returns: c("home", "var", "usr")

# Example 2: Extract second-to-last segment
get_path_segment(
  paths,                      # Input paths
  -2                          # Get second-to-last segment
)
#> [1] "user"  "log"   "local"
# Returns: c("user", "log", "local")

# Example 3: Extract from first to last segment
get_path_segment(
  paths,                      # Input paths
  c(1,-1)                     # Range from first to last
)
#> [1] "home/user/documents" "var/log/system"      "usr/local/bin"
# Returns full paths without drive letters

# Example 4: Extract first three segments
get_path_segment(
  paths,                      # Input paths
  c(1,3)                      # Range from first to third
)
#> [1] "home/user/documents" "var/log/system"      "usr/local/bin"
# Returns: c("home/user/documents", "var/log/system", "usr/local/bin")

# Example 5: Extract last two segments (reverse order)
get_path_segment(
  paths,                      # Input paths
  c(-1,-2)                    # Range from last to second-to-last
)
#> [1] "user/documents" "log/system"     "local/bin"
# Returns: c("user/documents", "log/system", "local/bin") (segments keep their original path order)

# Example 6: Extract first two segments
get_path_segment(
  paths,                      # Input paths
  c(1,2)                      # Range from first to second
)
#> [1] "home/user" "var/log"   "usr/local"
# Returns: c("home/user", "var/log", "usr/local")

C:40A1p3ea03fe1bb5-started.R

format_digits

# Example: Number formatting demonstrations

# Setup test data
dt <- data.table::data.table(
  a = c(0.1234, 0.5678),      # Numeric column 1
  b = c(0.2345, 0.6789),      # Numeric column 2
  c = c("text1", "text2")     # Text column
)

# Example 1: Format all numeric columns
format_digits(
  dt,                         # Input data table
  digits = 2                  # Round to 2 decimal places
)
#>         a      b      c
#>    <char> <char> <char>
#> 1:   0.12   0.23  text1
#> 2:   0.57   0.68  text2

# Example 2: Format specific column as percentage
format_digits(
  dt,                         # Input data table
  cols = c("a"),              # Only format column 'a'
  digits = 2,                 # Round to 2 decimal places
  percentage = TRUE           # Convert to percentage
)
#>         a      b      c
#>    <char>  <num> <char>
#> 1: 12.34% 0.2345  text1
#> 2: 56.78% 0.6789  text2

C:40A1p3ea03fe1bb5-started.R

mintyr_example

# Get path to an example file
mintyr_example("csv_test1.csv")
#> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpG40A1p/Rinst3ea045257f3c/mintyr/extdata/csv_test1.csv"

C:40A1p3ea03fe1bb5-started.R

mintyr_examples

# List all example files
mintyr_examples()
#> [1] "csv_test1.csv"   "csv_test2.csv"   "xlsx_test1.xlsx" "xlsx_test2.xlsx"

C:40A1p3ea03fe1bb5-started.R

import_xlsx

# Example: Excel file import demonstrations

# Setup test files
xlsx_files <- mintyr_example(
  mintyr_examples("xlsx_test")    # Get example Excel files
)

# Example 1: Import and combine all sheets from all files
import_xlsx(
  xlsx_files,                     # Input Excel file paths
  rbind = TRUE                    # Combine all sheets into one data.table
)
#>     excel_name sheet_name  col1   col2   col3
#>         <char>     <char> <num> <char> <lgcl>
#>  1: xlsx_test1     Sheet1     4      d  FALSE
#>  2: xlsx_test1     Sheet1     5      f   TRUE
#>  3: xlsx_test1     Sheet1     6      e   TRUE
#>  4: xlsx_test1     Sheet2     1      a   TRUE
#>  5: xlsx_test1     Sheet2     2      b  FALSE
#>  6: xlsx_test1     Sheet2     3      c   TRUE
#>  7: xlsx_test2     Sheet1    15      o  FALSE
#>  8: xlsx_test2     Sheet1    16      p   TRUE
#>  9: xlsx_test2     Sheet1    17      q  FALSE
#> 10: xlsx_test2          a     7      g  FALSE
#> 11: xlsx_test2          a     9      h   TRUE
#> 12: xlsx_test2          a     8      i  FALSE
#> 13: xlsx_test2          b    10      J  FALSE
#> 14: xlsx_test2          b    11      K   TRUE
#> 15: xlsx_test2          b    12      L  FALSE

# Example 2: Import specific sheets separately
import_xlsx(
  xlsx_files,                     # Input Excel file paths
  rbind = FALSE,                  # Keep sheets as separate data.tables
  sheet = 2                       # Only import second sheet
)
#> $xlsx_test1_Sheet2
#>     col1   col2   col3
#>    <num> <char> <lgcl>
#> 1:     1      a   TRUE
#> 2:     2      b  FALSE
#> 3:     3      c   TRUE
#> 
#> $xlsx_test2_a
#>     col1   col2   col3
#>    <num> <char> <lgcl>
#> 1:     7      g  FALSE
#> 2:     9      h   TRUE
#> 3:     8      i  FALSE

C:40A1p3ea03fe1bb5-started.R

import_csv

# Example: CSV file import demonstrations

# Setup test files
csv_files <- mintyr_example(
  mintyr_examples("csv_test")     # Get example CSV files
)

# Example 1: Import and combine CSV files using data.table
import_csv(
  csv_files,                      # Input CSV file paths
  package = "data.table",         # Use data.table for reading
  rbind = TRUE,                   # Combine all files into one data.table
  rbind_label = "_file"           # Column name for file source
)
#>        _file  col1   col2   col3
#>       <char> <int> <char> <lgcl>
#> 1: csv_test1     4      d  FALSE
#> 2: csv_test1     5      f   TRUE
#> 3: csv_test1     6      e   TRUE
#> 4: csv_test2    15      o  FALSE
#> 5: csv_test2    16      p   TRUE
#> 6: csv_test2    17      q  FALSE

# Example 2: Import files separately using arrow
import_csv(
  csv_files,                      # Input CSV file paths
  package = "arrow",              # Use arrow for reading
  rbind = FALSE                   # Keep files separate (named list of tibbles)
)
#> $csv_test1
#> # A tibble: 3 × 3
#>    col1 col2  col3 
#>   <int> <chr> <lgl>
#> 1     4 d     FALSE
#> 2     5 f     TRUE 
#> 3     6 e     TRUE 
#> 
#> $csv_test2
#> # A tibble: 3 × 3
#>    col1 col2  col3 
#>   <int> <chr> <lgl>
#> 1    15 o     FALSE
#> 2    16 p     TRUE 
#> 3    17 q     FALSE

C:40A1p3ea03fe1bb5-started.R

get_filename

# Example: File path processing demonstrations

# Setup test files
xlsx_files <- mintyr_example(
  mintyr_examples("xlsx_test")    # Get example Excel files
)

# Example 1: Extract filenames without extensions
get_filename(
  xlsx_files,                     # Input file paths
  rm_extension = TRUE,            # Remove file extensions
  rm_path = TRUE                  # Remove directory paths
)
#> [1] "xlsx_test1" "xlsx_test2"

# Example 2: Keep file extensions
get_filename(
  xlsx_files,                     # Input file paths
  rm_extension = FALSE,           # Keep file extensions
  rm_path = TRUE                  # Remove directory paths
)
#> [1] "xlsx_test1.xlsx" "xlsx_test2.xlsx"

# Example 3: Keep full paths without extensions
get_filename(
  xlsx_files,                     # Input file paths
  rm_extension = TRUE,            # Remove file extensions
  rm_path = FALSE                 # Keep directory paths
)
#> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpG40A1p/Rinst3ea045257f3c/mintyr/extdata/xlsx_test1"
#> [2] "C:/Users/Dell/AppData/Local/Temp/RtmpG40A1p/Rinst3ea045257f3c/mintyr/extdata/xlsx_test2"

C:40A1p3ea03fe1bb5-started.R

w2l_nest

# Example: Wide to long format nesting demonstrations

# Example 1: Basic nesting by group
w2l_nest(
  data = iris,                    # Input dataset
  by = "Species"                  # Group by Species column
)
#>       Species               data
#>        <fctr>             <list>
#> 1:     setosa <data.table[50x4]>
#> 2: versicolor <data.table[50x4]>
#> 3:  virginica <data.table[50x4]>

# Example 2: Nest specific columns with numeric indices
w2l_nest(
  data = iris,                    # Input dataset
  cols2l = 1:4,                   # Select first 4 columns to nest
  by = "Species"                  # Group by Species column
)
#>             name    Species               data
#>           <fctr>     <fctr>             <list>
#>  1: Sepal.Length     setosa <data.table[50x1]>
#>  2: Sepal.Length versicolor <data.table[50x1]>
#>  3: Sepal.Length  virginica <data.table[50x1]>
#>  4:  Sepal.Width     setosa <data.table[50x1]>
#>  5:  Sepal.Width versicolor <data.table[50x1]>
#>  6:  Sepal.Width  virginica <data.table[50x1]>
#>  7: Petal.Length     setosa <data.table[50x1]>
#>  8: Petal.Length versicolor <data.table[50x1]>
#>  9: Petal.Length  virginica <data.table[50x1]>
#> 10:  Petal.Width     setosa <data.table[50x1]>
#> 11:  Petal.Width versicolor <data.table[50x1]>
#> 12:  Petal.Width  virginica <data.table[50x1]>

# Example 3: Nest specific columns with column names
w2l_nest(
  data = iris,                    # Input dataset
  cols2l = c("Sepal.Length",      # Select columns by name
             "Sepal.Width", 
             "Petal.Length"),
  by = 5                          # Group by column index 5 (Species)
)
#>            name    Species               data
#>          <fctr>     <fctr>             <list>
#> 1: Sepal.Length     setosa <data.table[50x2]>
#> 2: Sepal.Length versicolor <data.table[50x2]>
#> 3: Sepal.Length  virginica <data.table[50x2]>
#> 4:  Sepal.Width     setosa <data.table[50x2]>
#> 5:  Sepal.Width versicolor <data.table[50x2]>
#> 6:  Sepal.Width  virginica <data.table[50x2]>
#> 7: Petal.Length     setosa <data.table[50x2]>
#> 8: Petal.Length versicolor <data.table[50x2]>
#> 9: Petal.Length  virginica <data.table[50x2]>
# Returns similar structure to Example 2

C:40A1p3ea03fe1bb5-started.R

w2l_split

# Example: Wide to long format splitting demonstrations

# Example 1: Basic splitting by Species
w2l_split(
  data = iris,                    # Input dataset
  by = "Species"                  # Split by Species column
) |> 
  lapply(head)                    # Show first 6 rows of each split
#> $setosa
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>           <num>       <num>        <num>       <num>
#> 1:          5.1         3.5          1.4         0.2
#> 2:          4.9         3.0          1.4         0.2
#> 3:          4.7         3.2          1.3         0.2
#> 4:          4.6         3.1          1.5         0.2
#> 5:          5.0         3.6          1.4         0.2
#> 6:          5.4         3.9          1.7         0.4
#> 
#> $versicolor
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>           <num>       <num>        <num>       <num>
#> 1:          7.0         3.2          4.7         1.4
#> 2:          6.4         3.2          4.5         1.5
#> 3:          6.9         3.1          4.9         1.5
#> 4:          5.5         2.3          4.0         1.3
#> 5:          6.5         2.8          4.6         1.5
#> 6:          5.7         2.8          4.5         1.3
#> 
#> $virginica
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>           <num>       <num>        <num>       <num>
#> 1:          6.3         3.3          6.0         2.5
#> 2:          5.8         2.7          5.1         1.9
#> 3:          7.1         3.0          5.9         2.1
#> 4:          6.3         2.9          5.6         1.8
#> 5:          6.5         3.0          5.8         2.2
#> 6:          7.6         3.0          6.6         2.1

# Example 2: Split specific columns using numeric indices
w2l_split(
  data = iris,                    # Input dataset
  cols2l = 1:3,                   # Select first 3 columns to split
  by = 5                          # Split by column index 5 (Species)
) |> 
  lapply(head)                    # Show first 6 rows of each split
#> $Sepal.Length_setosa
#>    Petal.Width value
#>          <num> <num>
#> 1:         0.2   5.1
#> 2:         0.2   4.9
#> 3:         0.2   4.7
#> 4:         0.2   4.6
#> 5:         0.2   5.0
#> 6:         0.4   5.4
#> 
#> $Sepal.Length_versicolor
#>    Petal.Width value
#>          <num> <num>
#> 1:         1.4   7.0
#> 2:         1.5   6.4
#> 3:         1.5   6.9
#> 4:         1.3   5.5
#> 5:         1.5   6.5
#> 6:         1.3   5.7
#> 
#> $Sepal.Length_virginica
#>    Petal.Width value
#>          <num> <num>
#> 1:         2.5   6.3
#> 2:         1.9   5.8
#> 3:         2.1   7.1
#> 4:         1.8   6.3
#> 5:         2.2   6.5
#> 6:         2.1   7.6
#> 
#> $Sepal.Width_setosa
#>    Petal.Width value
#>          <num> <num>
#> 1:         0.2   3.5
#> 2:         0.2   3.0
#> 3:         0.2   3.2
#> 4:         0.2   3.1
#> 5:         0.2   3.6
#> 6:         0.4   3.9
#> 
#> $Sepal.Width_versicolor
#>    Petal.Width value
#>          <num> <num>
#> 1:         1.4   3.2
#> 2:         1.5   3.2
#> 3:         1.5   3.1
#> 4:         1.3   2.3
#> 5:         1.5   2.8
#> 6:         1.3   2.8
#> 
#> $Sepal.Width_virginica
#>    Petal.Width value
#>          <num> <num>
#> 1:         2.5   3.3
#> 2:         1.9   2.7
#> 3:         2.1   3.0
#> 4:         1.8   2.9
#> 5:         2.2   3.0
#> 6:         2.1   3.0
#> 
#> $Petal.Length_setosa
#>    Petal.Width value
#>          <num> <num>
#> 1:         0.2   1.4
#> 2:         0.2   1.4
#> 3:         0.2   1.3
#> 4:         0.2   1.5
#> 5:         0.2   1.4
#> 6:         0.4   1.7
#> 
#> $Petal.Length_versicolor
#>    Petal.Width value
#>          <num> <num>
#> 1:         1.4   4.7
#> 2:         1.5   4.5
#> 3:         1.5   4.9
#> 4:         1.3   4.0
#> 5:         1.5   4.6
#> 6:         1.3   4.5
#> 
#> $Petal.Length_virginica
#>    Petal.Width value
#>          <num> <num>
#> 1:         2.5   6.0
#> 2:         1.9   5.1
#> 3:         2.1   5.9
#> 4:         1.8   5.6
#> 5:         2.2   5.8
#> 6:         2.1   6.6

# Example 3: Split specific columns using column names
list_res <- w2l_split(
  data = iris,                    # Input dataset
  cols2l = c("Sepal.Length",      # Select columns by name
             "Sepal.Width"),
  by = "Species"                  # Split by Species column
)
lapply(list_res, head)            # Show first 6 rows of each split
#> $Sepal.Length_setosa
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          1.4         0.2   5.1
#> 2:          1.4         0.2   4.9
#> 3:          1.3         0.2   4.7
#> 4:          1.5         0.2   4.6
#> 5:          1.4         0.2   5.0
#> 6:          1.7         0.4   5.4
#> 
#> $Sepal.Length_versicolor
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          4.7         1.4   7.0
#> 2:          4.5         1.5   6.4
#> 3:          4.9         1.5   6.9
#> 4:          4.0         1.3   5.5
#> 5:          4.6         1.5   6.5
#> 6:          4.5         1.3   5.7
#> 
#> $Sepal.Length_virginica
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          6.0         2.5   6.3
#> 2:          5.1         1.9   5.8
#> 3:          5.9         2.1   7.1
#> 4:          5.6         1.8   6.3
#> 5:          5.8         2.2   6.5
#> 6:          6.6         2.1   7.6
#> 
#> $Sepal.Width_setosa
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          1.4         0.2   3.5
#> 2:          1.4         0.2   3.0
#> 3:          1.3         0.2   3.2
#> 4:          1.5         0.2   3.1
#> 5:          1.4         0.2   3.6
#> 6:          1.7         0.4   3.9
#> 
#> $Sepal.Width_versicolor
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          4.7         1.4   3.2
#> 2:          4.5         1.5   3.2
#> 3:          4.9         1.5   3.1
#> 4:          4.0         1.3   2.3
#> 5:          4.6         1.5   2.8
#> 6:          4.5         1.3   2.8
#> 
#> $Sepal.Width_virginica
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          6.0         2.5   3.3
#> 2:          5.1         1.9   2.7
#> 3:          5.9         2.1   3.0
#> 4:          5.6         1.8   2.9
#> 5:          5.8         2.2   3.0
#> 6:          6.6         2.1   3.0
# Returns similar structure to Example 2

C:40A1p3ea03fe1bb5-started.R

nest_cv

# Example: Cross-validation for nested data.table demonstrations

# Setup test data
dt_nest <- w2l_nest(
  data = iris,                   # Input dataset
  cols2l = 1:2                   # Nest first 2 columns
)

# Example 1: Basic 2-fold cross-validation
nest_cv(
  nest_dt = dt_nest,             # Input nested data.table
  v = 2                          # Number of folds (2-fold CV)
)
#>            name                     splits     id              train
#>          <fctr>                     <list> <char>             <list>
#> 1: Sepal.Length <vfold_split[75x75x150x4]>  Fold1 <data.table[75x4]>
#> 2: Sepal.Length <vfold_split[75x75x150x4]>  Fold2 <data.table[75x4]>
#> 3:  Sepal.Width <vfold_split[75x75x150x4]>  Fold1 <data.table[75x4]>
#> 4:  Sepal.Width <vfold_split[75x75x150x4]>  Fold2 <data.table[75x4]>
#>              validate
#>                <list>
#> 1: <data.table[75x4]>
#> 2: <data.table[75x4]>
#> 3: <data.table[75x4]>
#> 4: <data.table[75x4]>

# Example 2: Repeated 2-fold cross-validation
# With repeats = 2 the printed result gains a second id column: `id` holds
# the repetition (Repeat1, Repeat2) and `id2` holds the fold (Fold1, Fold2),
# giving v * repeats rows per variable.
nest_cv(
  nest_dt = dt_nest,             # Input nested data.table
  v = 2,                         # Number of folds (2-fold CV)
  repeats = 2                    # Number of repetitions
)
#>            name                     splits      id    id2              train
#>          <fctr>                     <list>  <char> <char>             <list>
#> 1: Sepal.Length <vfold_split[75x75x150x4]> Repeat1  Fold1 <data.table[75x4]>
#> 2: Sepal.Length <vfold_split[75x75x150x4]> Repeat1  Fold2 <data.table[75x4]>
#> 3: Sepal.Length <vfold_split[75x75x150x4]> Repeat2  Fold1 <data.table[75x4]>
#> 4: Sepal.Length <vfold_split[75x75x150x4]> Repeat2  Fold2 <data.table[75x4]>
#> 5:  Sepal.Width <vfold_split[75x75x150x4]> Repeat1  Fold1 <data.table[75x4]>
#> 6:  Sepal.Width <vfold_split[75x75x150x4]> Repeat1  Fold2 <data.table[75x4]>
#> 7:  Sepal.Width <vfold_split[75x75x150x4]> Repeat2  Fold1 <data.table[75x4]>
#> 8:  Sepal.Width <vfold_split[75x75x150x4]> Repeat2  Fold2 <data.table[75x4]>
#>              validate
#>                <list>
#> 1: <data.table[75x4]>
#> 2: <data.table[75x4]>
#> 3: <data.table[75x4]>
#> 4: <data.table[75x4]>
#> 5: <data.table[75x4]>
#> 6: <data.table[75x4]>
#> 7: <data.table[75x4]>
#> 8: <data.table[75x4]>

C:40A1p3ea03fe1bb5-started.R

top_perc

# Example 1: Basic usage with single trait
# This example selects the top 10% of observations based on Petal.Width
# keep_data=TRUE returns both summary statistics and the filtered data
# NOTE: the result below has n = 17 rather than 15 rows (10% of 150); every
# row with Petal.Width >= 2.2 is kept, so ties at the cutoff presumably all
# count as selected -- confirm against the top_perc() documentation.
top_perc(iris, 
         perc = 0.1,                # Select top 10%
         trait = c("Petal.Width"),  # Column to analyze
         keep_data = TRUE)          # Return both stats and filtered data
#> $Petal.Width_0.1
#> $Petal.Width_0.1$stat
#> # A tibble: 1 × 5
#>   variable        n  mean    sd top_perc
#>   <fct>       <dbl> <dbl> <dbl> <chr>   
#> 1 Petal.Width    17  2.34   0.1 10%     
#> 
#> $Petal.Width_0.1$data
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
#> 1           6.3         3.3          6.0         2.5 virginica
#> 2           6.5         3.0          5.8         2.2 virginica
#> 3           7.2         3.6          6.1         2.5 virginica
#> 4           5.8         2.8          5.1         2.4 virginica
#> 5           6.4         3.2          5.3         2.3 virginica
#> 6           7.7         3.8          6.7         2.2 virginica
#> 7           7.7         2.6          6.9         2.3 virginica
#> 8           6.9         3.2          5.7         2.3 virginica
#> 9           6.4         2.8          5.6         2.2 virginica
#> 10          7.7         3.0          6.1         2.3 virginica
#> 11          6.3         3.4          5.6         2.4 virginica
#> 12          6.7         3.1          5.6         2.4 virginica
#> 13          6.9         3.1          5.1         2.3 virginica
#> 14          6.8         3.2          5.9         2.3 virginica
#> 15          6.7         3.3          5.7         2.5 virginica
#> 16          6.7         3.0          5.2         2.3 virginica
#> 17          6.2         3.4          5.4         2.3 virginica

# Example 2: Using grouping with 'by' parameter
# This example performs the same analysis but separately for each Species
# keep_data is not set here, so the printed result is only a summary tibble
# with one row per Species (no filtered data is returned alongside it)
top_perc(iris, 
         perc = 0.1,                # Select top 10%
         trait = c("Petal.Width"),  # Column to analyze
         by = "Species")            # Group by Species
#> # A tibble: 3 × 6
#>   Species    variable        n  mean    sd top_perc
#>   <fct>      <fct>       <dbl> <dbl> <dbl> <chr>   
#> 1 setosa     Petal.Width     9 0.433 0.071 10%     
#> 2 versicolor Petal.Width     5 1.66  0.089 10%     
#> 3 virginica  Petal.Width     6 2.45  0.055 10%

# Example 3: Complex example with multiple percentages and grouping variables
# Reshape columns 1:2 (Sepal.Length and Sepal.Width) from wide to long format:
# the column names go into `names` and the measurements into `values`.
# Each element of `perc` yields its own set of rows in the result, one row
# per Species x names combination (2 percentages x 6 groups = 12 rows).
# NOTE: the -20% rows below have lower means than the 10% rows, so a negative
# `perc` presumably selects from the bottom of the distribution -- confirm
# against the top_perc() documentation.
iris |> 
  tidyr::pivot_longer(1:2,
                      names_to = "names", 
                      values_to = "values") |> 
  mintyr::top_perc(
    perc = c(0.1, -0.2),
    trait = "values",
    by = c("Species", "names"),
    type = "mean_sd")
#> # A tibble: 12 × 7
#>    Species    names        variable     n  mean    sd top_perc
#>    <fct>      <chr>        <fct>    <dbl> <dbl> <dbl> <chr>   
#>  1 setosa     Sepal.Length values       5  5.64 0.134 10%     
#>  2 setosa     Sepal.Width  values       6  4.08 0.194 10%     
#>  3 versicolor Sepal.Length values       6  6.8  0.126 10%     
#>  4 versicolor Sepal.Width  values       5  3.26 0.089 10%     
#>  5 virginica  Sepal.Length values       5  7.74 0.089 10%     
#>  6 virginica  Sepal.Width  values       5  3.6  0.2   10%     
#>  7 setosa     Sepal.Length values      11  4.53 0.135 -20%    
#>  8 setosa     Sepal.Width  values      12  2.97 0.219 -20%    
#>  9 versicolor Sepal.Length values      11  5.28 0.244 -20%    
#> 10 versicolor Sepal.Width  values      13  2.35 0.151 -20%    
#> 11 virginica  Sepal.Length values      11  5.79 0.336 -20%    
#> 12 virginica  Sepal.Width  values      11  2.56 0.15  -20%

C:40A1p3ea03fe1bb5-started.R

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.