The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Get started

library(mintyr)
#> 🌿 Good afternoon! Keep calm and analyze on.

w2l_nest

# Example: Wide to long format nesting demonstrations

# Example 1: Basic nesting by group
w2l_nest(
  data = iris,                    # Input dataset
  by = "Species"                  # Group by Species column
)
#>       Species               data
#>        <fctr>             <list>
#> 1:     setosa <data.table[50x4]>
#> 2: versicolor <data.table[50x4]>
#> 3:  virginica <data.table[50x4]>

# Example 2: Nest specific columns with numeric indices
w2l_nest(
  data = iris,                    # Input dataset
  cols2l = 1:4,                   # Select first 4 columns to nest
  by = "Species"                  # Group by Species column
)
#>             name    Species               data
#>           <char>     <fctr>             <list>
#>  1: Sepal.Length     setosa <data.table[50x1]>
#>  2: Sepal.Length versicolor <data.table[50x1]>
#>  3: Sepal.Length  virginica <data.table[50x1]>
#>  4:  Sepal.Width     setosa <data.table[50x1]>
#>  5:  Sepal.Width versicolor <data.table[50x1]>
#>  6:  Sepal.Width  virginica <data.table[50x1]>
#>  7: Petal.Length     setosa <data.table[50x1]>
#>  8: Petal.Length versicolor <data.table[50x1]>
#>  9: Petal.Length  virginica <data.table[50x1]>
#> 10:  Petal.Width     setosa <data.table[50x1]>
#> 11:  Petal.Width versicolor <data.table[50x1]>
#> 12:  Petal.Width  virginica <data.table[50x1]>

# Example 3: Nest specific columns with column names
w2l_nest(
  data = iris,                    # Input dataset
  cols2l = c("Sepal.Length",      # Select columns by name
             "Sepal.Width", 
             "Petal.Length"),
  by = 5                          # Group by column index 5 (Species)
)
#>            name    Species               data
#>          <char>     <fctr>             <list>
#> 1: Sepal.Length     setosa <data.table[50x2]>
#> 2: Sepal.Length versicolor <data.table[50x2]>
#> 3: Sepal.Length  virginica <data.table[50x2]>
#> 4:  Sepal.Width     setosa <data.table[50x2]>
#> 5:  Sepal.Width versicolor <data.table[50x2]>
#> 6:  Sepal.Width  virginica <data.table[50x2]>
#> 7: Petal.Length     setosa <data.table[50x2]>
#> 8: Petal.Length versicolor <data.table[50x2]>
#> 9: Petal.Length  virginica <data.table[50x2]>
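# Returns the same layout as Example 2 (name, Species, data), but with 9 rows
# (3 columns x 3 species); each nested data.table has 2 columns instead of 1
# because one measurement column (presumably Petal.Width) is not selected in cols2l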

w2l_split

# Example: Wide to long format splitting demonstrations

# Example 1: Basic splitting by Species
w2l_split(
  data = iris,                    # Input dataset
  by = "Species"                  # Split by Species column
) |> 
  lapply(head)                    # Show first 6 rows of each split
#> $setosa
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>           <num>       <num>        <num>       <num>
#> 1:          5.1         3.5          1.4         0.2
#> 2:          4.9         3.0          1.4         0.2
#> 3:          4.7         3.2          1.3         0.2
#> 4:          4.6         3.1          1.5         0.2
#> 5:          5.0         3.6          1.4         0.2
#> 6:          5.4         3.9          1.7         0.4
#> 
#> $versicolor
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>           <num>       <num>        <num>       <num>
#> 1:          7.0         3.2          4.7         1.4
#> 2:          6.4         3.2          4.5         1.5
#> 3:          6.9         3.1          4.9         1.5
#> 4:          5.5         2.3          4.0         1.3
#> 5:          6.5         2.8          4.6         1.5
#> 6:          5.7         2.8          4.5         1.3
#> 
#> $virginica
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>           <num>       <num>        <num>       <num>
#> 1:          6.3         3.3          6.0         2.5
#> 2:          5.8         2.7          5.1         1.9
#> 3:          7.1         3.0          5.9         2.1
#> 4:          6.3         2.9          5.6         1.8
#> 5:          6.5         3.0          5.8         2.2
#> 6:          7.6         3.0          6.6         2.1

# Example 2: Split specific columns using numeric indices
w2l_split(
  data = iris,                    # Input dataset
  cols2l = 1:3,                   # Select first 3 columns to split
  by = 5                          # Split by column index 5 (Species)
) |> 
  lapply(head)                    # Show first 6 rows of each split
#> $Sepal.Length_setosa
#>    Petal.Width value
#>          <num> <num>
#> 1:         0.2   5.1
#> 2:         0.2   4.9
#> 3:         0.2   4.7
#> 4:         0.2   4.6
#> 5:         0.2   5.0
#> 6:         0.4   5.4
#> 
#> $Sepal.Length_versicolor
#>    Petal.Width value
#>          <num> <num>
#> 1:         1.4   7.0
#> 2:         1.5   6.4
#> 3:         1.5   6.9
#> 4:         1.3   5.5
#> 5:         1.5   6.5
#> 6:         1.3   5.7
#> 
#> $Sepal.Length_virginica
#>    Petal.Width value
#>          <num> <num>
#> 1:         2.5   6.3
#> 2:         1.9   5.8
#> 3:         2.1   7.1
#> 4:         1.8   6.3
#> 5:         2.2   6.5
#> 6:         2.1   7.6
#> 
#> $Sepal.Width_setosa
#>    Petal.Width value
#>          <num> <num>
#> 1:         0.2   3.5
#> 2:         0.2   3.0
#> 3:         0.2   3.2
#> 4:         0.2   3.1
#> 5:         0.2   3.6
#> 6:         0.4   3.9
#> 
#> $Sepal.Width_versicolor
#>    Petal.Width value
#>          <num> <num>
#> 1:         1.4   3.2
#> 2:         1.5   3.2
#> 3:         1.5   3.1
#> 4:         1.3   2.3
#> 5:         1.5   2.8
#> 6:         1.3   2.8
#> 
#> $Sepal.Width_virginica
#>    Petal.Width value
#>          <num> <num>
#> 1:         2.5   3.3
#> 2:         1.9   2.7
#> 3:         2.1   3.0
#> 4:         1.8   2.9
#> 5:         2.2   3.0
#> 6:         2.1   3.0
#> 
#> $Petal.Length_setosa
#>    Petal.Width value
#>          <num> <num>
#> 1:         0.2   1.4
#> 2:         0.2   1.4
#> 3:         0.2   1.3
#> 4:         0.2   1.5
#> 5:         0.2   1.4
#> 6:         0.4   1.7
#> 
#> $Petal.Length_versicolor
#>    Petal.Width value
#>          <num> <num>
#> 1:         1.4   4.7
#> 2:         1.5   4.5
#> 3:         1.5   4.9
#> 4:         1.3   4.0
#> 5:         1.5   4.6
#> 6:         1.3   4.5
#> 
#> $Petal.Length_virginica
#>    Petal.Width value
#>          <num> <num>
#> 1:         2.5   6.0
#> 2:         1.9   5.1
#> 3:         2.1   5.9
#> 4:         1.8   5.6
#> 5:         2.2   5.8
#> 6:         2.1   6.6

# Example 3: Split specific columns using column names
list_res <- w2l_split(
  data = iris,                    # Input dataset
  cols2l = c("Sepal.Length",      # Select columns by name
             "Sepal.Width"),
  by = "Species"                  # Split by Species column
)
lapply(list_res, head)            # Show first 6 rows of each split
#> $Sepal.Length_setosa
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          1.4         0.2   5.1
#> 2:          1.4         0.2   4.9
#> 3:          1.3         0.2   4.7
#> 4:          1.5         0.2   4.6
#> 5:          1.4         0.2   5.0
#> 6:          1.7         0.4   5.4
#> 
#> $Sepal.Length_versicolor
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          4.7         1.4   7.0
#> 2:          4.5         1.5   6.4
#> 3:          4.9         1.5   6.9
#> 4:          4.0         1.3   5.5
#> 5:          4.6         1.5   6.5
#> 6:          4.5         1.3   5.7
#> 
#> $Sepal.Length_virginica
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          6.0         2.5   6.3
#> 2:          5.1         1.9   5.8
#> 3:          5.9         2.1   7.1
#> 4:          5.6         1.8   6.3
#> 5:          5.8         2.2   6.5
#> 6:          6.6         2.1   7.6
#> 
#> $Sepal.Width_setosa
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          1.4         0.2   3.5
#> 2:          1.4         0.2   3.0
#> 3:          1.3         0.2   3.2
#> 4:          1.5         0.2   3.1
#> 5:          1.4         0.2   3.6
#> 6:          1.7         0.4   3.9
#> 
#> $Sepal.Width_versicolor
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          4.7         1.4   3.2
#> 2:          4.5         1.5   3.2
#> 3:          4.9         1.5   3.1
#> 4:          4.0         1.3   2.3
#> 5:          4.6         1.5   2.8
#> 6:          4.5         1.3   2.8
#> 
#> $Sepal.Width_virginica
#>    Petal.Length Petal.Width value
#>           <num>       <num> <num>
#> 1:          6.0         2.5   3.3
#> 2:          5.1         1.9   2.7
#> 3:          5.9         2.1   3.0
#> 4:          5.6         1.8   2.9
#> 5:          5.8         2.2   3.0
#> 6:          6.6         2.1   3.0
# Returns similar structure to Example 2

nest_cv

# Example: Cross-validation for nested data.table demonstrations

# Setup test data
dt_nest <- w2l_nest(
  data = iris,                   # Input dataset
  cols2l = 1:2                   # Nest first 2 columns
)

# Example 1: Basic 2-fold cross-validation
nest_cv(
  nest_dt = dt_nest,             # Input nested data.table
  v = 2                          # Number of folds (2-fold CV)
)
#>            name                     splits     id              train
#>          <char>                     <list> <char>             <list>
#> 1: Sepal.Length <vfold_split[75x75x150x4]>  Fold1 <data.table[75x4]>
#> 2: Sepal.Length <vfold_split[75x75x150x4]>  Fold2 <data.table[75x4]>
#> 3:  Sepal.Width <vfold_split[75x75x150x4]>  Fold1 <data.table[75x4]>
#> 4:  Sepal.Width <vfold_split[75x75x150x4]>  Fold2 <data.table[75x4]>
#>              validate
#>                <list>
#> 1: <data.table[75x4]>
#> 2: <data.table[75x4]>
#> 3: <data.table[75x4]>
#> 4: <data.table[75x4]>

# Example 2: Repeated 2-fold cross-validation
nest_cv(
  nest_dt = dt_nest,             # Input nested data.table
  v = 2,                         # Number of folds (2-fold CV)
  repeats = 2                    # Number of repetitions
)
#>            name                     splits      id    id2              train
#>          <char>                     <list>  <char> <char>             <list>
#> 1: Sepal.Length <vfold_split[75x75x150x4]> Repeat1  Fold1 <data.table[75x4]>
#> 2: Sepal.Length <vfold_split[75x75x150x4]> Repeat1  Fold2 <data.table[75x4]>
#> 3: Sepal.Length <vfold_split[75x75x150x4]> Repeat2  Fold1 <data.table[75x4]>
#> 4: Sepal.Length <vfold_split[75x75x150x4]> Repeat2  Fold2 <data.table[75x4]>
#> 5:  Sepal.Width <vfold_split[75x75x150x4]> Repeat1  Fold1 <data.table[75x4]>
#> 6:  Sepal.Width <vfold_split[75x75x150x4]> Repeat1  Fold2 <data.table[75x4]>
#> 7:  Sepal.Width <vfold_split[75x75x150x4]> Repeat2  Fold1 <data.table[75x4]>
#> 8:  Sepal.Width <vfold_split[75x75x150x4]> Repeat2  Fold2 <data.table[75x4]>
#>              validate
#>                <list>
#> 1: <data.table[75x4]>
#> 2: <data.table[75x4]>
#> 3: <data.table[75x4]>
#> 4: <data.table[75x4]>
#> 5: <data.table[75x4]>
#> 6: <data.table[75x4]>
#> 7: <data.table[75x4]>
#> 8: <data.table[75x4]>

split_cv

# Prepare example data: Convert first 3 columns of iris dataset to long format and split
dt_split <- w2l_split(data = iris, cols2l = 1:3)
# dt_split is now a list containing 3 data tables for Sepal.Length, Sepal.Width, and Petal.Length

# Example 1: Single cross-validation (no repeats)
split_cv(
  split_dt = dt_split,  # Input list of split data
  v = 3,                # Set 3-fold cross-validation
  repeats = 1           # Perform cross-validation once (no repeats)
)
#> $Sepal.Length
#>                         splits     id               train           validate
#>                         <list> <char>              <list>             <list>
#> 1: <vfold_split[100x50x150x3]>  Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]>  Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]>  Fold3 <data.table[100x3]> <data.table[50x3]>
#> 
#> $Sepal.Width
#>                         splits     id               train           validate
#>                         <list> <char>              <list>             <list>
#> 1: <vfold_split[100x50x150x3]>  Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]>  Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]>  Fold3 <data.table[100x3]> <data.table[50x3]>
#> 
#> $Petal.Length
#>                         splits     id               train           validate
#>                         <list> <char>              <list>             <list>
#> 1: <vfold_split[100x50x150x3]>  Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]>  Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]>  Fold3 <data.table[100x3]> <data.table[50x3]>
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data

# Example 2: Repeated cross-validation
split_cv(
  split_dt = dt_split,  # Input list of split data
  v = 3,                # Set 3-fold cross-validation
  repeats = 2           # Perform cross-validation twice
)
#> $Sepal.Length
#>                         splits      id    id2               train
#>                         <list>  <char> <char>              <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1  Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1  Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1  Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2  Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2  Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2  Fold3 <data.table[100x3]>
#>              validate
#>                <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
#> 
#> $Sepal.Width
#>                         splits      id    id2               train
#>                         <list>  <char> <char>              <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1  Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1  Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1  Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2  Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2  Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2  Fold3 <data.table[100x3]>
#>              validate
#>                <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
#> 
#> $Petal.Length
#>                         splits      id    id2               train
#>                         <list>  <char> <char>              <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1  Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1  Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1  Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2  Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2  Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2  Fold3 <data.table[100x3]>
#>              validate
#>                <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: repeat numbers (Repeat1, Repeat2)
# - id2: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data

c2p_nest

# Example data preparation: Define column names for combination
col_names <- c("Sepal.Length", "Sepal.Width", "Petal.Length")

# Example 1: Basic column-to-pairs nesting with custom separator
c2p_nest(
  iris,                   # Input iris dataset
  cols2bind = col_names,  # Columns to be combined as pairs
  pairs_n = 2,            # Create pairs of 2 columns
  sep = "&"               # Custom separator for pair names
)
#>                        pairs                data
#>                       <char>              <list>
#> 1:  Sepal.Length&Sepal.Width <data.table[150x4]>
#> 2: Sepal.Length&Petal.Length <data.table[150x4]>
#> 3:  Sepal.Width&Petal.Length <data.table[150x4]>
# Returns a nested data.table where:
# - pairs: combined column names (e.g., "Sepal.Length&Sepal.Width")
# - data: list column containing data.tables with value1, value2 columns

# Example 2: Column-to-pairs nesting with numeric indices and grouping
c2p_nest(
  iris,                   # Input iris dataset
  cols2bind = 1:3,        # First 3 columns to be combined
  pairs_n = 2,            # Create pairs of 2 columns
  by = 5                  # Group by 5th column (Species)
)
#>                        pairs    Species               data
#>                       <char>     <fctr>             <list>
#> 1:  Sepal.Length-Sepal.Width     setosa <data.table[50x3]>
#> 2:  Sepal.Length-Sepal.Width versicolor <data.table[50x3]>
#> 3:  Sepal.Length-Sepal.Width  virginica <data.table[50x3]>
#> 4: Sepal.Length-Petal.Length     setosa <data.table[50x3]>
#> 5: Sepal.Length-Petal.Length versicolor <data.table[50x3]>
#> 6: Sepal.Length-Petal.Length  virginica <data.table[50x3]>
#> 7:  Sepal.Width-Petal.Length     setosa <data.table[50x3]>
#> 8:  Sepal.Width-Petal.Length versicolor <data.table[50x3]>
#> 9:  Sepal.Width-Petal.Length  virginica <data.table[50x3]>
# Returns a nested data.table where:
# - pairs: combined column names
# - Species: grouping variable
# - data: list column containing data.tables grouped by Species
# Example data preparation: Define column names for combination
col_names <- c("Sepal.Length", "Sepal.Width", "Petal.Length")

# Example 1: Basic column-to-pairs nesting with custom separator
c2p_nest(
  iris,                   # Input iris dataset
  cols2bind = col_names,  # Columns to be combined as pairs
  pairs_n = 2,            # Create pairs of 2 columns
  sep = "&"               # Custom separator for pair names
)
#>                        pairs                data
#>                       <char>              <list>
#> 1:  Sepal.Length&Sepal.Width <data.table[150x4]>
#> 2: Sepal.Length&Petal.Length <data.table[150x4]>
#> 3:  Sepal.Width&Petal.Length <data.table[150x4]>
# Returns a nested data.table where:
# - pairs: combined column names (e.g., "Sepal.Length&Sepal.Width")
# - data: list column containing data.tables with value1, value2 columns

# Example 2: Column-to-pairs nesting with numeric indices and grouping
c2p_nest(
  iris,                   # Input iris dataset
  cols2bind = 1:3,        # First 3 columns to be combined
  pairs_n = 2,            # Create pairs of 2 columns
  by = 5                  # Group by 5th column (Species)
)
#>                        pairs    Species               data
#>                       <char>     <fctr>             <list>
#> 1:  Sepal.Length-Sepal.Width     setosa <data.table[50x3]>
#> 2:  Sepal.Length-Sepal.Width versicolor <data.table[50x3]>
#> 3:  Sepal.Length-Sepal.Width  virginica <data.table[50x3]>
#> 4: Sepal.Length-Petal.Length     setosa <data.table[50x3]>
#> 5: Sepal.Length-Petal.Length versicolor <data.table[50x3]>
#> 6: Sepal.Length-Petal.Length  virginica <data.table[50x3]>
#> 7:  Sepal.Width-Petal.Length     setosa <data.table[50x3]>
#> 8:  Sepal.Width-Petal.Length versicolor <data.table[50x3]>
#> 9:  Sepal.Width-Petal.Length  virginica <data.table[50x3]>
# Returns a nested data.table where:
# - pairs: combined column names
# - Species: grouping variable
# - data: list column containing data.tables grouped by Species

r2p_nest

# Example 1: Row-to-pairs nesting with column names
r2p_nest(
  mtcars,                     # Input mtcars dataset
  rows2bind = "cyl",          # Column to be used as row values
  by = c("hp", "drat", "wt")  # Columns to be transformed into pairs
)
#>      name                data
#>    <char>              <list>
#> 1:     hp <data.table[32x12]>
#> 2:   drat <data.table[32x12]>
#> 3:     wt <data.table[32x12]>
# Returns a nested data.table where:
# - name: variable names (hp, drat, wt)
# - data: list column containing data.tables with rows grouped by cyl values

# Example 2: Row-to-pairs nesting with numeric indices
r2p_nest(
  mtcars,                     # Input mtcars dataset
  rows2bind = 2,              # Use 2nd column (cyl) as row values
  by = 4:6                    # Use columns 4-6 (hp, drat, wt) for pairs
)
#>      name                data
#>    <char>              <list>
#> 1:     hp <data.table[32x12]>
#> 2:   drat <data.table[32x12]>
#> 3:     wt <data.table[32x12]>
# Returns a nested data.table where:
# - name: variable names from columns 4-6
# - data: list column containing data.tables with rows grouped by cyl values

export_nest

# Example 1: Basic nested data export workflow
# Step 1: Create nested data structure
dt_nest <- w2l_nest(
  data = iris,              # Input iris dataset
  cols2l = 1:2,             # Columns to be nested
  by = "Species"            # Grouping variable
)

# Step 2: Export nested data to files
export_nest(
  nest_dt = dt_nest,        # Input nested data.table
  nest_cols = "data",       # Column containing nested data
  group_cols = c("name", "Species")  # Columns to create directory structure
)
#> [ export_nest ] Using grouping columns: name, Species
#> [ export_nest ] Export complete. 6 file(s) written to: C:\Users\Dell\AppData\Local\Temp\RtmpMJp0L3
# Returns the number of files created
# Creates directory structure: tempdir()/name/Species/data.txt

# Check exported files
list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE          # Search in subdirectories
)
#> [1] "Sepal.Length/setosa/data.txt"     "Sepal.Length/versicolor/data.txt"
#> [3] "Sepal.Length/virginica/data.txt"  "Sepal.Width/setosa/data.txt"     
#> [5] "Sepal.Width/versicolor/data.txt"  "Sepal.Width/virginica/data.txt"
# Returns list of created files and their paths

# Clean up exported files
files <- list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE,         # Search in subdirectories
  full.names = TRUE         # Return full file paths
)
file.remove(files)          # Remove all exported files
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE

export_list

# Example: Export split data to files

# Step 1: Create split data structure
dt_split <- w2l_split(
  data = iris,              # Input iris dataset
  cols2l = 1:2,             # Columns to be split
  by = "Species"            # Grouping variable
)

# Step 2: Export split data to files
export_list(
  split_dt = dt_split       # Input list of data.tables
)
#> [ export_list ] Export complete. 6 / 6 file(s) written to: C:\Users\Dell\AppData\Local\Temp\RtmpMJp0L3
# Returns the number of files created
# Files are saved in tempdir() with .txt extension

# Check exported files
list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE          # Search in subdirectories
)
#> [1] "Sepal.Length_setosa.txt"     "Sepal.Length_versicolor.txt"
#> [3] "Sepal.Length_virginica.txt"  "Sepal.Width_setosa.txt"     
#> [5] "Sepal.Width_versicolor.txt"  "Sepal.Width_virginica.txt"

# Clean up exported files
files <- list.files(
  path = tempdir(),         # Default export directory
  pattern = "txt",          # File type pattern to search
  recursive = TRUE,         # Search in subdirectories
  full.names = TRUE         # Return full file paths
)
file.remove(files)          # Remove all exported files
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE

import_csv

# Example: CSV file import demonstrations

# Setup test files
csv_files <- mintyr_example(
  mintyr_examples("csv_test")     # Get example CSV files
)

# Example 1: Import and combine CSV files using data.table
import_csv(
  csv_files,                      # Input CSV file paths
  rbind = TRUE,                   # Combine all files into one data.table
  rbind_label = "_file",          # Column name for file source
  keep_ext = TRUE,                # Include .csv extension in _file column
  full_path = TRUE                # Show complete file paths in _file column
)
#>                                                                                         _file
#>                                                                                        <char>
#> 1: C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/csv_test1.csv
#> 2: C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/csv_test1.csv
#> 3: C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/csv_test1.csv
#> 4: C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/csv_test2.csv
#> 5: C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/csv_test2.csv
#> 6: C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/csv_test2.csv
#>     col1   col2   col3
#>    <int> <char> <lgcl>
#> 1:     4      d  FALSE
#> 2:     5      f   TRUE
#> 3:     6      e   TRUE
#> 4:    15      o  FALSE
#> 5:    16      p   TRUE
#> 6:    17      q  FALSE

import_xlsx

# Example: Excel file import demonstrations

# Setup test files
xlsx_files <- mintyr_example(
  mintyr_examples("xlsx_test")    # Get example Excel files
)

# Example 1: Import and combine all sheets from all files
import_xlsx(
  xlsx_files,                     # Input Excel file paths
  rbind = TRUE                    # Combine all sheets into one data.table
)
#>     excel_name sheet_name  col1   col2   col3
#>         <char>     <char> <num> <char> <lgcl>
#>  1: xlsx_test1     Sheet1     4      d  FALSE
#>  2: xlsx_test1     Sheet1     5      f   TRUE
#>  3: xlsx_test1     Sheet1     6      e   TRUE
#>  4: xlsx_test1     Sheet2     1      a   TRUE
#>  5: xlsx_test1     Sheet2     2      b  FALSE
#>  6: xlsx_test1     Sheet2     3      c   TRUE
#>  7: xlsx_test2     Sheet1    15      o  FALSE
#>  8: xlsx_test2     Sheet1    16      p   TRUE
#>  9: xlsx_test2     Sheet1    17      q  FALSE
#> 10: xlsx_test2          a     7      g  FALSE
#> 11: xlsx_test2          a     9      h   TRUE
#> 12: xlsx_test2          a     8      i  FALSE
#> 13: xlsx_test2          b    10      J  FALSE
#> 14: xlsx_test2          b    11      K   TRUE
#> 15: xlsx_test2          b    12      L  FALSE

# Example 2: Import specific sheets separately
import_xlsx(
  xlsx_files,                     # Input Excel file paths
  rbind = FALSE,                  # Keep sheets as separate data.tables
  sheet = 2                       # Only import the second sheet of each file
)
#> $xlsx_test1_Sheet2
#>     col1   col2   col3
#>    <num> <char> <lgcl>
#> 1:     1      a   TRUE
#> 2:     2      b  FALSE
#> 3:     3      c   TRUE
#> 
#> $xlsx_test2_a
#>     col1   col2   col3
#>    <num> <char> <lgcl>
#> 1:     7      g  FALSE
#> 2:     9      h   TRUE
#> 3:     8      i  FALSE
#> 
#> attr(,"source_files")
#> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/xlsx_test1.xlsx"
#> [2] "C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/xlsx_test2.xlsx"

export_xlsx

# Example: Excel file export demonstrations
# Example 1: Export a plain data.frame to a single workbook
out_file <- file.path(tempdir(), "test.xlsx")
export_xlsx(
  mtcars,                             # Data to export (no tracking columns)
  path       = out_file,              # Ends in .xlsx -> one workbook
  sheet_name = "test"                 # Worksheet tab name for the single sheet
)
# Clean up the generated file
file.remove(out_file)
#> [1] TRUE

# Example 2: Split into one file per group
out_files <- export_xlsx(
  iris,                               # Data to export
  path      = tempdir(),              # A directory -> one file per file_col value
  file_col  = "Species",              # Column whose values name the output files
  drop_cols = FALSE                   # Keep the Species column in each output file
)
# Clean up the generated files (export_xlsx returns the written paths)
file.remove(out_files)
#> [1] TRUE TRUE TRUE

top_perc

# Example 1: Basic usage with single trait
# This example selects the top 10% of observations based on Petal.Width
# keep_data=TRUE returns both summary statistics and the filtered data
top_perc(iris, 
         perc = 0.1,                # Select top 10%
         trait = c("Petal.Width"),  # Column to analyze
         keep_data = TRUE)          # Return both stats and filtered data
#> $perc_0.1
#> $perc_0.1$stat
#>      variable  N Min Max     Mean Median         SD         SE         CV
#> 1 Petal.Width 17 2.2 2.5 2.335294    2.3 0.09963167 0.02416423 0.04266344
#>   selection
#> 1   top_10%
#> 
#> $perc_0.1$data
#>    Sepal.Length Sepal.Width Petal.Length   Species    variable value
#> 1           6.3         3.3          6.0 virginica Petal.Width   2.5
#> 2           6.5         3.0          5.8 virginica Petal.Width   2.2
#> 3           7.2         3.6          6.1 virginica Petal.Width   2.5
#> 4           5.8         2.8          5.1 virginica Petal.Width   2.4
#> 5           6.4         3.2          5.3 virginica Petal.Width   2.3
#> 6           7.7         3.8          6.7 virginica Petal.Width   2.2
#> 7           7.7         2.6          6.9 virginica Petal.Width   2.3
#> 8           6.9         3.2          5.7 virginica Petal.Width   2.3
#> 9           6.4         2.8          5.6 virginica Petal.Width   2.2
#> 10          7.7         3.0          6.1 virginica Petal.Width   2.3
#> 11          6.3         3.4          5.6 virginica Petal.Width   2.4
#> 12          6.7         3.1          5.6 virginica Petal.Width   2.4
#> 13          6.9         3.1          5.1 virginica Petal.Width   2.3
#> 14          6.8         3.2          5.9 virginica Petal.Width   2.3
#> 15          6.7         3.3          5.7 virginica Petal.Width   2.5
#> 16          6.7         3.0          5.2 virginica Petal.Width   2.3
#> 17          6.2         3.4          5.4 virginica Petal.Width   2.3

# Example 2: Using grouping with 'by' parameter
# This example performs the same analysis but separately for each Species
# keep_data is not set here, so only the summary statistics are returned (one row per Species)
top_perc(iris, 
         perc = 0.1,                # Select top 10%
         trait = c("Petal.Width"),  # Column to analyze
         by = "Species")            # Group by Species
#>      Species    variable N Min Max      Mean Median         SD         SE
#> 1     setosa Petal.Width 9 0.4 0.6 0.4333333   0.40 0.07071068 0.02357023
#> 2 versicolor Petal.Width 5 1.6 1.8 1.6600000   1.60 0.08944272 0.04000000
#> 3  virginica Petal.Width 6 2.4 2.5 2.4500000   2.45 0.05477226 0.02236068
#>           CV selection
#> 1 0.16317849   top_10%
#> 2 0.05388116   top_10%
#> 3 0.02235602   top_10%

get_path_info

paths <- c("C:/Users/foo/Documents/report.xlsx",
           "/home/user/.bashrc",
           "relative/path/to/data.csv",
           ".hidden.tar.gz",
           NA_character_)

# Mode B: filename only, extension stripped (default)
get_path_info(paths)
#> [1] "report"      ".bashrc"     "data"        ".hidden.tar" NA

# Mode B: filename only, extension preserved
get_path_info(paths, rm_extension = FALSE)
#> [1] "report.xlsx"    ".bashrc"        "data.csv"       ".hidden.tar.gz"
#> [5] NA

# Mode B: full normalised path, extension stripped
get_path_info(paths, rm_path = FALSE)
#> [1] "Users/foo/Documents/report" "home/user/.bashrc"         
#> [3] "relative/path/to/data"      ".hidden.tar"               
#> [5] NA

# Mode A: extract the 2nd path segment
get_path_info(paths, n = 2)
#> [1] "foo"  "user" "path" NA     NA

# Mode A: extract the last segment with extension stripped (n = -1 linkage)
get_path_info(paths, n = -1, rm_extension = TRUE)
#> [1] "report"      ".bashrc"     "data"        ".hidden.tar" NA

# Mode A: range extraction
get_path_info(paths, n = c(2, 3))
#> [1] "foo/Documents" "user/.bashrc"  "path/to"       NA             
#> [5] NA

format_digits

# Example: Number formatting demonstrations

# Setup test data
dt <- data.table::data.table(
  a = c(0.1234, 0.5678),      # Numeric column 1
  b = c(0.2345, 0.6789),      # Numeric column 2
  c = c("text1", "text2")     # Text column
)

# Example 1: Format all numeric columns
format_digits(
  dt,                         # Input data table
  digits = 2                  # Round to 2 decimal places
)
#>         a      b      c
#>    <char> <char> <char>
#> 1:   0.12   0.23  text1
#> 2:   0.57   0.68  text2

# Example 2: Format specific column as percentage
format_digits(
  dt,                         # Input data table
  cols = c("a"),              # Only format column 'a'
  digits = 2,                 # Round to 2 decimal places
  percentage = TRUE           # Convert to percentage
)
#>         a      b      c
#>    <char>  <num> <char>
#> 1: 12.34% 0.2345  text1
#> 2: 56.78% 0.6789  text2

mintyr_example

# Get path to an example file
mintyr_example("csv_test1.csv")
#> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpYd2b20/Rinst3a1c2abe1b10/mintyr/extdata/csv_test1.csv"

mintyr_examples

# List all example files
mintyr_examples()
#> [1] "csv_test1.csv"   "csv_test2.csv"   "xlsx_test1.xlsx" "xlsx_test2.xlsx"

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.