The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
C:40A1p3ea03fe1bb5-started.R
# Prepare example data: Convert first 3 columns of iris dataset to long format and split
dt_split <- w2l_split(data = iris, cols2l = 1:3)
# dt_split is now a list containing 3 data tables for Sepal.Length, Sepal.Width, and Petal.Length
# Example 1: Single cross-validation (no repeats)
split_cv(
split_dt = dt_split, # Input list of split data
v = 3, # Set 3-fold cross-validation
repeats = 1 # Perform cross-validation once (no repeats)
)
#> $Sepal.Length
#> splits id train validate
#> <list> <char> <list> <list>
#> 1: <vfold_split[100x50x150x3]> Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]> Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]> Fold3 <data.table[100x3]> <data.table[50x3]>
#>
#> $Sepal.Width
#> splits id train validate
#> <list> <char> <list> <list>
#> 1: <vfold_split[100x50x150x3]> Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]> Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]> Fold3 <data.table[100x3]> <data.table[50x3]>
#>
#> $Petal.Length
#> splits id train validate
#> <list> <char> <list> <list>
#> 1: <vfold_split[100x50x150x3]> Fold1 <data.table[100x3]> <data.table[50x3]>
#> 2: <vfold_split[100x50x150x3]> Fold2 <data.table[100x3]> <data.table[50x3]>
#> 3: <vfold_split[100x50x150x3]> Fold3 <data.table[100x3]> <data.table[50x3]>
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data
# Example 2: Repeated cross-validation
split_cv(
split_dt = dt_split, # Input list of split data
v = 3, # Set 3-fold cross-validation
repeats = 2 # Perform cross-validation twice
)
#> $Sepal.Length
#> splits id id2 train
#> <list> <char> <char> <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1 Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1 Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1 Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2 Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2 Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2 Fold3 <data.table[100x3]>
#> validate
#> <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
#>
#> $Sepal.Width
#> splits id id2 train
#> <list> <char> <char> <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1 Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1 Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1 Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2 Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2 Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2 Fold3 <data.table[100x3]>
#> validate
#> <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
#>
#> $Petal.Length
#> splits id id2 train
#> <list> <char> <char> <list>
#> 1: <vfold_split[100x50x150x3]> Repeat1 Fold1 <data.table[100x3]>
#> 2: <vfold_split[100x50x150x3]> Repeat1 Fold2 <data.table[100x3]>
#> 3: <vfold_split[100x50x150x3]> Repeat1 Fold3 <data.table[100x3]>
#> 4: <vfold_split[100x50x150x3]> Repeat2 Fold1 <data.table[100x3]>
#> 5: <vfold_split[100x50x150x3]> Repeat2 Fold2 <data.table[100x3]>
#> 6: <vfold_split[100x50x150x3]> Repeat2 Fold3 <data.table[100x3]>
#> validate
#> <list>
#> 1: <data.table[50x3]>
#> 2: <data.table[50x3]>
#> 3: <data.table[50x3]>
#> 4: <data.table[50x3]>
#> 5: <data.table[50x3]>
#> 6: <data.table[50x3]>
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: repeat numbers (Repeat1, Repeat2)
# - id2: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data
C:40A1p3ea03fe1bb5-started.R
# Example data preparation: Define column names for combination
col_names <- c("Sepal.Length", "Sepal.Width", "Petal.Length")
# Example 1: Basic column-to-pairs nesting with custom separator
c2p_nest(
iris, # Input iris dataset
cols2bind = col_names, # Columns to be combined as pairs
pairs_n = 2, # Create pairs of 2 columns
sep = "&" # Custom separator for pair names
)
#> pairs data
#> <char> <list>
#> 1: Sepal.Length&Sepal.Width <data.table[150x4]>
#> 2: Sepal.Length&Petal.Length <data.table[150x4]>
#> 3: Sepal.Width&Petal.Length <data.table[150x4]>
# Returns a nested data.table where:
# - pairs: combined column names (e.g., "Sepal.Length&Sepal.Width")
# - data: list column containing data.tables with value1, value2 columns
# Example 2: Column-to-pairs nesting with numeric indices and grouping
c2p_nest(
iris, # Input iris dataset
cols2bind = 1:3, # First 3 columns to be combined
pairs_n = 2, # Create pairs of 2 columns
by = 5 # Group by 5th column (Species)
)
#> pairs Species data
#> <char> <fctr> <list>
#> 1: Sepal.Length-Sepal.Width setosa <data.table[50x3]>
#> 2: Sepal.Length-Sepal.Width versicolor <data.table[50x3]>
#> 3: Sepal.Length-Sepal.Width virginica <data.table[50x3]>
#> 4: Sepal.Length-Petal.Length setosa <data.table[50x3]>
#> 5: Sepal.Length-Petal.Length versicolor <data.table[50x3]>
#> 6: Sepal.Length-Petal.Length virginica <data.table[50x3]>
#> 7: Sepal.Width-Petal.Length setosa <data.table[50x3]>
#> 8: Sepal.Width-Petal.Length versicolor <data.table[50x3]>
#> 9: Sepal.Width-Petal.Length virginica <data.table[50x3]>
# Returns a nested data.table where:
# - pairs: combined column names
# - Species: grouping variable
# - data: list column containing data.tables grouped by Species
C:40A1p3ea03fe1bb5-started.R
# Example 1: Row-to-pairs nesting with column names
r2p_nest(
mtcars, # Input mtcars dataset
rows2bind = "cyl", # Column to be used as row values
by = c("hp", "drat", "wt") # Columns to be transformed into pairs
)
#> name data
#> <fctr> <list>
#> 1: hp <data.table[32x12]>
#> 2: drat <data.table[32x12]>
#> 3: wt <data.table[32x12]>
# Returns a nested data.table where:
# - name: variable names (hp, drat, wt)
# - data: list column containing data.tables with rows grouped by cyl values
# Example 2: Row-to-pairs nesting with numeric indices
r2p_nest(
mtcars, # Input mtcars dataset
rows2bind = 2, # Use 2nd column (cyl) as row values
by = 4:6 # Use columns 4-6 (hp, drat, wt) for pairs
)
#> name data
#> <fctr> <list>
#> 1: hp <data.table[32x12]>
#> 2: drat <data.table[32x12]>
#> 3: wt <data.table[32x12]>
# Returns a nested data.table where:
# - name: variable names from columns 4-6
# - data: list column containing data.tables with rows grouped by cyl values
C:40A1p3ea03fe1bb5-started.R
# Example 1: Basic nested data export workflow
# Step 1: Create nested data structure
dt_nest <- w2l_nest(
data = iris, # Input iris dataset
cols2l = 1:2, # Columns to be nested
by = "Species" # Grouping variable
)
# Step 2: Export nested data to files
export_nest(
nest_dt = dt_nest, # Input nested data.table
nest_col = "data", # Column containing nested data
group_cols = c("name", "Species") # Columns to create directory structure
)
#> [1] 6
# Returns the number of files created
# Creates directory structure: tempdir()/name/Species/data.txt
# Check exported files
list.files(
path = tempdir(), # Default export directory
pattern = "txt", # File type pattern to search
recursive = TRUE # Search in subdirectories
)
#> [1] "Sepal.Length/setosa/data.txt" "Sepal.Length/versicolor/data.txt"
#> [3] "Sepal.Length/virginica/data.txt" "Sepal.Width/setosa/data.txt"
#> [5] "Sepal.Width/versicolor/data.txt" "Sepal.Width/virginica/data.txt"
# Returns list of created files and their paths
# Clean up exported files
files <- list.files(
path = tempdir(), # Default export directory
pattern = "txt", # File type pattern to search
recursive = TRUE, # Search in subdirectories
full.names = TRUE # Return full file paths
)
file.remove(files) # Remove all exported files
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE
C:40A1p3ea03fe1bb5-started.R
# Example: Export split data to files
# Step 1: Create split data structure
dt_split <- w2l_split(
data = iris, # Input iris dataset
cols2l = 1:2, # Columns to be split
by = "Species" # Grouping variable
)
# Step 2: Export split data to files
export_list(
split_dt = dt_split # Input list of data.tables
)
#> [1] 6
# Returns the number of files created
# Files are saved in tempdir() with .txt extension
# Check exported files
list.files(
path = tempdir(), # Default export directory
pattern = "txt", # File type pattern to search
recursive = TRUE # Search in subdirectories
)
#> [1] "Sepal.Length_setosa.txt" "Sepal.Length_versicolor.txt"
#> [3] "Sepal.Length_virginica.txt" "Sepal.Width_setosa.txt"
#> [5] "Sepal.Width_versicolor.txt" "Sepal.Width_virginica.txt"
# Clean up exported files
files <- list.files(
path = tempdir(), # Default export directory
pattern = "txt", # File type pattern to search
recursive = TRUE, # Search in subdirectories
full.names = TRUE # Return full file paths
)
file.remove(files) # Remove all exported files
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE
C:40A1p3ea03fe1bb5-started.R
head(fires())
#> Location Tag Date Entry Exit Ent Wt Ext Wt Consumed Weight
#> <int> <int> <char> <char> <char> <num> <num> <num> <num>
#> 1: 101 35877 2024-10-07 14:15:39 14:18:02 0.678 0.632 0.046 67.6
#> 2: 101 35873 2024-10-07 14:18:03 14:23:05 0.632 0.384 0.248 60.8
#> 3: 101 35878 2024-10-07 14:23:15 14:28:45 0.670 0.469 0.201 70.8
#> 4: 101 35855 2024-10-07 14:29:05 14:34:29 0.755 0.634 0.121 51.2
#> 5: 101 35877 2024-10-07 14:34:30 14:34:37 0.634 0.634 0.000 0.0
#> 6: 101 35853 2024-10-07 14:34:38 14:36:26 0.634 0.634 0.000 88.6
#> Topup Amount
#> <num>
#> 1: 0.286
#> 2: 0.000
#> 3: 0.286
#> 4: 0.286
#> 5: 0.000
#> 6: 0.000
C:40A1p3ea03fe1bb5-started.R
head(nedaps())
#> animal_number lifenumber responder location visit_time duration
#> <int> <lgcl> <int> <int> <POSc> <int>
#> 1: 10115497 NA 15497 101 2024-09-06 20:22:51 3
#> 2: 10115967 NA 15967 101 2024-09-06 20:22:54 65
#> 3: 10115983 NA 15983 101 2024-09-06 20:23:59 2
#> 4: 10115967 NA 15967 101 2024-09-06 20:24:01 11
#> 5: 10115983 NA 15983 101 2024-09-06 20:24:12 2
#> 6: 10115967 NA 15967 101 2024-09-06 20:24:14 33
#> state weight feed_intake
#> <int> <int> <int>
#> 1: 0 46500 0
#> 2: 0 22000 17
#> 3: 0 33000 0
#> 4: 0 33500 0
#> 5: 0 35500 0
#> 6: 0 31000 0
C:40A1p3ea03fe1bb5-started.R
# Example 1: Create nested data structures
# Create single nested column
df_nest1 <- iris |>
dplyr::group_nest(Species) # Group and nest by Species
# Create multiple nested columns
df_nest2 <- iris |>
dplyr::group_nest(Species) |> # Group and nest by Species
dplyr::mutate(
data2 = purrr::map( # Create second nested column
data,
dplyr::mutate,
c = 2
)
)
# Example 2: Convert nested structures
# Convert data frame to data table
convert_nest(
df_nest1, # Input nested data frame
to = "dt" # Convert to data.table
)
#> Species data
#> <fctr> <list>
#> 1: setosa <data.table[50x4]>
#> 2: versicolor <data.table[50x4]>
#> 3: virginica <data.table[50x4]>
# Convert specific nested columns
convert_nest(
df_nest2, # Input nested data frame
to = "dt", # Convert to data.table
nest_cols = "data" # Only convert 'data' column
)
#> Species data data2
#> <fctr> <list> <list>
#> 1: setosa <data.table[50x4]> <tbl_df[50x5]>
#> 2: versicolor <data.table[50x4]> <tbl_df[50x5]>
#> 3: virginica <data.table[50x4]> <tbl_df[50x5]>
# Example 3: Convert data table to data frame
dt_nest <- mintyr::w2l_nest(
data = iris, # Input dataset
cols2l = 1:2 # Columns to nest
)
convert_nest(
dt_nest, # Input nested data table
to = "df" # Convert to data frame
)
#> # A tibble: 2 × 2
#> name data
#> <fct> <list>
#> 1 Sepal.Length <tibble [150 × 4]>
#> 2 Sepal.Width <tibble [150 × 4]>
C:40A1p3ea03fe1bb5-started.R
# Example: Path segment extraction demonstrations
# Setup test paths
paths <- c(
"C:/home/user/documents", # Windows style path
"/var/log/system", # Unix system path
"/usr/local/bin" # Unix binary path
)
# Example 1: Extract first segment
get_path_segment(
paths, # Input paths
1 # Get first segment
)
#> [1] "home" "var" "usr"
# Returns: c("home", "var", "usr")
# Example 2: Extract second-to-last segment
get_path_segment(
paths, # Input paths
-2 # Get second-to-last segment
)
#> [1] "user" "log" "local"
# Returns: c("user", "log", "local")
# Example 3: Extract from first to last segment
get_path_segment(
paths, # Input paths
c(1,-1) # Range from first to last
)
#> [1] "home/user/documents" "var/log/system" "usr/local/bin"
# Returns full paths without drive letters
# Example 4: Extract first three segments
get_path_segment(
paths, # Input paths
c(1,3) # Range from first to third
)
#> [1] "home/user/documents" "var/log/system" "usr/local/bin"
# Returns: c("home/user/documents", "var/log/system", "usr/local/bin")
# Example 5: Extract last two segments (range given in reverse order)
get_path_segment(
paths, # Input paths
c(-1,-2) # Range from last to second-to-last
)
#> [1] "user/documents" "log/system" "local/bin"
# Returns: c("user/documents", "log/system", "local/bin")
# Note: segments keep their original path order even though the range is reversed
# Example 6: Extract first two segments
get_path_segment(
paths, # Input paths
c(1,2) # Range from first to second
)
#> [1] "home/user" "var/log" "usr/local"
# Returns: c("home/user", "var/log", "usr/local")
C:40A1p3ea03fe1bb5-started.R
# Example: Number formatting demonstrations
# Setup test data
dt <- data.table::data.table(
a = c(0.1234, 0.5678), # Numeric column 1
b = c(0.2345, 0.6789), # Numeric column 2
c = c("text1", "text2") # Text column
)
# Example 1: Format all numeric columns
format_digits(
dt, # Input data table
digits = 2 # Round to 2 decimal places
)
#> a b c
#> <char> <char> <char>
#> 1: 0.12 0.23 text1
#> 2: 0.57 0.68 text2
# Example 2: Format specific column as percentage
format_digits(
dt, # Input data table
cols = c("a"), # Only format column 'a'
digits = 2, # Round to 2 decimal places
percentage = TRUE # Convert to percentage
)
#> a b c
#> <char> <num> <char>
#> 1: 12.34% 0.2345 text1
#> 2: 56.78% 0.6789 text2
C:40A1p3ea03fe1bb5-started.R
# Get path to an example file
mintyr_example("csv_test1.csv")
#> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpG40A1p/Rinst3ea045257f3c/mintyr/extdata/csv_test1.csv"
C:40A1p3ea03fe1bb5-started.R
# List all example files
mintyr_examples()
#> [1] "csv_test1.csv" "csv_test2.csv" "xlsx_test1.xlsx" "xlsx_test2.xlsx"
C:40A1p3ea03fe1bb5-started.R
# Example: Excel file import demonstrations
# Setup test files
xlsx_files <- mintyr_example(
mintyr_examples("xlsx_test") # Get example Excel files
)
# Example 1: Import and combine all sheets from all files
import_xlsx(
xlsx_files, # Input Excel file paths
rbind = TRUE # Combine all sheets into one data.table
)
#> excel_name sheet_name col1 col2 col3
#> <char> <char> <num> <char> <lgcl>
#> 1: xlsx_test1 Sheet1 4 d FALSE
#> 2: xlsx_test1 Sheet1 5 f TRUE
#> 3: xlsx_test1 Sheet1 6 e TRUE
#> 4: xlsx_test1 Sheet2 1 a TRUE
#> 5: xlsx_test1 Sheet2 2 b FALSE
#> 6: xlsx_test1 Sheet2 3 c TRUE
#> 7: xlsx_test2 Sheet1 15 o FALSE
#> 8: xlsx_test2 Sheet1 16 p TRUE
#> 9: xlsx_test2 Sheet1 17 q FALSE
#> 10: xlsx_test2 a 7 g FALSE
#> 11: xlsx_test2 a 9 h TRUE
#> 12: xlsx_test2 a 8 i FALSE
#> 13: xlsx_test2 b 10 J FALSE
#> 14: xlsx_test2 b 11 K TRUE
#> 15: xlsx_test2 b 12 L FALSE
# Example 2: Import specific sheets separately
import_xlsx(
xlsx_files, # Input Excel file paths
rbind = FALSE, # Keep sheets as separate data.tables
sheet = 2 # Only import the second sheet of each file
)
#> $xlsx_test1_Sheet2
#> col1 col2 col3
#> <num> <char> <lgcl>
#> 1: 1 a TRUE
#> 2: 2 b FALSE
#> 3: 3 c TRUE
#>
#> $xlsx_test2_a
#> col1 col2 col3
#> <num> <char> <lgcl>
#> 1: 7 g FALSE
#> 2: 9 h TRUE
#> 3: 8 i FALSE
C:40A1p3ea03fe1bb5-started.R
# Example: CSV file import demonstrations
# Setup test files
csv_files <- mintyr_example(
mintyr_examples("csv_test") # Get example CSV files
)
# Example 1: Import and combine CSV files using data.table
import_csv(
csv_files, # Input CSV file paths
package = "data.table", # Use data.table for reading
rbind = TRUE, # Combine all files into one data.table
rbind_label = "_file" # Column name for file source
)
#> _file col1 col2 col3
#> <char> <int> <char> <lgcl>
#> 1: csv_test1 4 d FALSE
#> 2: csv_test1 5 f TRUE
#> 3: csv_test1 6 e TRUE
#> 4: csv_test2 15 o FALSE
#> 5: csv_test2 16 p TRUE
#> 6: csv_test2 17 q FALSE
# Example 2: Import files separately using arrow
import_csv(
csv_files, # Input CSV file paths
package = "arrow", # Use arrow for reading
rbind = FALSE # Keep files as separate list elements (printed as tibbles below)
)
#> $csv_test1
#> # A tibble: 3 × 3
#> col1 col2 col3
#> <int> <chr> <lgl>
#> 1 4 d FALSE
#> 2 5 f TRUE
#> 3 6 e TRUE
#>
#> $csv_test2
#> # A tibble: 3 × 3
#> col1 col2 col3
#> <int> <chr> <lgl>
#> 1 15 o FALSE
#> 2 16 p TRUE
#> 3 17 q FALSE
C:40A1p3ea03fe1bb5-started.R
# Example: File path processing demonstrations
# Setup test files
xlsx_files <- mintyr_example(
mintyr_examples("xlsx_test") # Get example Excel files
)
# Example 1: Extract filenames without extensions
get_filename(
xlsx_files, # Input file paths
rm_extension = TRUE, # Remove file extensions
rm_path = TRUE # Remove directory paths
)
#> [1] "xlsx_test1" "xlsx_test2"
# Example 2: Keep file extensions
get_filename(
xlsx_files, # Input file paths
rm_extension = FALSE, # Keep file extensions
rm_path = TRUE # Remove directory paths
)
#> [1] "xlsx_test1.xlsx" "xlsx_test2.xlsx"
# Example 3: Keep full paths without extensions
get_filename(
xlsx_files, # Input file paths
rm_extension = TRUE, # Remove file extensions
rm_path = FALSE # Keep directory paths
)
#> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpG40A1p/Rinst3ea045257f3c/mintyr/extdata/xlsx_test1"
#> [2] "C:/Users/Dell/AppData/Local/Temp/RtmpG40A1p/Rinst3ea045257f3c/mintyr/extdata/xlsx_test2"
C:40A1p3ea03fe1bb5-started.R
# Example: Wide to long format nesting demonstrations
# Example 1: Basic nesting by group
w2l_nest(
data = iris, # Input dataset
by = "Species" # Group by Species column
)
#> Species data
#> <fctr> <list>
#> 1: setosa <data.table[50x4]>
#> 2: versicolor <data.table[50x4]>
#> 3: virginica <data.table[50x4]>
# Example 2: Nest specific columns with numeric indices
w2l_nest(
data = iris, # Input dataset
cols2l = 1:4, # Select first 4 columns to nest
by = "Species" # Group by Species column
)
#> name Species data
#> <fctr> <fctr> <list>
#> 1: Sepal.Length setosa <data.table[50x1]>
#> 2: Sepal.Length versicolor <data.table[50x1]>
#> 3: Sepal.Length virginica <data.table[50x1]>
#> 4: Sepal.Width setosa <data.table[50x1]>
#> 5: Sepal.Width versicolor <data.table[50x1]>
#> 6: Sepal.Width virginica <data.table[50x1]>
#> 7: Petal.Length setosa <data.table[50x1]>
#> 8: Petal.Length versicolor <data.table[50x1]>
#> 9: Petal.Length virginica <data.table[50x1]>
#> 10: Petal.Width setosa <data.table[50x1]>
#> 11: Petal.Width versicolor <data.table[50x1]>
#> 12: Petal.Width virginica <data.table[50x1]>
# Example 3: Nest specific columns with column names
w2l_nest(
data = iris, # Input dataset
cols2l = c("Sepal.Length", # Select columns by name
"Sepal.Width",
"Petal.Length"),
by = 5 # Group by column index 5 (Species)
)
#> name Species data
#> <fctr> <fctr> <list>
#> 1: Sepal.Length setosa <data.table[50x2]>
#> 2: Sepal.Length versicolor <data.table[50x2]>
#> 3: Sepal.Length virginica <data.table[50x2]>
#> 4: Sepal.Width setosa <data.table[50x2]>
#> 5: Sepal.Width versicolor <data.table[50x2]>
#> 6: Sepal.Width virginica <data.table[50x2]>
#> 7: Petal.Length setosa <data.table[50x2]>
#> 8: Petal.Length versicolor <data.table[50x2]>
#> 9: Petal.Length virginica <data.table[50x2]>
# Returns similar structure to Example 2
C:40A1p3ea03fe1bb5-started.R
# Example: Wide to long format splitting demonstrations
# Example 1: Basic splitting by Species
w2l_split(
data = iris, # Input dataset
by = "Species" # Split by Species column
) |>
lapply(head) # Show first 6 rows of each split
#> $setosa
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <num> <num> <num> <num>
#> 1: 5.1 3.5 1.4 0.2
#> 2: 4.9 3.0 1.4 0.2
#> 3: 4.7 3.2 1.3 0.2
#> 4: 4.6 3.1 1.5 0.2
#> 5: 5.0 3.6 1.4 0.2
#> 6: 5.4 3.9 1.7 0.4
#>
#> $versicolor
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <num> <num> <num> <num>
#> 1: 7.0 3.2 4.7 1.4
#> 2: 6.4 3.2 4.5 1.5
#> 3: 6.9 3.1 4.9 1.5
#> 4: 5.5 2.3 4.0 1.3
#> 5: 6.5 2.8 4.6 1.5
#> 6: 5.7 2.8 4.5 1.3
#>
#> $virginica
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <num> <num> <num> <num>
#> 1: 6.3 3.3 6.0 2.5
#> 2: 5.8 2.7 5.1 1.9
#> 3: 7.1 3.0 5.9 2.1
#> 4: 6.3 2.9 5.6 1.8
#> 5: 6.5 3.0 5.8 2.2
#> 6: 7.6 3.0 6.6 2.1
# Example 2: Split specific columns using numeric indices
w2l_split(
data = iris, # Input dataset
cols2l = 1:3, # Select first 3 columns to split
by = 5 # Split by column index 5 (Species)
) |>
lapply(head) # Show first 6 rows of each split
#> $Sepal.Length_setosa
#> Petal.Width value
#> <num> <num>
#> 1: 0.2 5.1
#> 2: 0.2 4.9
#> 3: 0.2 4.7
#> 4: 0.2 4.6
#> 5: 0.2 5.0
#> 6: 0.4 5.4
#>
#> $Sepal.Length_versicolor
#> Petal.Width value
#> <num> <num>
#> 1: 1.4 7.0
#> 2: 1.5 6.4
#> 3: 1.5 6.9
#> 4: 1.3 5.5
#> 5: 1.5 6.5
#> 6: 1.3 5.7
#>
#> $Sepal.Length_virginica
#> Petal.Width value
#> <num> <num>
#> 1: 2.5 6.3
#> 2: 1.9 5.8
#> 3: 2.1 7.1
#> 4: 1.8 6.3
#> 5: 2.2 6.5
#> 6: 2.1 7.6
#>
#> $Sepal.Width_setosa
#> Petal.Width value
#> <num> <num>
#> 1: 0.2 3.5
#> 2: 0.2 3.0
#> 3: 0.2 3.2
#> 4: 0.2 3.1
#> 5: 0.2 3.6
#> 6: 0.4 3.9
#>
#> $Sepal.Width_versicolor
#> Petal.Width value
#> <num> <num>
#> 1: 1.4 3.2
#> 2: 1.5 3.2
#> 3: 1.5 3.1
#> 4: 1.3 2.3
#> 5: 1.5 2.8
#> 6: 1.3 2.8
#>
#> $Sepal.Width_virginica
#> Petal.Width value
#> <num> <num>
#> 1: 2.5 3.3
#> 2: 1.9 2.7
#> 3: 2.1 3.0
#> 4: 1.8 2.9
#> 5: 2.2 3.0
#> 6: 2.1 3.0
#>
#> $Petal.Length_setosa
#> Petal.Width value
#> <num> <num>
#> 1: 0.2 1.4
#> 2: 0.2 1.4
#> 3: 0.2 1.3
#> 4: 0.2 1.5
#> 5: 0.2 1.4
#> 6: 0.4 1.7
#>
#> $Petal.Length_versicolor
#> Petal.Width value
#> <num> <num>
#> 1: 1.4 4.7
#> 2: 1.5 4.5
#> 3: 1.5 4.9
#> 4: 1.3 4.0
#> 5: 1.5 4.6
#> 6: 1.3 4.5
#>
#> $Petal.Length_virginica
#> Petal.Width value
#> <num> <num>
#> 1: 2.5 6.0
#> 2: 1.9 5.1
#> 3: 2.1 5.9
#> 4: 1.8 5.6
#> 5: 2.2 5.8
#> 6: 2.1 6.6
# Example 3: Split specific columns using column names
list_res <- w2l_split(
data = iris, # Input dataset
cols2l = c("Sepal.Length", # Select columns by name
"Sepal.Width"),
by = "Species" # Split by Species column
)
lapply(list_res, head) # Show first 6 rows of each split
#> $Sepal.Length_setosa
#> Petal.Length Petal.Width value
#> <num> <num> <num>
#> 1: 1.4 0.2 5.1
#> 2: 1.4 0.2 4.9
#> 3: 1.3 0.2 4.7
#> 4: 1.5 0.2 4.6
#> 5: 1.4 0.2 5.0
#> 6: 1.7 0.4 5.4
#>
#> $Sepal.Length_versicolor
#> Petal.Length Petal.Width value
#> <num> <num> <num>
#> 1: 4.7 1.4 7.0
#> 2: 4.5 1.5 6.4
#> 3: 4.9 1.5 6.9
#> 4: 4.0 1.3 5.5
#> 5: 4.6 1.5 6.5
#> 6: 4.5 1.3 5.7
#>
#> $Sepal.Length_virginica
#> Petal.Length Petal.Width value
#> <num> <num> <num>
#> 1: 6.0 2.5 6.3
#> 2: 5.1 1.9 5.8
#> 3: 5.9 2.1 7.1
#> 4: 5.6 1.8 6.3
#> 5: 5.8 2.2 6.5
#> 6: 6.6 2.1 7.6
#>
#> $Sepal.Width_setosa
#> Petal.Length Petal.Width value
#> <num> <num> <num>
#> 1: 1.4 0.2 3.5
#> 2: 1.4 0.2 3.0
#> 3: 1.3 0.2 3.2
#> 4: 1.5 0.2 3.1
#> 5: 1.4 0.2 3.6
#> 6: 1.7 0.4 3.9
#>
#> $Sepal.Width_versicolor
#> Petal.Length Petal.Width value
#> <num> <num> <num>
#> 1: 4.7 1.4 3.2
#> 2: 4.5 1.5 3.2
#> 3: 4.9 1.5 3.1
#> 4: 4.0 1.3 2.3
#> 5: 4.6 1.5 2.8
#> 6: 4.5 1.3 2.8
#>
#> $Sepal.Width_virginica
#> Petal.Length Petal.Width value
#> <num> <num> <num>
#> 1: 6.0 2.5 3.3
#> 2: 5.1 1.9 2.7
#> 3: 5.9 2.1 3.0
#> 4: 5.6 1.8 2.9
#> 5: 5.8 2.2 3.0
#> 6: 6.6 2.1 3.0
# Returns similar structure to Example 2
C:40A1p3ea03fe1bb5-started.R
# Example: Cross-validation for nested data.table demonstrations
# Setup test data
dt_nest <- w2l_nest(
data = iris, # Input dataset
cols2l = 1:2 # Nest first 2 columns
)
# Example 1: Basic 2-fold cross-validation
nest_cv(
nest_dt = dt_nest, # Input nested data.table
v = 2 # Number of folds (2-fold CV)
)
#> name splits id train
#> <fctr> <list> <char> <list>
#> 1: Sepal.Length <vfold_split[75x75x150x4]> Fold1 <data.table[75x4]>
#> 2: Sepal.Length <vfold_split[75x75x150x4]> Fold2 <data.table[75x4]>
#> 3: Sepal.Width <vfold_split[75x75x150x4]> Fold1 <data.table[75x4]>
#> 4: Sepal.Width <vfold_split[75x75x150x4]> Fold2 <data.table[75x4]>
#> validate
#> <list>
#> 1: <data.table[75x4]>
#> 2: <data.table[75x4]>
#> 3: <data.table[75x4]>
#> 4: <data.table[75x4]>
# Example 2: Repeated 2-fold cross-validation
nest_cv(
nest_dt = dt_nest, # Input nested data.table
v = 2, # Number of folds (2-fold CV)
repeats = 2 # Number of repetitions
)
#> name splits id id2 train
#> <fctr> <list> <char> <char> <list>
#> 1: Sepal.Length <vfold_split[75x75x150x4]> Repeat1 Fold1 <data.table[75x4]>
#> 2: Sepal.Length <vfold_split[75x75x150x4]> Repeat1 Fold2 <data.table[75x4]>
#> 3: Sepal.Length <vfold_split[75x75x150x4]> Repeat2 Fold1 <data.table[75x4]>
#> 4: Sepal.Length <vfold_split[75x75x150x4]> Repeat2 Fold2 <data.table[75x4]>
#> 5: Sepal.Width <vfold_split[75x75x150x4]> Repeat1 Fold1 <data.table[75x4]>
#> 6: Sepal.Width <vfold_split[75x75x150x4]> Repeat1 Fold2 <data.table[75x4]>
#> 7: Sepal.Width <vfold_split[75x75x150x4]> Repeat2 Fold1 <data.table[75x4]>
#> 8: Sepal.Width <vfold_split[75x75x150x4]> Repeat2 Fold2 <data.table[75x4]>
#> validate
#> <list>
#> 1: <data.table[75x4]>
#> 2: <data.table[75x4]>
#> 3: <data.table[75x4]>
#> 4: <data.table[75x4]>
#> 5: <data.table[75x4]>
#> 6: <data.table[75x4]>
#> 7: <data.table[75x4]>
#> 8: <data.table[75x4]>
C:40A1p3ea03fe1bb5-started.R
# Example 1: Basic usage with single trait
# This example selects the top 10% of observations based on Petal.Width
# keep_data=TRUE returns both summary statistics and the filtered data
top_perc(iris,
perc = 0.1, # Select top 10%
trait = c("Petal.Width"), # Column to analyze
keep_data = TRUE) # Return both stats and filtered data
#> $Petal.Width_0.1
#> $Petal.Width_0.1$stat
#> # A tibble: 1 × 5
#> variable n mean sd top_perc
#> <fct> <dbl> <dbl> <dbl> <chr>
#> 1 Petal.Width 17 2.34 0.1 10%
#>
#> $Petal.Width_0.1$data
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 6.3 3.3 6.0 2.5 virginica
#> 2 6.5 3.0 5.8 2.2 virginica
#> 3 7.2 3.6 6.1 2.5 virginica
#> 4 5.8 2.8 5.1 2.4 virginica
#> 5 6.4 3.2 5.3 2.3 virginica
#> 6 7.7 3.8 6.7 2.2 virginica
#> 7 7.7 2.6 6.9 2.3 virginica
#> 8 6.9 3.2 5.7 2.3 virginica
#> 9 6.4 2.8 5.6 2.2 virginica
#> 10 7.7 3.0 6.1 2.3 virginica
#> 11 6.3 3.4 5.6 2.4 virginica
#> 12 6.7 3.1 5.6 2.4 virginica
#> 13 6.9 3.1 5.1 2.3 virginica
#> 14 6.8 3.2 5.9 2.3 virginica
#> 15 6.7 3.3 5.7 2.5 virginica
#> 16 6.7 3.0 5.2 2.3 virginica
#> 17 6.2 3.4 5.4 2.3 virginica
# Example 2: Using grouping with 'by' parameter
# This example performs the same analysis but separately for each Species
# keep_data is not set here, so the output below is a single tibble of
# per-group summary statistics (no filtered data is shown)
top_perc(iris,
perc = 0.1, # Select top 10%
trait = c("Petal.Width"), # Column to analyze
by = "Species") # Group by Species
#> # A tibble: 3 × 6
#> Species variable n mean sd top_perc
#> <fct> <fct> <dbl> <dbl> <dbl> <chr>
#> 1 setosa Petal.Width 9 0.433 0.071 10%
#> 2 versicolor Petal.Width 5 1.66 0.089 10%
#> 3 virginica Petal.Width 6 2.45 0.055 10%
# Example 3: Complex example with multiple percentages and grouping variables
# Reshape data from wide to long format for Sepal.Length and Sepal.Width
iris |>
tidyr::pivot_longer(1:2,
names_to = "names",
values_to = "values") |>
mintyr::top_perc(
perc = c(0.1, -0.2),
trait = "values",
by = c("Species", "names"),
type = "mean_sd")
#> # A tibble: 12 × 7
#> Species names variable n mean sd top_perc
#> <fct> <chr> <fct> <dbl> <dbl> <dbl> <chr>
#> 1 setosa Sepal.Length values 5 5.64 0.134 10%
#> 2 setosa Sepal.Width values 6 4.08 0.194 10%
#> 3 versicolor Sepal.Length values 6 6.8 0.126 10%
#> 4 versicolor Sepal.Width values 5 3.26 0.089 10%
#> 5 virginica Sepal.Length values 5 7.74 0.089 10%
#> 6 virginica Sepal.Width values 5 3.6 0.2 10%
#> 7 setosa Sepal.Length values 11 4.53 0.135 -20%
#> 8 setosa Sepal.Width values 12 2.97 0.219 -20%
#> 9 versicolor Sepal.Length values 11 5.28 0.244 -20%
#> 10 versicolor Sepal.Width values 13 2.35 0.151 -20%
#> 11 virginica Sepal.Length values 11 5.79 0.336 -20%
#> 12 virginica Sepal.Width values 11 2.56 0.15 -20%
C:40A1p3ea03fe1bb5-started.R
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.