---
title: "Get started"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Get started}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r}
library(mintyr)
```

# split_cv

```{r example-split_cv}
# Prepare example data: convert the first 3 columns of the iris dataset to
# long format and split
dt_split <- w2l_split(data = iris, cols2l = 1:3)
# dt_split is now a list containing 3 data tables for Sepal.Length,
# Sepal.Width, and Petal.Length

# Example 1: Single cross-validation (no repeats)
split_cv(
  split_dt = dt_split,  # Input list of split data
  v = 3,                # Set 3-fold cross-validation
  repeats = 1           # Perform cross-validation once (no repeats)
)
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data

# Example 2: Repeated cross-validation
split_cv(
  split_dt = dt_split,  # Input list of split data
  v = 3,                # Set 3-fold cross-validation
  repeats = 2           # Perform cross-validation twice
)
# Returns a list where each element contains:
# - splits: rsample split objects
# - id: repeat numbers (Repeat1, Repeat2)
# - id2: fold numbers (Fold1, Fold2, Fold3)
# - train: training set data
# - validate: validation set data
```

# c2p_nest

```{r example-c2p_nest}
# Example data preparation: Define column names for combination
col_names <- c("Sepal.Length", "Sepal.Width", "Petal.Length")

# Example 1: Basic column-to-pairs nesting with custom separator
c2p_nest(
  iris,                   # Input iris dataset
  cols2bind = col_names,  # Columns to be combined as pairs
  pairs_n = 2,            # Create pairs of 2 columns
  sep = "&"               # Custom separator for pair names
)
# Returns a nested data.table where:
# - pairs: combined column names (e.g., "Sepal.Length&Sepal.Width")
# - data: list column containing data.tables with value1, value2 columns

# Example 2: Column-to-pairs nesting with numeric indices and grouping
c2p_nest(
  iris,             # Input iris dataset
  cols2bind = 1:3,  # First 3 columns to be combined
  pairs_n = 2,      # Create pairs of 2 columns
  by = 5            # Group by 5th column (Species)
)
# Returns a nested data.table where:
# - pairs: combined column names
# - Species: grouping variable
# - data: list column containing data.tables grouped by Species
```

# r2p_nest

```{r example-r2p_nest}
# Example 1: Row-to-pairs nesting with column names
r2p_nest(
  mtcars,                      # Input mtcars dataset
  rows2bind = "cyl",           # Column to be used as row values
  by = c("hp", "drat", "wt")   # Columns to be transformed into pairs
)
# Returns a nested data.table where:
# - name: variable names (hp, drat, wt)
# - data: list column containing data.tables with rows grouped by cyl values

# Example 2: Row-to-pairs nesting with numeric indices
r2p_nest(
  mtcars,         # Input mtcars dataset
  rows2bind = 2,  # Use 2nd column (cyl) as row values
  by = 4:6        # Use columns 4-6 (hp, drat, wt) for pairs
)
# Returns a nested data.table where:
# - name: variable names from columns 4-6
# - data: list column containing data.tables with rows grouped by cyl values
```

# export_nest

```{r example-export_nest}
# Example 1: Basic nested data export workflow
# Step 1: Create nested data structure
dt_nest <- w2l_nest(
  data = iris,    # Input iris dataset
  cols2l = 1:2,   # Columns to be nested
  by = "Species"  # Grouping variable
)

# Step 2: Export nested data to files
export_nest(
  nest_dt = dt_nest,                 # Input nested data.table
  nest_col = "data",                 # Column containing nested data
  group_cols = c("name", "Species")  # Columns to create directory structure
)
# Returns the number of files created
# Creates directory structure: tempdir()/name/Species/data.txt

# Check exported files
# The pattern is a regular expression; it is anchored to the ".txt" extension
# so that unrelated files whose names merely contain "txt" are not matched.
list.files(
  path = tempdir(),      # Default export directory
  pattern = "\\.txt$",   # Match files ending in .txt
  recursive = TRUE       # Search in subdirectories
)
# Returns list of created files and their paths

# Clean up exported files
files <- list.files(
  path = tempdir(),      # Default export directory
  pattern = "\\.txt$",   # Match files ending in .txt
  recursive = TRUE,      # Search in subdirectories
  full.names = TRUE      # Return full file paths
)
file.remove(files)       # Remove all exported files
```

# export_list

```{r example-export_list}
# Example: Export split data to files
# Step 1: Create split data structure
dt_split <- w2l_split(
  data = iris,    # Input iris dataset
  cols2l = 1:2,   # Columns to be split
  by = "Species"  # Grouping variable
)

# Step 2: Export split data to files
export_list(
  split_dt = dt_split  # Input list of data.tables
)
# Returns the number of files created
# Files are saved in tempdir() with .txt extension

# Check exported files
# The pattern is a regular expression; it is anchored to the ".txt" extension
# so that unrelated files whose names merely contain "txt" are not matched.
list.files(
  path = tempdir(),      # Default export directory
  pattern = "\\.txt$",   # Match files ending in .txt
  recursive = TRUE       # Search in subdirectories
)

# Clean up exported files
files <- list.files(
  path = tempdir(),      # Default export directory
  pattern = "\\.txt$",   # Match files ending in .txt
  recursive = TRUE,      # Search in subdirectories
  full.names = TRUE      # Return full file paths
)
file.remove(files)       # Remove all exported files
```

# fires

```{r example-fires}
head(fires())
```

# nedaps

```{r example-nedaps}
head(nedaps())
```

# convert_nest

```{r example-convert_nest}
# Example 1: Create nested data structures
# Create single nested column
df_nest1 <- iris |>
  dplyr::group_nest(Species)  # Group and nest by Species

# Create multiple nested columns
df_nest2 <- iris |>
  dplyr::group_nest(Species) |>  # Group and nest by Species
  dplyr::mutate(
    data2 = purrr::map(          # Create second nested column
      data,
      dplyr::mutate,
      c = 2
    )
  )

# Example 2: Convert nested structures
# Convert data frame to data table
convert_nest(
  df_nest1,  # Input nested data frame
  to = "dt"  # Convert to data.table
)

# Convert specific nested columns
convert_nest(
  df_nest2,           # Input nested data frame
  to = "dt",          # Convert to data.table
  nest_cols = "data"  # Only convert 'data' column
)

# Example 3: Convert data table to data frame
dt_nest <- mintyr::w2l_nest(
  data = iris,  # Input dataset
  cols2l = 1:2  # Columns to nest
)
convert_nest(
  dt_nest,   # Input nested data table
  to = "df"  # Convert to data frame
)
```

# get_path_segment

```{r example-get_path_segment}
# Example: Path segment extraction demonstrations

# Setup test paths
paths <- c(
  "C:/home/user/documents",  # Windows style path
  "/var/log/system",         # Unix system path
  "/usr/local/bin"           # Unix binary path
)

# Example 1: Extract first segment
get_path_segment(
  paths,  # Input paths
  1       # Get first segment
)
# Returns: c("home", "var", "usr")

# Example 2: Extract second-to-last segment
get_path_segment(
  paths,  # Input paths
  -2      # Get second-to-last segment
)
# Returns: c("user", "log", "local")

# Example 3: Extract from first to last segment
get_path_segment(
  paths,     # Input paths
  c(1, -1)   # Range from first to last
)
# Returns full paths without drive letters

# Example 4: Extract first three segments
get_path_segment(
  paths,    # Input paths
  c(1, 3)   # Range from first to third
)
# Returns: c("home/user/documents", "var/log/system", "usr/local/bin")

# Example 5: Extract last two segments (reverse order)
get_path_segment(
  paths,      # Input paths
  c(-1, -2)   # Range from last to second-to-last
)
# Returns: c("documents/user", "system/log", "bin/local")

# Example 6: Extract first two segments
get_path_segment(
  paths,    # Input paths
  c(1, 2)   # Range from first to second
)
# Returns: c("home/user", "var/log", "usr/local")
```

# format_digits

```{r example-format_digits}
# Example: Number formatting demonstrations

# Setup test data
dt <- data.table::data.table(
  a = c(0.1234, 0.5678),   # Numeric column 1
  b = c(0.2345, 0.6789),   # Numeric column 2
  c = c("text1", "text2")  # Text column
)

# Example 1: Format all numeric columns
format_digits(
  dt,         # Input data table
  digits = 2  # Round to 2 decimal places
)

# Example 2: Format specific column as percentage
format_digits(
  dt,                # Input data table
  cols = c("a"),     # Only format column 'a'
  digits = 2,        # Round to 2 decimal places
  percentage = TRUE  # Convert to percentage
)
```

# mintyr_example

```{r example-mintyr_example}
# Get path to an example file
mintyr_example("csv_test1.csv")
```

# mintyr_examples

```{r example-mintyr_examples}
# List all example files
mintyr_examples()
```

# import_xlsx

```{r example-import_xlsx}
# Example: Excel file import demonstrations

# Setup test files
xlsx_files <- mintyr_example(
  mintyr_examples("xlsx_test")  # Get example Excel files
)

# Example 1: Import and combine all sheets from all files
import_xlsx(
  xlsx_files,   # Input Excel file paths
  rbind = TRUE  # Combine all sheets into one data.table
)

# Example 2: Import specific sheets separately
import_xlsx(
  xlsx_files,     # Input Excel file paths
  rbind = FALSE,  # Keep sheets as separate data.tables
  sheet = 2       # Only import the second sheet (sheet index 2)
)
```

# import_csv

```{r example-import_csv}
# Example: CSV file import demonstrations

# Setup test files
csv_files <- mintyr_example(
  mintyr_examples("csv_test")  # Get example CSV files
)

# Example 1: Import and combine CSV files using data.table
import_csv(
  csv_files,               # Input CSV file paths
  package = "data.table",  # Use data.table for reading
  rbind = TRUE,            # Combine all files into one data.table
  rbind_label = "_file"    # Column name for file source
)

# Example 2: Import files separately using arrow
import_csv(
  csv_files,          # Input CSV file paths
  package = "arrow",  # Use arrow for reading
  rbind = FALSE       # Keep files as separate data.tables
)
```

# get_filename

```{r example-get_filename}
# Example: File path processing demonstrations

# Setup test files
xlsx_files <- mintyr_example(
  mintyr_examples("xlsx_test")  # Get example Excel files
)

# Example 1: Extract filenames without extensions
get_filename(
  xlsx_files,           # Input file paths
  rm_extension = TRUE,  # Remove file extensions
  rm_path = TRUE        # Remove directory paths
)

# Example 2: Keep file extensions
get_filename(
  xlsx_files,            # Input file paths
  rm_extension = FALSE,  # Keep file extensions
  rm_path = TRUE         # Remove directory paths
)

# Example 3: Keep full paths without extensions
get_filename(
  xlsx_files,           # Input file paths
  rm_extension = TRUE,  # Remove file extensions
  rm_path = FALSE       # Keep directory paths
)
```

# w2l_nest

```{r example-w2l_nest}
# Example: Wide to long format nesting demonstrations

# Example 1: Basic nesting by group
w2l_nest(
  data = iris,    # Input dataset
  by = "Species"  # Group by Species column
)

# Example 2: Nest specific columns with numeric indices
w2l_nest(
  data = iris,    # Input dataset
  cols2l = 1:4,   # Select first 4 columns to nest
  by = "Species"  # Group by Species column
)

# Example 3: Nest specific columns with column names
w2l_nest(
  data = iris,                 # Input dataset
  cols2l = c("Sepal.Length",   # Select columns by name
             "Sepal.Width",
             "Petal.Length"),
  by = 5                       # Group by column index 5 (Species)
)
# Returns similar structure to Example 2
```

# w2l_split

```{r example-w2l_split}
# Example: Wide to long format splitting demonstrations

# Example 1: Basic splitting by Species
w2l_split(
  data = iris,    # Input dataset
  by = "Species"  # Split by Species column
) |>
  lapply(head)    # Show first 6 rows of each split

# Example 2: Split specific columns using numeric indices
w2l_split(
  data = iris,   # Input dataset
  cols2l = 1:3,  # Select first 3 columns to split
  by = 5         # Split by column index 5 (Species)
) |>
  lapply(head)   # Show first 6 rows of each split

# Example 3: Split specific columns using column names
list_res <- w2l_split(
  data = iris,                # Input dataset
  cols2l = c("Sepal.Length",  # Select columns by name
             "Sepal.Width"),
  by = "Species"              # Split by Species column
)
lapply(list_res, head)        # Show first 6 rows of each split
# Returns similar structure to Example 2
```

# nest_cv

```{r example-nest_cv}
# Example: Cross-validation for nested data.table demonstrations

# Setup test data
dt_nest <- w2l_nest(
  data = iris,  # Input dataset
  cols2l = 1:2  # Nest first 2 columns
)

# Example 1: Basic 2-fold cross-validation
nest_cv(
  nest_dt = dt_nest,  # Input nested data.table
  v = 2               # Number of folds (2-fold CV)
)

# Example 2: Repeated 2-fold cross-validation
nest_cv(
  nest_dt = dt_nest,  # Input nested data.table
  v = 2,              # Number of folds (2-fold CV)
  repeats = 2         # Number of repetitions
)
```

# top_perc

```{r example-top_perc}
# Example 1: Basic usage with single trait
# This example selects the top 10% of observations based on Petal.Width
# keep_data=TRUE returns both summary statistics and the filtered data
top_perc(iris,
         perc = 0.1,                 # Select top 10%
         trait = c("Petal.Width"),   # Column to analyze
         keep_data = TRUE)           # Return both stats and filtered data

# Example 2: Using grouping with 'by' parameter
# This example performs the same analysis but separately for each Species
# Returns nested list with stats and filtered data for each group
top_perc(iris,
         perc = 0.1,                 # Select top 10%
         trait = c("Petal.Width"),   # Column to analyze
         by = "Species")             # Group by Species

# Example 3: Complex example with multiple percentages and grouping variables
# Reshape data from wide to long format for Sepal.Length and Sepal.Width
iris |>
  tidyr::pivot_longer(1:2,
                      names_to = "names",
                      values_to = "values") |>
  mintyr::top_perc(
    perc = c(0.1, -0.2),
    trait = "values",
    by = c("Species", "names"),
    type = "mean_sd")
```