% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tune_imp.R
\name{tune_imp}
\alias{tune_imp}
\title{Tune Parameters for Imputation Methods}
\usage{
tune_imp(
  obj,
  parameters,
  .f = NULL,
  rep = 1,
  num_na = 100,
  rowmax = 0.9,
  colmax = 0.9,
  check_sd = FALSE,
  max_iter = 1000,
  .progress = TRUE,
  cores = 1
)
}
\arguments{
\item{obj}{A numeric matrix with \strong{samples in rows} and \strong{features in columns}.}

\item{parameters}{A data.frame specifying parameter combinations to tune, where each column
represents a parameter accepted by \code{.f} (excluding \code{obj}). List columns are supported
for complex parameters. Duplicate rows are automatically removed. When \code{.f = NULL}, the
imputation method is inferred from the column names:
\itemize{
\item \code{k}: K-NN imputation
\item \code{ncp}: PCA imputation
\item \code{k} or \code{ncp} with \code{n_feat} and \code{n_overlap}: sliding window imputation
}}

\item{.f}{Custom function to tune. Must accept \code{obj} as the first argument, accept the arguments in \code{parameters},
and return a matrix with the same dimension as \code{obj} (default = \code{NULL}).}

\item{rep}{Either an integer specifying the number of repetitions for random NA injection, or
a list defining fixed NA positions for each repetition (in which case \code{num_na} is ignored).
The list elements can be one of the following formats:
\itemize{
\item A two-column integer matrix. The first column is the row index, the second column is the column index.
Each row specifies the position of one missing value.
\item A numeric vector specifying linear locations of NAs.
}}

\item{num_na}{The number of missing values used to estimate prediction quality.}

\item{rowmax}{Number between 0 and 1. NA injection cannot create rows whose proportion of missing values exceeds this number.}

\item{colmax}{Number between 0 and 1. NA injection cannot create columns whose proportion of missing values exceeds this number.}

\item{check_sd}{Whether to check if zero-variance columns are created after NA injection (default = \code{FALSE}).}

\item{max_iter}{Maximum number of iterations to attempt finding valid NA positions (default = 1000).}

\item{.progress}{Show progress bar (default = \code{TRUE}).}

\item{cores}{Controls the number of cores to parallelize over for K-NN and sliding window K-NN imputation only.
To set up parallelization for PCA and sliding window PCA imputation, use \code{mirai::daemons()}.}
}
\value{
A \code{tibble::tibble()} with columns from \code{parameters}, plus \code{param_set} (unique parameter set ID),
\code{rep} (repetition index), and \code{result} (a nested tibble containing \code{truth} and \code{estimate}
columns for true and imputed values, respectively).
}
\description{
Tunes hyperparameters for imputation methods such as \code{\link[=slide_imp]{slide_imp()}}, \code{\link[=knn_imp]{knn_imp()}}, \code{\link[=pca_imp]{pca_imp()}},
or user-supplied custom functions by repeated cross-validation.
}
\details{
The function supports tuning for built-in imputation methods ("slide_imp", "knn_imp", "pca_imp")
or custom functions provided via \code{.f}.

When using a custom \code{.f}, the columns in \code{parameters} must correspond to the arguments of \code{.f}
(excluding the \code{obj} argument). The custom function must accept \code{obj} (a numeric matrix) as its
first argument and return a numeric matrix of identical dimensions.

Tuning results can be evaluated using the \code{{yardstick}} package or \code{\link[=compute_metrics]{compute_metrics()}}.
}
\examples{
# Load the example data, transpose it, and keep the first 20 rows and
# 200 randomly sampled columns of the transposed matrix
data(khanmiss1)
obj <- t(khanmiss1)[1:20, sample.int(nrow(khanmiss1), size = 200)]

# Tune full K-NN imputation over two candidate values of k
parameters <- data.frame(k = c(5, 10))

# With random NA injection: 1 repetition, 20 injected NAs
results <- tune_imp(obj, parameters, rep = 1, num_na = 20)

# Compute metrics on results
compute_metrics(results)

# Tune with fixed NA positions (2 repetitions, one list element per repetition)
# Each matrix row is one position: first column = row index, second column = column index
# Positions must not be NA in the original `obj`
na_positions <- list(
  matrix(c(1, 2, 3, 1, 1, 1), ncol = 2), # Rows 1-3 in column 1
  matrix(c(2, 3, 4, 2, 2, 2), ncol = 2) # Rows 2-4 in column 2
)
results_fixed <- tune_imp(
  obj,
  data.frame(k = 10),
  rep = na_positions
)

# Compute metrics for the fixed-position run
compute_metrics(results_fixed)

# Custom imputation function example, parallelized across 2 cores with `mirai::daemons()`
custom_imp <- function(obj, mean = 0, sd = 1) {
  # Locate the missing entries and count them
  missing_mask <- is.na(obj)
  n_missing <- sum(missing_mask)
  # Fill every missing entry with a random normal draw using the given mean and sd
  obj[missing_mask] <- rnorm(n = n_missing, mean = mean, sd = sd)
  # Return the filled object; its dimensions are unchanged
  obj
}

mirai::daemons(2) # Set up 2 cores for parallelization
# Three parameter sets, one per row: (mean, sd) = (0, 1), (0, 2), (1, 1)
parameters_custom <- data.frame(mean = c(0, 0, 1), sd = c(1, 2, 1))
# Tune the custom function: 2 repetitions with 20 randomly injected NAs each
results_custom <- tune_imp(
  obj,
  parameters_custom,
  .f = custom_imp,
  rep = 2,
  num_na = 20
)
mirai::daemons(0) # Reset the daemons once tuning is finished
compute_metrics(results_custom)

}
