% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/modelPrune.R
\name{modelPrune}
\alias{modelPrune}
\title{Model-Based Predictor Pruning}
\usage{
modelPrune(
  formula,
  data,
  engine = "lm",
  criterion = "vif",
  limit = 5,
  force_in = NULL,
  max_steps = NULL,
  ...
)
}
\arguments{
\item{formula}{A model formula specifying the response and predictors.
May include random effects for mixed models (e.g., \code{y ~ x1 + x2 + (1|group)}).}

\item{data}{A data.frame containing the variables in the formula.}

\item{engine}{Either a character string for built-in engines, or a list defining a custom engine.

\strong{Built-in engines} (character string):
\itemize{
\item \code{"lm"} (default): Linear models via \code{stats::lm()}
\item \code{"glm"}: Generalized linear models via \code{stats::glm()} (requires a \code{family} argument)
\item \code{"lme4"}: Mixed models via \code{lme4::lmer()} or \code{lme4::glmer()} (requires the \pkg{lme4} package)
\item \code{"glmmTMB"}: Generalized linear mixed models via \code{glmmTMB::glmmTMB()} (requires the \pkg{glmmTMB} package)
}

\strong{Custom engine} (named list with required components):
\itemize{
\item \code{fit}: function(formula, data, ...) that returns a fitted model object
\item \code{diagnostics}: function(model, fixed_effects) that returns a named numeric vector
of diagnostic scores (one per fixed effect, higher values = worse)
\item \code{name} (optional): character string used in error messages (default: "custom")
}}

\item{criterion}{Character string specifying the diagnostic criterion for pruning.
For built-in engines, only \code{"vif"} (Variance Inflation Factor) is supported.
For custom engines, this parameter is ignored (diagnostics are computed by the
engine's \code{diagnostics} function). Default: \code{"vif"}.}

\item{limit}{Numeric scalar. Maximum allowed value for the criterion.
Predictors with diagnostic values exceeding this limit are iteratively removed.
Default: 5 (common VIF threshold).}

\item{force_in}{Character vector of predictor names that must be retained
in the final model. These variables are never removed during pruning; if any
of them violates the diagnostic threshold, the function stops with an error.
Default: \code{NULL}.}

\item{max_steps}{Integer. Maximum number of pruning iterations. If \code{NULL} (default),
pruning continues until all diagnostics are at or below \code{limit} or no
removable predictors remain.}

\item{...}{Additional arguments passed to the modeling function (e.g., \code{family}
for glm/glmer, control parameters for lme4/glmmTMB).}
}
\value{
A data.frame containing only the retained predictors (and response).
The result has the following attributes:
\describe{
\item{selected_vars}{Character vector of retained predictor names}
\item{removed_vars}{Character vector of removed predictor names (in order of removal)}
\item{engine}{Character string indicating which engine was used (for custom engines, this is the engine's \code{name} field)}
\item{criterion}{Character string indicating which criterion was used}
\item{limit}{The threshold value used}
\item{final_model}{The final fitted model object (optional)}
}
}
\description{
\code{modelPrune()} performs iterative removal of fixed-effect predictors based on
model diagnostics (e.g., VIF) until all remaining predictors satisfy a
specified threshold. It supports linear models, generalized linear models,
and mixed models.
}
\details{
\code{modelPrune()} works by:
\enumerate{
\item Parsing the formula to identify fixed-effect predictors
\item Fitting the initial model
\item Computing diagnostics for each fixed-effect predictor
\item Checking feasibility of \code{force_in} constraints
\item Iteratively removing the predictor with the worst diagnostic value
(excluding \code{force_in} variables) until all diagnostics are at or below
\code{limit}, \code{max_steps} iterations have been performed, or no removable
predictors remain
\item Returning the pruned data frame
}

\strong{Random Effects}: For mixed models (lme4, glmmTMB), only fixed-effect
predictors are considered for pruning. Random-effect structure is preserved
exactly as specified in the original formula.

\strong{VIF Computation}: Variance Inflation Factors are computed from the
fixed-effects design matrix. For categorical predictors, VIF represents
the inflation for the entire factor (not individual dummy variables).

\strong{Determinism}: The algorithm is deterministic. Ties in diagnostic values
are broken by removing the predictor that appears last in the formula.

\strong{Force-in Constraints}: If variables in \code{force_in} violate the diagnostic
threshold, the function will error. This ensures that the constraint is
feasible before pruning begins.
}
\examples{
# Linear model with VIF-based pruning
data(mtcars)
pruned <- modelPrune(mpg ~ ., data = mtcars, engine = "lm", limit = 5)
names(pruned)

# Force certain predictors to remain
pruned <- modelPrune(mpg ~ ., data = mtcars, force_in = "drat", limit = 20)

# GLM example (requires family argument)
pruned <- modelPrune(am ~ ., data = mtcars, engine = "glm",
                     family = binomial(), limit = 5)

\dontrun{
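# Custom engine example (INLA). y, x1, x2, x3 and df are placeholders for
# user-supplied data. The \%||\% operator requires R >= 4.4.0 (or rlang).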
inla_engine <- list(
  name = "inla",
  fit = function(formula, data, ...) {
    inla::inla(formula = formula, data = data,
               family = list(...)$family \%||\% "gaussian",
               control.compute = list(config = TRUE))
  },
  diagnostics = function(model, fixed_effects) {
    scores <- model$summary.fixed[, "sd"]
    names(scores) <- rownames(model$summary.fixed)
    scores[fixed_effects]
  }
)

pruned <- modelPrune(y ~ x1 + x2 + x3, data = df,
                     engine = inla_engine, limit = 0.5)
}

}
\seealso{
\code{\link{corrPrune}} for association-based predictor pruning,
\code{\link{corrSelect}} for exhaustive subset enumeration.
}
