% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RandomForest_selected.R
\name{RandomForest_selected}
\alias{RandomForest_selected}
\title{Select Important ASVs/OTUs Using Random Forest}
\source{
Based on public API usage of randomForest and phyloseq packages.
}
\usage{
RandomForest_selected(
  physeq,
  response_var,
  minlib = 5000,
  prunescale = 1e-05,
  ntree = 100,
  n_top_predictors = 100,
  output_csv = NULL,
  na_vars = NULL
)
}
\arguments{
\item{physeq}{A \code{phyloseq} or \code{TreeSummarizedExperiment (TSE)} object containing microbiome data.}

\item{response_var}{A character string specifying the response variable from the sample metadata.}

\item{minlib}{A numeric value specifying the minimum library size for filtering low-abundance taxa. Default is \code{5000}.}

\item{prunescale}{A numeric value specifying the relative abundance threshold for pruning rare OTUs. Default is \code{1e-05}.}

\item{ntree}{An integer specifying the number of trees to grow in the Random Forest model. Default is \code{100}.}

\item{n_top_predictors}{An integer specifying the number of top ASVs/OTUs to select based on feature importance. Default is \code{100}.}

\item{output_csv}{An optional character string specifying the output CSV file name. If \code{NULL}, no file is saved. Default is \code{NULL}.}

\item{na_vars}{A character vector specifying metadata variables to check for missing values (\code{NA}). If \code{NULL}, only \code{response_var} is checked.}
}
\value{
Returns a pruned \code{phyloseq} or \code{TreeSummarizedExperiment (TSE)} object containing only the selected ASVs/OTUs.
If the input is \code{TSE}, the output is converted back to \code{TSE}.
}
\description{
This function selects the most important Amplicon Sequence Variants (ASVs) or Operational Taxonomic Units (OTUs)
based on a Random Forest model. If a \code{TreeSummarizedExperiment (TSE)} is provided, it is first converted to \code{phyloseq}.
The function allows filtering and pruning of taxa before selecting the most important features.
Optionally, the selected ASVs/OTUs can be saved as a CSV file.
}
\examples{
# Run the examples only when the DspikeIn package, which provides the
# example data, is installed.
if (requireNamespace("DspikeIn", quietly = TRUE)) {
  # Load the physeq_16SOTU example dataset from DspikeIn
  data("physeq_16SOTU", package = "DspikeIn")

  # Perform Random Forest feature selection with a small forest (30 trees),
  # keeping the 30 top-ranked ASVs/OTUs; samples are checked for NA in the
  # metadata variables listed in na_vars
  rf_physeq <- RandomForest_selected(
    physeq_16SOTU,
    prunescale = 0.00001,
    minlib = 5000,
    ntree = 30,
    n_top_predictors = 30,
    response_var = "Host.genus",
    na_vars = c("Habitat", "Ecoregion.III", "Host.genus", "Diet")
  )
  # Relaxed settings: low pruning threshold (retains rare taxa), with ntree
  # and n_top_predictors left at their defaults
  rf_physeq_relaxed <- RandomForest_selected(
    physeq_16SOTU,
    response_var = "Host.genus",
    minlib = 5000,
    prunescale = 0.00001,
    na_vars = c("Habitat", "Ecoregion.III", "Host.genus", "Diet")
  )

  # Strict settings: larger minimum library size, higher pruning threshold,
  # more trees, and only the 30 top-ranked ASVs/OTUs retained
  rf_physeq_strict <- RandomForest_selected(
    physeq_16SOTU,
    response_var = "Host.genus",
    minlib = 20000,
    prunescale = 0.0002,
    ntree = 200,
    n_top_predictors = 30,
    na_vars = c("Habitat", "Ecoregion.III", "Host.genus", "Diet")
  )

  # Convert the phyloseq object to a TreeSummarizedExperiment (TSE) object
  tse_16SOTU <- convert_phyloseq_to_tse(physeq_16SOTU)

  # Perform Random Forest feature selection on the TSE object using the
  # default filtering and model settings
  rf_tse <- RandomForest_selected(
    tse_16SOTU,
    response_var = "Host.genus",
    na_vars = c("Habitat", "Ecoregion.III", "Host.genus", "Diet")
  )
}

}
\seealso{
\code{\link[randomForest]{randomForest}}, \code{\link[phyloseq]{prune_taxa}}
}
