% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analysis-functions.R
\name{top_integrations}
\alias{top_integrations}
\title{Sorts and keeps the top n integration sites based on the values
in a given column.}
\usage{
top_integrations(
  x,
  n = 20,
  columns = "fragmentEstimate_sum_RelAbundance",
  keep = "everything",
  key = NULL
)
}
\arguments{
\item{x}{An integration matrix (data frame containing
\code{mandatory_IS_vars()})}

\item{n}{How many integrations should be sliced (in total or
for each group)? Must be numeric
or integer and greater than 0}

\item{columns}{Columns to use for the sorting. If more than one column
is supplied, primary ordering is done on the first column,
secondary ordering on all other columns}

\item{keep}{Names of the columns to keep besides \code{mandatory_IS_vars()}
and \code{columns}}

\item{key}{Either \code{NULL} or a character vector of column names to group
by. If not \code{NULL}, the input will be grouped and the top n rows will
be extracted from each group.}
}
\value{
Either a data frame with at most n rows or
a data frame with at most n*(number of groups) rows.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
The input data frame will be sorted by the highest values in
the columns specified and the top n rows will be returned as output.
The user can choose to keep additional columns in the output
by passing a vector of column names or by passing one of 2 "shortcuts":
\itemize{
\item \code{keep = "everything"} keeps all columns in the original data frame
\item \code{keep = "nothing"} only keeps the mandatory columns
(\code{mandatory_IS_vars()}) plus the columns in the \code{columns} parameter.
}
}
\section{Required tags}{

The function will explicitly check for the presence of these tags:
\itemize{
\item All columns declared in \code{mandatory_IS_vars()}
}
}

\examples{
smpl <- tibble::tibble(
    chr = c("1", "2", "3", "4", "5", "6"),
    integration_locus = c(14536, 14544, 14512, 14236, 14522, 14566),
    strand = c("+", "+", "-", "+", "-", "+"),
    CompleteAmplificationID = c("ID1", "ID2", "ID1", "ID1", "ID3", "ID2"),
    Value = c(3, 10, 40, 2, 15, 150),
    Value2 = c(456, 87, 87, 9, 64, 96),
    Value3 = c("a", "b", "c", "d", "e", "f")
)
top <- top_integrations(smpl,
    n = 3,
    columns = c("Value", "Value2"),
    keep = "nothing"
)
top_key <- top_integrations(smpl,
    n = 3,
    columns = "Value",
    keep = "Value2",
    key = "CompleteAmplificationID"
)
}
\seealso{
Other Analysis functions: 
\code{\link{CIS_grubbs}()},
\code{\link{HSC_population_size_estimate}()},
\code{\link{compute_abundance}()},
\code{\link{cumulative_is}()},
\code{\link{gene_frequency_fisher}()},
\code{\link{is_sharing}()},
\code{\link{iss_source}()},
\code{\link{sample_statistics}()},
\code{\link{top_targeted_genes}()}
}
\concept{Analysis functions}
