% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{groupSimilarSequences}
\alias{groupSimilarSequences}
\title{Create a conversion table for collapsing similar sequences}
\usage{
groupSimilarSequences(
  seqs,
  scores,
  collapseMaxDist = 0,
  collapseMinScore = 0,
  collapseMinRatio = 0,
  verbose = FALSE
)
}
\arguments{
\item{seqs}{Character vector with nucleotide sequences (or pairs of 
sequences concatenated with "_") to be collapsed. The sequences must 
all be of the same length.}

\item{scores}{Numeric vector of "scores" for the sequences, typically
the total read/UMI count. A higher score will be preferred when 
deciding which sequence to use as the representative for a group of 
collapsed sequences.}

\item{collapseMaxDist}{Numeric scalar defining the tolerance for collapsing 
similar sequences. If the value is in [0, 1), it defines the maximal 
Hamming distance in terms of a fraction of sequence length
(\code{round(collapseMaxDist * nchar(sequence))}).
A value greater than or equal to 1 is rounded and directly used as the maximum
allowed Hamming distance. Note that sequences can only be
collapsed if they are all of the same length. The default value is 0.}

\item{collapseMinScore}{Numeric scalar, indicating the minimum score 
required for a sequence to be considered as a representative for a 
group of similar sequences (i.e., to allow other sequences to be 
collapsed into it). The default value is 0.}

\item{collapseMinRatio}{Numeric scalar. During collapsing of
similar sequences, a low-frequency sequence will be collapsed 
with a higher-frequency sequence only if the ratio between the 
high-frequency and the low-frequency scores is at least this 
high. A value of 0 indicates that no such check is performed.}

\item{verbose}{Logical scalar, whether to print progress messages.}
}
\value{
A \code{data.frame} with two columns, containing the input sequences 
and the representatives of the groups obtained by grouping similar
sequences, respectively.
}
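\description{
Group similar sequences of equal length based on their Hamming distance,
and create a conversion table that maps each input sequence to the
representative sequence of its group.
}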
\examples{
seqs <- c("AACGTAGCA", "ACCGTAGCA", "AACGGAGCA", "ATCGGAGCA", "TGAGGCATA")
scores <- c(5, 1, 3, 1, 8)
groupSimilarSequences(seqs = seqs, scores = scores, 
                      collapseMaxDist = 1, collapseMinScore = 0, 
                      collapseMinRatio = 0, verbose = FALSE)
                            
}
\author{
Michael Stadler, Charlotte Soneson
}
