% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/polyA_from_dropseq.R
\name{inferPolyASites}
\alias{inferPolyASites}
\title{Infer polyA sites from droplet sequencing data}
\usage{
inferPolyASites(
  genes,
  bam,
  polyA_downstream = 100,
  min_cvrg = 0,
  wdsize = 200,
  by = 1,
  extend_downstream = 0,
  perc_threshold = 0.9,
  parallel = FALSE
)
}
\arguments{
\item{genes}{\code{\link[GenomicRanges:GRangesList-class]{GRangesList}} object containing
annotations of genes for which polyA sites are to be estimated.}

\item{bam}{Path to .bam file containing aligned reads used for polyA site estimation.}

\item{polyA_downstream}{(numeric) How far downstream of a peak in coverage are polyA sites
expected? Somewhat depends on input DNA fragment size. (default: 100). Importantly, this value
should not be larger than half of the window size (wdsize), else polyA sites might be moved
outside of the transcripts, even if they were extended using the extend_downstream parameter.}

\item{min_cvrg}{(numeric) Minimal coverage for peaks to be considered for polyA site estimation
(default: 0).}

\item{wdsize}{(numeric) Window size to estimate sequencing coverage along transcripts (default:
200).}

\item{by}{(numeric) Steps in basepairs in which the sliding window should be moved along
transcripts to estimate smooth coverage (default: 1).}

\item{extend_downstream}{(numeric) To which amount should transcript annotations be extended
downstream when estimating polyA sites (default: 0). A reasonable value (e.g. 100-200 bp)
allows to account for polyA sites that fall a few basepairs downstream of terminal exons.}

\item{perc_threshold}{(numeric) Only sequencing coverage peaks within \code{perc_threshold}
percentile of coverage are considered for polyA site estimation (default: 0.9). Avoids that
small peaks that in coverage are considered, resulting in manby false polyA sites.}

\item{parallel}{(logical) Triggers parallel computing using the
\code{\link[BiocParallel]{BiocParallel-package}} package. This requires that a parallel
back-end was registered prior to executing the function. (default: FALSE).}
}
\value{
A \code{\link[GenomicRanges:GRanges-class]{GRanges}} object containing coordinates of 
  estimated polyadenylation sites.
}
\description{
Infer polyA sites from 10X, Drop-seq or similar 3' enriched sequencing data. Simple function
that looks for peaks in read coverage to estimate potential polyA sites. Default parameters are
chosen because they work reasonably well with the example data, but they should typically be
empirically selected by verifying the output.
}
\examples{
library(GenomicRanges)

# protein-coding exons of genes within chr11 region
data("chr11_genes")
target_genes <- split(chr11_genes, f = chr11_genes$gene_name)

# subset of target genes for quick example
target_genes <- target_genes[18:27]

# bam file containing aligned Drop-seq reads
dropseq_bam <- system.file("extdata", "chr11_k562_dropseq.bam", package = "TAPseq")

# infer polyA sites for all target genes with adjusted parameters. parameter values depend on the
# input data and at this stage it's best to try different settings and check the results
polyA_sites <- inferPolyASites(target_genes, bam = dropseq_bam, polyA_downstream = 50,
                               wdsize = 100, min_cvrg = 1, parallel = TRUE)
}
