% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/TranscriptionDataSet-generics.R,
%   R/TranscriptionDataSet-methods.R
\docType{methods}
\name{estimateGapDistance}
\alias{estimateGapDistance}
\alias{estimateGapDistance,TranscriptionDataSet,GRanges-method}
\title{estimateGapDistance}
\usage{
estimateGapDistance(
  object,
  annot,
  coverage.cutoff,
  filter.annot = TRUE,
  fpkm.quantile = 0.25,
  gap.dist.range = seq(from = 0, to = 10000, by = 100)
)

\S4method{estimateGapDistance}{TranscriptionDataSet,GRanges}(
  object,
  annot,
  coverage.cutoff,
  filter.annot = TRUE,
  fpkm.quantile = 0.25,
  gap.dist.range = seq(from = 0, to = 10000, by = 100)
)
}
\arguments{
\item{object}{A \code{\link{TranscriptionDataSet}} object.}

\item{annot}{\code{\link[GenomicRanges]{GRanges}}. Reference annotations.}

\item{coverage.cutoff}{\code{Numeric}. A cutoff value used to discard regions
with low fragment coverage, which represent expression noise. By default,
the value stored in the \code{coverageCutoff} slot of the supplied
\code{TranscriptionDataSet} object is used. The optimal cutoff value can
be calculated by calling the \code{\link{estimateBackground}} function.}

\item{filter.annot}{\code{Logical}. Whether to filter out lowly expressed
annotations before estimating error rates. Default: TRUE.}

\item{fpkm.quantile}{\code{Numeric}. A number in the range (0, 1). A cutoff
value used for filtering lowly expressed annotations. The value corresponds to
the FPKM quantile estimated for the supplied annotations. Default: 0.25.}

\item{gap.dist.range}{A numeric vector specifying a range of gap distances
to test. By default, the range is from 0 to 10000 with a step of 100.}
}
\value{
The slot \code{gapDistanceTest} of the provided
    \code{TranscriptionDataSet} object is updated with a
    \code{data.frame} containing the estimated error rates for each
    tested gap distance (see \code{\link{getTestedGapDistances}} for
    details).
}
\description{
The ultimate goal of \code{transcriptR} is to identify continuous regions
of transcription. However, in some areas of the genome it is not possible
to detect transcription, because of the presence of low-mappability
regions and (high copy number) repeats. Sequencing reads cannot be uniquely
mapped to these positions, leading to the formation of gaps in otherwise
continuous coverage profiles and segmentation of transcribed regions into
multiple smaller fragments. The gap distance describes the maximum allowed
distance between adjacent fragments to be merged into one transcript. To
choose the optimal value for the gap distance, the detected transcripts
should largely be in agreement with available reference annotations.
To accomplish this, the function is built on the methodology proposed by
\href{http://www.sciencedirect.com/science/article/pii/S009286741100376X}{Hah et al. (Cell, 2011)}.
In brief, two types of errors are defined:
\itemize{
    \item \code{dissected} error - the ratio of annotations that are segmented
        into two or more fragments.
    \item \code{merged} error - the ratio of non-overlapping annotations that
        are merged by mistake in the experimental data.
}
There is an interdependence between the two types of errors. Increasing the gap
distance decreases the \code{dissected} error, by detecting fewer, but longer
transcripts, while the \code{merged} error will increase as more detected
transcripts will span multiple annotations. The gap distance with the lowest
sum of the two error types is chosen as the optimal value.
}
\examples{
### Load TranscriptionDataSet object
data(tds)

### Load reference annotations (knownGene from UCSC)
data(annot)

### Estimate gap distance minimizing error rate
### Define the range of gap distances to test
gdr <- seq(from = 0, to = 10000, by = 1000)

estimateGapDistance(object = tds, annot = annot, coverage.cutoff = 5,
filter.annot = FALSE, gap.dist.range = gdr)

### View estimated gap distance
tds

}
\references{
Hah N, Danko CG, Core L, Waterfall JJ, Siepel A, Lis JT, Kraus WL.
    A rapid, extensive, and transient transcriptional response to estrogen
    signaling in breast cancer cells. Cell. 2011.
}
\seealso{
\code{\link{constructTDS}} \code{\link{plotErrorRate}}
    \code{\link{getTestedGapDistances}}
}
\author{
Armen R. Karapetyan
}
