\name{callVariantsFisher}
\alias{callVariantsPairedFisher}
\title{Paired variant calling using Fisher's exact test}
\description{
This function implements a simple paired variant calling strategy based on the Fisher test.
}
\usage{
callVariantsPairedFisher(data, sampledata, pValCutOff = 0.05, minCoverage = 5, mergeDels = TRUE, mergeAggregator = mean)
}
\arguments{
\item{data}{A \code{list} with elements \code{Counts} (a 4d
  \code{integer} array of size [1:12, 1:2, 1:k, 1:n]), 
  \code{Coverages} (a 3d \code{integer} array of size [1:2, 1:k, 1:n]),
  \code{Reference} (a 1d \code{integer} vector of size [1:n]) -- see Details.}
\item{sampledata}{A \code{data.frame} with \code{k} rows (one for each
  sample) and columns \code{Type}, \code{Column} and (\code{Group}
  or \code{Patient}). The tally file should contain this information as
  a group attribute, see \code{\link{getSampleData}} for an example.}
\item{pValCutOff}{Maximum allowed p-Value for the fisher test on contingency matrix \code{matrix(c(caseCounts, caseCoverage, controlCounts, controlCoverage), nrow=2)}.}
\item{minCoverage}{Required coverage in both samples for a call to be made.}
\item{mergeDels}{Boolean flag specifying whether adjacent deletions should be merged}
\item{mergeAggregator}{Which function to use for aggregating the values associated with adjacent deletions that are being merged}
}
\details{
  \code{data} is a list which has to at least contain the
  \code{Counts}, \code{Coverages} and \code{Reference} datasets. This list will usually be
  generated by a call to the \code{h5dapply} function in which the tally
  file, chromosome, datasets and regions within the datasets would be
  specified. See \code{\link{h5dapply}} for specifics.

  \code{callVariantsPairedFisher} implements a simple pairwise variant
  calling approach based on applying \code{\link{fisher.test}} to the following contingency matrix:
  
  \tabular{rr}{
    caseSupport \tab caseCoverage - caseSupport\cr
    controlSupport \tab controlCoverage - controlSupport\cr
  }
  
  The results are filtered by \code{pValCutOff} and \code{minCoverage}.
}
\value{
The return value is a \code{data.frame} with the following columns:
  \item{Chrom}{The chromosome the potential variant is on}
  \item{Start}{The starting position of the variant }
  \item{End}{The end position of the variant}
  \item{Sample}{The \code{Case} sample in which the variant was observed}
  \item{refAllele}{The reference allele}
  \item{altAllele}{The alternate allele}
  \item{caseCount}{Support for the variant in the \code{Case} sample}
  \item{caseCoverage}{Coverage of the variant position in the \code{Case} sample}
  \item{controlCount}{Support for the variant in the \code{Control} sample}
  \item{controlCoverage}{Coverage of the variant position in the \code{Control} sample}
  \item{pValue}{The \code{p.value} of the \code{fisher.test}}
}
\author{
Paul Pyl
}
\examples{
# Attach the package and locate the example tally file from the h5vcData package
library(h5vc)
tallyFile <- system.file( "extdata", "example.tally.hfs5", package = "h5vcData" )
studyGroup <- "/ExampleStudy/16"
sampleData <- getSampleData( tallyFile, studyGroup )

# Region to scan: a symmetric window around the position of interest
centre <- 29979629
halfWidth <- 2000
region <- c(centre - halfWidth, centre + halfWidth)

# Apply the paired Fisher caller to the selected range of the tally file
callsPerBlock <- h5dapply(
  filename = tallyFile,
  group = studyGroup,
  names = c("Coverages", "Counts", "Reference"),
  range = region,
  blocksize = 1000,
  FUN = callVariantsPairedFisher,
  sampledata = sampleData,
  pValCutOff = 0.1,
  verbose = TRUE
)

# Row-bind the list returned by h5dapply into a single table and print it
variantCalls <- do.call(rbind, callsPerBlock)
variantCalls
}
