\name{helpers}
\alias{formatGenomicPosition}
\alias{encodeDNAString}
\alias{defineBlocks}
\alias{getChromSize}
\title{helper functions}
\description{
These functions are helpers for dealing with tally data stored in HDF5 files.
}
\usage{
formatGenomicPosition( x, unit = "Mb", divisor = 1000000, digits = 3,
nsmall = 1 )
encodeDNAString( ds )
defineBlocks( start, stop, blocksize )
getChromSize( tallyFile, group, dataset = "Reference", posDim = 1 )
}
\arguments{
  \item{x}{ Numerical genomic position }
  \item{unit}{ Which unit to convert the position to }
  \item{divisor}{divisor corresponding to the unit, e.g. 'Mb' ->
    \code{1e6}, 'Kb' -> \code{1e3}}
  \item{digits}{number of digits to keep}
  \item{nsmall}{nsmall parameter to the format function}
  \item{ds}{A DNAString object to be translated into the nucleotide
  encoding used by HDF5 tally files.}
  \item{start}{first position}
  \item{stop}{last position}
  \item{blocksize}{size of blocks}
  \item{tallyFile}{Tally file to work on}
  \item{group}{Group within \code{tallyFile} that we want to find the chromosome size for}
  \item{dataset}{Dataset to extract chromosome size from - default is "Reference"}
  \item{posDim}{Which dimension of the dataset describes the genomic position}
}
\details{
  formatGenomicPosition:
  Helps with formatting genomic positions for annotating axes in mismatch
  plots etc.

  encodeDNAString:
  This translates a DNAString object into a compatible encoding that can
  be written to an HDF5-based tally file in the \code{Reference} dataset.
  Since the Python script for generating tallies only sets the Reference
  dataset in positions where mismatches exist, updating the Reference
  dataset becomes necessary if one would like to perform analyses
  involving sequence context (GC-bias, mutationSpectrum, etc.).
  
  defineBlocks:
  This function returns a \code{data.frame} with the columns \code{Start} and \code{End} for blocks of size \code{blocksize} spanning the interval \code{[start, stop]}.
  
  getChromSize:
  This function is a helper to quickly look up the chromosome size for a given group within a tally file.
}
\value{
  formatGenomicPosition:
  formatted genomic position, e.g. "123.4 Mb"

  encodeDNAString:
  A numeric vector encoding the nucleotide sequence provided in
  \code{ds} according to the scheme \code{c("A"=0,"C"=1,"G"=2,"T"=3)}.
  
  defineBlocks:
  A \code{data.frame} with the columns \code{Start} and \code{End} for blocks of size \code{blocksize} spanning the interval \code{[start, stop]}.
  
  getChromSize:
  Returns a numeric that is the size of the chromosome.
}
\author{
Paul Pyl
}

\examples{
  formatGenomicPosition(123456789)
  library(Biostrings)
  lapply( DNAStringSet( c("simple"="ACGT", "movie"="GATTACA") ), encodeDNAString )
  getChromSize( system.file("extdata", "example.tally.hfs5", package="h5vcData"), "/ExampleStudy/16" )
}
