% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summarize.R
\name{summarize_numerical}
\alias{summarize_numerical}
\title{Summarize numerical data over groupings of annotated regions}
\usage{
summarize_numerical(
  annotated_regions,
  by = c("annot.type", "annot.id"),
  over,
  quiet = FALSE
)
}
\arguments{
\item{annotated_regions}{The \code{GRanges} result of \code{annotate_regions()}.}

\item{by}{A character vector of the columns of \code{as.data.frame(annotated_regions)} to group over. Default is \code{c("annot.type", "annot.id")}.}

\item{over}{A character vector of the numerical columns in \code{as.data.frame(annotated_regions)} to \code{count}, take the \code{mean}, and take the \code{sd} over after grouping according to the \code{by} columns. NOTE: If more than one value is used, the naming scheme for the resulting \code{dplyr::tbl} summary columns is \code{COLNAME_n}, \code{COLNAME_mean}, \code{COLNAME_sd}. If \code{over} has length one, then the column names are \code{n}, \code{mean}, \code{sd}.}

\item{quiet}{If \code{FALSE} (the default), print progress messages; if \code{TRUE}, suppress them.}
}
\value{
A grouped \code{dplyr::tbl_df} containing the \code{count}, \code{mean}, and \code{sd} of the \code{over} columns for each of the \code{by} groupings.
}
\description{
Given a \code{GRanges} of annotated regions, summarize numerical data columns based on a grouping.
}
\details{
NOTE: We do not take the distinct values of \code{seqnames}, \code{start}, \code{end}, \code{annot.type} as in the other \code{summarize_*()} functions because in the case of a region that intersected two distinct exons, using \code{distinct()} would destroy the information of the mean of the numerical column over one of the exons, which is not desirable.
}
\examples{
### Demonstrate the grouping options on a very simple BED file

# Load the premade CpG annotations shipped with the package
data('annotations', package = 'annotatr')

# Locate the example BED file and declare the types of its extra columns
bed_path <- system.file(
    'extdata',
    'test_read_multiple_data_nohead.bed',
    package = 'annotatr')
extra_col_types <- c(
    pval = 'numeric',
    mu1 = 'integer',
    mu0 = 'integer',
    diff_exp = 'character')

# Read the regions against hg19, passing 'coverage' as rename_score
regions <- read_regions(
    con = bed_path,
    genome = 'hg19',
    extraCols = extra_col_types,
    rename_score = 'coverage')

# Annotate the regions without regard to strand
annotated <- annotate_regions(
    regions = regions,
    annotations = annotations,
    ignore.strand = TRUE)

# Summarize three numerical columns, grouped by annotation type and ID
summary_by_annot <- summarize_numerical(
    annotated_regions = annotated,
    by = c('annot.type', 'annot.id'),
    over = c('coverage', 'mu1', 'mu0'),
    quiet = FALSE)

# Summarize the same columns, grouped by the diff_exp column instead
summary_by_diff_exp <- summarize_numerical(
    annotated_regions = annotated,
    by = c('diff_exp'),
    over = c('coverage', 'mu1', 'mu0'))

}
