% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/do.ora.R
\name{ora}
\alias{ora}
\alias{plot_ora_bias}
\title{Performs an overrepresentation analysis, (optionally) accounting for bias.}
\usage{
ora(
  x,
  gsd,
  selected = "significant",
  groups = NULL,
  feature.bias = NULL,
  universe = NULL,
  restrict.universe = FALSE,
  plot.bias = FALSE,
  ...,
  as.dt = FALSE
)

plot_ora_bias(x, selected, feature.bias, ...)
}
\arguments{
\item{x}{A data.frame with feature-level statistics. Minimally, this should
have a \code{"feature_id"} (character) column, but read on ...}

\item{gsd}{The GeneSetDb}

\item{selected}{Either the name of a logical column in \code{dat} used to subset
out the features to run the enrichement over, or a character vector of
\code{"feature_id"}s that are selected from \code{dat[["feature_id"]]}.}

\item{groups}{Encodes groups of features that we can use to test selected
features individual, as well as "all" together. This can be specified by:
(1) specifying a name of a column in \code{dat} to split the enriched features
into subgroups. (2) A named list of features to intersect with \code{selected}.
By default this is \code{NULL}, so we only run enrichment over
all elements in \code{selected}. See examples for details.}

\item{feature.bias}{If \code{NULL} (default), no bias is used in enrichment
analysis. Otherwise, can be the name of a column in \code{dat} to extract
a numeric bias vector (gene length, GC content, average expression, etc.)
or a named (using featureIds) numeric vector of the same. The BiasedUrn
CRAN package is required when this is not NULL.}

\item{universe}{Defaults to all elements in \code{dat[["feature_id"]]}.}

\item{restrict.universe}{See same parameter in \code{\link[limma:goana]{limma::kegga()}}}

\item{plot.bias}{See \code{plot} parameter in \code{\link[limma:goana]{limma::kegga()}}. You can generate
this plot without running \code{ora} using the \code{\link[=plot_ora_bias]{plot_ora_bias()}},
like so:
\code{plot_ora_bias(dat, selected = selected, groups = groups, feature.bias = feature.bias)}}

\item{...}{parameters passed to \code{conform()}}

\item{as.dt}{If \code{FALSE} (default), the data.frame like thing that
this funciton returns will be set to a data.frame. Set this to \code{TRUE}
to keep this object as a \code{data.table}}
}
\value{
A data.frame of pathway enrichment. The last N colums are enrichment
statistics per pathway, grouped by the \code{groups} parameter. \code{P.all} are the
stats for all selected features, and the remaingin \verb{P.*} columns are for
the features specifed by \code{groups}.
}
\description{
This function wraps \code{\link[limma:goana]{limma::kegga()}} to perform biased overrepresntation
analysis over gene set collection stored in a GeneSetDb (\code{gsd}) object. Its
easiest to use this function when the biases and selection criteria are
stored as columns of the input data.frame \code{dat}.
}
\details{
In principle, this test does what \code{goseq} does, however I found that
sometimes calling goseq would throw errors within \code{goseq::nullp()} when
calling \code{makesplines}. I stumbled onto this implementation when googling
for these errors and landing here:
https://support.bioconductor.org/p/65789/#65914

The meat and potatoes of this function's code was extracted from
\code{\link[limma:goana]{limma::kegga()}}, written by Gordon Smyth and Yifang Hu.

Note that the BiasedUrn CRAN package needs to be installed to support biased
enrichment testing
}
\section{Functions}{
\itemize{
\item \code{plot_ora_bias()}: plots the bias of coviarate to DE / selected status. Code
taken from \code{\link[limma:goana]{limma::kegga()}}

}}
\examples{
dgestats <- exampleDgeResult()
gdb <- randomGeneSetDb(dgestats)

# Run enrichmnent without accounting for any bias
nobias <- ora(dgestats, gdb, selected = "selected", groups = "direction",
              feature.bias = NULL)

# Run enrichment and account for gene length
lbias <- ora(dgestats, gdb, selected = "selected",
             feature.bias = "effective_length")

# plot length bias with DGE status
plot_ora_bias(dgestats, "selected", "effective_length")

# induce length bias and see what is the what ...............................
biased <- dgestats[order(dgestats$pval),]
biased$effective_length <- sort(biased$effective_length, decreasing = TRUE)
plot_ora_bias(biased, "selected", "effective_length")
etest <- ora(biased, gdb, selected = "selected",
             groups = "direction",
             feature.bias = "effective_length")
}
\references{
Young, M. D., Wakefield, M. J., Smyth, G. K., Oshlack, A. (2010).
Gene ontology analysis for RNA-seq: accounting for selection bias.
\emph{Genome Biology} 11, R14. http://genomebiology.com/2010/11/2/R14
}
