% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/03_addSignificantGenes.R
\name{addSignificantGenes}
\alias{addSignificantGenes}
\alias{addSignificantGenes,list-method}
\alias{addSignificantGenes,character-method}
\alias{addSignificantGenes,GeneSet-method}
\alias{addSignificantGenes,GeneSetCollection-method}
\alias{getSignificantGenes}
\alias{getSignificantGenes,list-method}
\alias{getSignificantGenes,character-method}
\alias{getSignificantGenes,factor-method}
\alias{getSignificantGenes,GeneSet-method}
\alias{getSignificantGenes,GeneSetCollection-method}
\title{Add significant genes}
\usage{
addSignificantGenes(
  resultsTable,
  geneSets,
  rankings,
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = "geneList",
  method = "aprox",
  nMean = 50,
  nCores = 1
)

\S4method{addSignificantGenes}{list}(
  resultsTable,
  geneSets,
  rankings,
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = "geneList",
  method = "aprox",
  nMean = 50,
  nCores = 1
)

\S4method{addSignificantGenes}{character}(
  resultsTable,
  geneSets,
  rankings,
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = "geneList",
  method = "aprox",
  nMean = 50,
  nCores = 1
)

\S4method{addSignificantGenes}{GeneSet}(
  resultsTable,
  geneSets,
  rankings,
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = "geneList",
  method = "aprox",
  nMean = 50,
  nCores = 1
)

\S4method{addSignificantGenes}{GeneSetCollection}(
  resultsTable,
  geneSets,
  rankings,
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = "geneList",
  method = "aprox",
  nMean = 50,
  nCores = 1
)

getSignificantGenes(
  geneSet,
  rankings,
  signifRankingNames = NULL,
  method = "iCisTarget",
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = c("geneList", "incidMatrix"),
  nCores = 1,
  digits = 3,
  nMean = 50
)

\S4method{getSignificantGenes}{list}(
  geneSet,
  rankings,
  signifRankingNames = NULL,
  method = "iCisTarget",
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = c("geneList", "incidMatrix"),
  nCores = 1,
  digits = 3,
  nMean = 50
)

\S4method{getSignificantGenes}{character}(
  geneSet,
  rankings,
  signifRankingNames = NULL,
  method = "iCisTarget",
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = c("geneList", "incidMatrix"),
  nCores = 1,
  digits = 3,
  nMean = 50
)

\S4method{getSignificantGenes}{factor}(
  geneSet,
  rankings,
  signifRankingNames = NULL,
  method = "iCisTarget",
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = c("geneList", "incidMatrix"),
  nCores = 1,
  digits = 3,
  nMean = 50
)

\S4method{getSignificantGenes}{GeneSet}(
  geneSet,
  rankings,
  signifRankingNames = NULL,
  method = "iCisTarget",
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = c("geneList", "incidMatrix"),
  nCores = 1,
  digits = 3,
  nMean = 50
)

\S4method{getSignificantGenes}{GeneSetCollection}(
  geneSet,
  rankings,
  signifRankingNames = NULL,
  method = "iCisTarget",
  maxRank = 5000,
  plotCurve = FALSE,
  genesFormat = c("geneList", "incidMatrix"),
  nCores = 1,
  digits = 3,
  nMean = 50
)
}
\arguments{
\item{resultsTable}{[addSignificantGenes]
Output table from \code{\link{addMotifAnnotation}}}

\item{geneSets}{[addSignificantGenes] List of gene-sets which was analyzed.}

\item{rankings}{Motif rankings used to analyze the gene list
(They should be the same as used for calcAUC in this same analysis).}

\item{maxRank}{Maximum rank to take into account for the recovery curve
(Default: 5000).}

\item{plotCurve}{Logical. Whether to plot the recovery curve (Default: FALSE).}

\item{genesFormat}{"geneList" or "incidMatrix". Format to return the genes
(Default: "geneList").}

\item{method}{"iCisTarget" or "aprox". There are two methods to identify the
highly ranked genes:
 (1) equivalent to the ones used in iRegulon and i-cisTarget
 (method="iCisTarget", recommended if running time is not an issue),
 and (2) a faster implementation based on an approximate distribution
 using the average at each rank (method="aprox",
 useful to scan multiple gene sets). (Default: "aprox")}

\item{nMean}{Only used for "aprox" method: Interval to calculate the running
mean and sd. Default: 50 (approx. nGenesInRanking/400).}

\item{nCores}{Number of cores to use for parallelization (Default: 1).}

\item{geneSet}{[getSignificantGenes] Gene-set to analyze (Only one).}

\item{signifRankingNames}{[getSignificantGenes] Motif ranking name.}

\item{digits}{[getSignificantGenes]
Number of digits to include in the output.}
}
\value{
Output from \code{\link{addMotifAnnotation}}
adding the following columns:
\itemize{
  \item nEnrGenes: Number of genes highly ranked
  \item rankAtMax: Ranking at the maximum enrichment,
  used to determine the number of enriched genes.
  \item enrichedGenes: Genes that are highly ranked for the given motif.
  If genesFormat="geneList", the gene names are collapsed into a comma
  separated text field (alphabetical order). If genesFormat="incidMatrix",
  they are formatted as an incidence matrix, i.e. indicating with 1 the
  genes present, and 0 absent.
}

If plotCurve=TRUE, the recovery curve is plotted.
}
\description{
Identify which genes (of the gene-set) are highly ranked
for each motif.

\itemize{
  \item addSignificantGenes(): adds them to the results table.
  \item getSignificantGenes():
  Calculates the significant genes for ONE gene set.
  It provides the plot and the gene list (it is used by addSignificantGenes).
}
}
\details{
The highly ranked genes are selected based on the distribution of the
recovery curves of the gene set across all the motifs in the database.
In the plot, the red line indicates the average of the recovery curves of
all the motifs, the green line the average + standard deviation, and the
blue line the recovery curve of the current motif.
The point of maximum distance between the current motif and the green curve
(mean+sd) is the rank selected as maximum enrichment.
All the genes with lower rank will be considered enriched.

Depending on whether the method is "iCisTarget" or "aprox", the mean and
SD at each rank are calculated slightly differently.
The "iCisTarget" method calculates the recovery curves for all the motifs, and
then calculates the average and SD at each rank.
Due to the implementation of the function in R, this method is slower than
the "aprox" method, which just subsets the ranks of the genes for each motif
and calculates the average of the available ones at each position with a
sliding window.
Since there are over 18k motifs, the chances of getting several measures at
each rank are very high, and the results highly resemble those calculated
by iCisTarget, though they are often not exactly the same
(hence the name: "aprox" method).
}
\examples{

##################################################
# Setup & previous steps in the workflow:

#### Gene sets
# As example, the package includes a hypoxia gene set:
txtFile <- file.path(system.file('examples', package='RcisTarget'),
                     "hypoxiaGeneSet.txt")
geneLists <- list(hypoxia=read.table(txtFile, stringsAsFactors=FALSE)[,1])

#### Databases
## Motif rankings: Select according to organism and distance around TSS
## (See the vignette for URLs to download)
# motifRankings <- importRankings("hg19-500bp-upstream-7species.mc9nr.feather")

## For this example we will use a SUBSET of the ranking/motif databases:
library(RcisTarget.hg19.motifDBs.cisbpOnly.500bp)
data(hg19_500bpUpstream_motifRanking_cispbOnly)
motifRankings <- hg19_500bpUpstream_motifRanking_cispbOnly

## Motif - TF annotation:
data(motifAnnotations_hgnc_v9) # human TFs (for motif collection 9)
motifAnnotation <- motifAnnotations_hgnc_v9

### Run RcisTarget
# Step 1. Calculate AUC
motifs_AUC <- calcAUC(geneLists, motifRankings)
# Step 2. Select significant motifs, add TF annotation & format as table
motifEnrichmentTable <- addMotifAnnotation(motifs_AUC,
           motifAnnot=motifAnnotation)

##################################################

##################################################
# (This step: Step 3)
# Identify the genes that have the motif significantly enriched
# (i.e. genes from the gene set in the top of the ranking)
par(mfrow=c(1,2))
motifEnrichmentTable_wGenes <- addSignificantGenes(motifEnrichmentTable,
                                       genesFormat="geneList",
                                       plotCurve=TRUE,
                                       geneSets=geneLists,
                                       rankings=motifRankings,
                                       method="aprox")

#### Exploring the output:
# The object returned is a data.table
# Feel free to convert it to a data.frame:
motifEnrichmentTable_wGenes <- as.data.frame(motifEnrichmentTable_wGenes)

# Enriched genes
enrGenes <- motifEnrichmentTable_wGenes[1,"enrichedGenes"]
enrGenes
strsplit(enrGenes, ";")

# As incidence matrix
motifEnr_wIncidMat <- addSignificantGenes(motifEnrichmentTable,
                geneSets=geneLists, rankings=motifRankings,
                method="aprox",
                genesFormat = "incidMatrix")

motifEnr_wIncidMat <- as.data.frame(motifEnr_wIncidMat)

# Locate the gene columns from the position of "rankAtMax" instead of
# hard-coding a column index (the gene columns are expected to start
# right after it), so the example keeps working if the number of
# annotation columns changes.
rankAtMaxCol <- which(colnames(motifEnr_wIncidMat) == "rankAtMax")
rankAtMaxCol
firstGeneCol <- rankAtMaxCol + 1

# drop=FALSE keeps a data.frame even if there is a single gene column
incidMat <- motifEnr_wIncidMat[, firstGeneCol:ncol(motifEnr_wIncidMat),
                               drop=FALSE]
rownames(incidMat) <- motifEnr_wIncidMat[,"motif"]
# Keep only the genes that are enriched for at least one motif
incidMat <- incidMat[, colSums(incidMat)>0, drop=FALSE]

# Plot as network
par(mfrow=c(1,1))
library(igraph)
plot(graph.incidence(incidMat))

###############################################################
# Alternative method: getSignificantGenes()
selectedMotif <- rownames(incidMat)
onlyGenes <- getSignificantGenes(geneSet=geneLists$hypoxia,
                            signifRankingNames=selectedMotif,
                            genesFormat="incidMatrix",
                            plotCurve=TRUE,
                            rankings=motifRankings,
                            method="aprox")


}
\seealso{
Previous step in the workflow: \code{\link{addMotifAnnotation}}.

See the package vignette for examples and more details:
\code{vignette("RcisTarget")}
}
