% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generateResultsTable.R,
%   R/generateEnzymesTable.R, R/generateResultsGraph.R, R/exportResults.R,
%   R/addGOToGraph.R, R/plotGraph.R
\name{export-funs}
\alias{export-funs}
\alias{generateResultsTable}
\alias{generateEnzymesTable}
\alias{generateResultsGraph}
\alias{exportResults}
\alias{addGOToGraph}
\alias{plotGraph}
\title{Generate and manipulate tables and sub-networks from an enrichment}
\usage{
generateResultsTable(method = "diffusion", threshold = 0.05,
    plimit = 15, nlimit = 250, LabelLengthAtPlot = 45,
    capPscores = 1e-06, object = NULL, data = NULL, ...)

generateEnzymesTable(method = "diffusion", threshold = 0.05,
    nlimit = 250, LabelLengthAtPlot = 45, capPscores = 1e-06,
    mart.options = list(biomart = "ensembl", dataset =
    "hsapiens_gene_ensembl"), object = NULL, data = NULL, ...)

generateResultsGraph(method = "diffusion", threshold = 0.05,
    plimit = 15, nlimit = 250, thresholdConnectedComponent = 0.05,
    LabelLengthAtPlot = 22, object = NULL, data = NULL, ...)

exportResults(format = "csv", file = "myOutput",
    method = "diffusion", object = NULL, data = NULL, ...)

addGOToGraph(graph = NULL, GOterm = NULL, godata.options = list(OrgDb
    = "org.Hs.eg.db", ont = "CC"), mart.options = list(biomart = "ensembl",
    dataset = "hsapiens_gene_ensembl"))

plotGraph(graph = NULL, layout = FALSE, graph.layout = NULL,
    plotLegend = TRUE, plot.fun = "plot.igraph", NamesAsLabels = TRUE,
    ...)
}
\arguments{
\item{method}{Character, one of \code{"diffusion"} or \code{"pagerank"}}

\item{threshold}{Numeric value between 0 and 1. 
\code{p.score} threshold applied when filtering KEGG nodes. 
Lower thresholds are more stringent.}

\item{plimit}{Pathway limit, must be a numeric value between 1 and 50. 
Limits the number of pathways when \code{method = "hypergeom"}}

\item{nlimit}{Node limit, must be a numeric value between 1 and 1000. 
Limits the order of the solution sub-graph when 
\code{method = "diffusion"} or \code{method = "pagerank"}}

\item{LabelLengthAtPlot}{Numeric value between 10 and 50. 
Maximum length that a label can reach when plotting the graph. 
The remaining characters will be truncated using "..."}

\item{capPscores}{Numeric value, minimum p-score 
admitted for the readable 
formatting. Smaller p-scores will be displayed 
as \code{< capPscores}}

\item{object}{FELLA.USER object}

\item{data}{FELLA.DATA object}

\item{...}{Optional arguments for the plotting function 
in \code{plotGraph}. Arguments passed to the exporting function 
in \code{exportResults}. Ignored otherwise.}

\item{mart.options}{List, options for the \code{biomaRt} function
\code{\link[biomaRt]{getBM}}. Importantly, this defines the organism, 
see \code{\link[biomaRt]{listDatasets}} for possibilities. 
If calling \code{generateEnzymesTable}, the user can set 
\code{mart.options = NULL} to avoid adding GO labels to enzymes.}

\item{thresholdConnectedComponent}{Numeric value between 0 and 1. 
Connected components whose estimated probability of arising by chance 
is below the threshold are kept, 
while the ones exceeding it (because they are too small) are discarded.}

\item{format}{Character, one of: \code{"csv"} for regular 
results table, \code{"enzyme"} for table with enzyme data, 
\code{"igraph"} for igraph format. 
Alternatively, any format supported by igraph, 
see \code{\link[igraph]{write_graph}}}

\item{file}{Character specifying the output file name}

\item{graph}{An \pkg{igraph} object, 
typically a small one, 
coming from an enrichment through \code{"diffusion"} or \code{"pagerank"}.}

\item{GOterm}{Character, GO entry to draw 
semantic similarity in the solution graph. 
If \code{NULL}, the GO labels will be appended without similarities.}

\item{godata.options}{List, options for the database creator 
\code{\link[GOSemSim]{godata}}}

\item{layout}{Logical, should the plot be returned as a layout?}

\item{graph.layout}{Two-column numeric matrix, 
if this argument is not null 
then it is used as graph layout}

\item{plotLegend}{Logical, should the legend be plotted as well?}

\item{plot.fun}{Character, can be either 
\code{plot.igraph} or \code{tkplot}}

\item{NamesAsLabels}{Logical, should KEGG names be displayed 
as labels instead of KEGG identifiers?}
}
\value{
\code{generateResultsTable} returns a 
data.frame that contains the nodes below the \code{p.score} threshold 
from an enrichment analysis

\code{generateEnzymesTable} returns a 
data.frame that contains the enzymes below the \code{p.score} threshold,
along with their genes and GO labels

\code{generateResultsGraph} returns 
an \pkg{igraph}
object: a sub-network from the whole 
KEGG knowledge model under the specified thresholds 
(\code{threshold} and \code{thresholdConnectedComponent})

\code{exportResults} returns \code{invisible()}, 
but as a side effect the specified \code{file} is created.

\code{addGOToGraph} returns 
an \pkg{igraph} object, 
which is the input \code{graph} with 
extra attributes: GO labels in \code{V(graph)$GO}, and 
semantic similarities in \code{V(graph)$GO.simil} if 
\code{GOterm} is not \code{NULL}

\code{plotGraph} returns 
\code{invisible()} if \code{layout = FALSE} and 
the plotting layout as a data.frame otherwise.
}
\description{
In general, \code{generateResultsTable}, \code{generateEnzymesTable} 
and \code{generateResultsGraph} provide the results of an enrichment 
in several formats. 

Function \code{generateResultsTable} returns a table 
that contains the best hits from
a \code{\link{FELLA.USER}} object 
with a successful enrichment analysis.
Similarly, \code{generateEnzymesTable} returns 
a data frame with the best scoring enzyme families and their 
annotated genes.

Function \code{generateResultsGraph} 
gives a sub-network, plottable through 
\code{plotGraph}, containing the nodes with 
the lowest \code{p.score} from an enrichment analysis. 
Function \code{addGOToGraph} can be applied to such 
sub-networks to overlay GO labels and 
similarity to a user-defined GO term.

Function \code{exportResults} 
is a wrapper around \code{generateResultsTable}, 
\code{generateEnzymesTable} and \code{generateResultsGraph} 
to write the results to files.
}
\details{
Functions \code{generateResultsTable} and 
\code{generateEnzymesTable} need a 
\code{\link{FELLA.DATA}} object and a 
\code{\link{FELLA.USER}} object with a successful enrichment.
\code{generateResultsTable} provides the entries 
whose p-score is below the chosen \code{threshold} in a tabular format. 
\code{generateEnzymesTable} returns a table 
that contains (1) the enzymes that are below the user-defined 
p-score threshold, along with (2) the genes that belong to 
the enzymatic families in the organism defined in the database, 
and (3) GO labels of such enzymes, if \code{mart.options} is 
not \code{NULL} and points to the right database.

Function \code{generateResultsGraph} returns an 
\pkg{igraph} 
object with a relevant sub-network 
for manual examination. 
A \code{\link{FELLA.USER}} 
object with a successful enrichment analysis and the corresponding 
\code{\link{FELLA.DATA}} must be supplied. 
Graph nodes are prioritised by \code{p.score} and selected through 
the most stringent between (1) p.score \code{threshold} and 
(2) maximum number of nodes \code{nlimit}. 

There is an additional filtering feature for tiny connected components, 
controllable through \code{thresholdConnectedComponent} 
(smaller is stricter). 
The user can choose to turn off this filter by setting 
\code{thresholdConnectedComponent = 1}.  
The idea is to discard connected components so small 
that they are likely to arise from random selection of nodes. 
Let \code{k} be the order of the current sub-network.
A connected component of order \code{r} will
be kept only if the probability that a 
random subgraph from the whole KEGG knowledge model 
of order \code{k} contains a
connected component of order at least \code{r} 
is smaller than \code{thresholdConnectedComponent}. 
Such probabilities are estimated during 
\code{\link[=data-funs]{buildDataFromGraph}}; the number of random 
trials can be controlled by its \code{niter} argument.

Function \code{exportResults} writes the enrichment results 
as the specified filetype.
Options are: a csv table (\code{"csv"}), 
an enzyme csv table (\code{"enzyme"}), 
an \pkg{igraph}
object as an \code{RData} file (\code{"igraph"}), 
or any format supported by igraph's 
\code{\link[igraph]{write_graph}}.

Function \code{addGOToGraph} takes and returns 
a graph object with class 
\pkg{igraph} 
adding the following attributes: 
GO labels in \code{V(graph)$GO}, and 
semantic similarities in \code{V(graph)$GO.simil} if 
\code{GOterm} is not \code{NULL}. 

The GO database describes genes in terms of three ontologies: 
molecular function (MF), biological process (BP) and 
cellular component (CC) [Gene Ontology Consortium, 2015].
The user can be interested in finding which enzymatic families 
reported with a low \code{p.score}
are closest to a particular GO term. 
To assess similarity between GO labels, FELLA uses the 
semantic similarity defined in [Yu, 2010] and their implementation 
in the \pkg{GOSemSim} R package. 
The user will obtain, for each enzymatic family, the closest GO 
term to his or her GO query and the semantic similarity between them. 
Exact matches have a similarity of \code{1}. 
Function \code{plotGraph} detects the presence 
of the GO similarity option and plots its magnitude.

Function \code{plotGraph} 
plots a solution graph from the diffusion and pagerank analysis. 
For plotting hypergeom results, please use \code{plot} instead. 
Specific colors and shapes for each KEGG category are used: 
pathways are maroon, modules are violet, enzymes are orange, 
reactions are blue and compounds are green. 
If the graph contains the similarity to a GO term, enzymes will 
be displayed as triangles whose color depicts the strength of 
such measure (yellow: weak, purple: strong). 
At the moment, \code{plotGraph} allows plotting 
through the static \code{\link[igraph]{plot.igraph}} and the 
interactive \code{\link[igraph]{tkplot}}.
}
\examples{
## First generate a toy enrichment
library(igraph)
data(FELLA.sample)
data(input.sample)
## Enrich input
obj <- enrich(
compounds = input.sample, 
data = FELLA.sample)

######################
## Results table
tab.res <- generateResultsTable(
method = "hypergeom",
threshold = 0.1, 
object = obj, 
data = FELLA.sample)
head(tab.res)

tab.res <- generateResultsTable(
method = "diffusion",
threshold = 0.1, 
object = obj, 
data = FELLA.sample)
head(tab.res)

######################
## Use wrapper to write the table to a file
out.file <- tempfile()
exportResults(
format = "csv", 
threshold = 0.1, 
file = out.file, 
object = obj, 
data = FELLA.sample)
tab.wrap <- read.csv(out.file)
head(tab.wrap)

######################
## Enzymes table
tab.ec <- generateEnzymesTable(
threshold = 0.1, 
object = obj, 
data = FELLA.sample, 
mart.options = NULL)
head(tab.ec)

######################
## Generate graph
g.res <- generateResultsGraph(
method = "pagerank", 
threshold = 0.1, 
object = obj, 
data = FELLA.sample)
g.res

## Plot graph (without GO terms)
plotGraph(g.res)

## Add similarity to the GO CC term "mitochondrion"
\dontrun{
g.cc <- FELLA:::addGOToGraph(
graph = g.res, 
GOterm = "GO:0005739")

## Plot graph (with GO terms)
plotGraph(g.cc)

## Without the CC
any(V(g.res)$GO.simil >= 0)
## With the CC
v.cc <- unlist(V(g.cc)$GO.simil)
sum(v.cc >= 0, na.rm = TRUE)
## Similarity values
table(v.cc)
}

}
\references{
Gene Ontology Consortium. (2015). 
Gene ontology consortium: going forward. 
Nucleic acids research, 43(D1), D1049-D1056.

Yu, G., Li, F., Qin, Y., Bo, X., Wu, Y., & Wang, S. (2010). 
GOSemSim: an R package for measuring semantic similarity 
among GO terms and gene products. Bioinformatics, 26(7), 976-978.
}
