% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/profile.R
\name{runTBsigProfiler}
\alias{runTBsigProfiler}
\title{Run TB gene signature profiling.}
\source{
Profiling for the Z-Score, PLAGE, GSVA, and ssGSEA algorithms is
conducted with the Bioconductor \code{GSVA} package. Profiling for the
singscore algorithm is conducted with the Bioconductor \code{singscore}
package.
}
\usage{
runTBsigProfiler(
  input,
  useAssay = NULL,
  signatures = NULL,
  algorithm = c("GSVA", "ssGSEA", "ASSIGN", "PLAGE", "Zscore", "singscore"),
  combineSigAndAlgorithm = FALSE,
  assignDir = NULL,
  outputFormat = NULL,
  parallel.sz = 0,
  ASSIGNiter = 1e+05,
  ASSIGNburnin = 50000,
  ssgsea_norm = TRUE,
  update_genes = TRUE
)
}
\arguments{
\item{input}{an input data object of the class \code{SummarizedExperiment},
\code{data.frame}, or \code{matrix} containing gene expression data. Required.}

\item{useAssay}{a character string specifying the assay to use for signature
profiling when \code{input} is a \code{SummarizedExperiment}. Required only for
input data of the class \code{SummarizedExperiment}. If null, the assay
used will be \code{"counts"}. The default is \code{NULL}.}

\item{signatures}{a \code{list} of signatures to run with their associated genes.
This list should be in the same format as \code{TBsignatures}, included in
the TBSignatureProfiler package. If \code{signatures = NULL}, the default set
of signatures \code{TBsignatures} list is used.
For details, run \code{?TBsignatures}.
If fewer than 2 genes in a signature are present in the sample, that signature will
not be evaluated and will not be present in the resulting SE object.
The default is \code{NULL}.}

\item{algorithm}{a vector of algorithms to run, or character string if only
one is desired. The default is \code{c("GSVA", "ssGSEA", "ASSIGN",
"PLAGE", "Zscore", "singscore")}. NOTE: ASSIGN takes a long time to run and
is not recommended for efficient use.}

\item{combineSigAndAlgorithm}{logical, if \code{TRUE}, output row names will
be of the form \code{<algorithm>_<signature>}. It must be set to \code{TRUE} if the
\code{outputFormat} will be a SummarizedExperiment and
\code{length(algorithm) > 1}.
It will always be \code{FALSE} if only one algorithm is selected.
If \code{FALSE}, there will be a column named 'algorithm' that lists which
algorithm is used, and a column named 'pathway' that lists the signature
profiled. If \code{NULL}, and one algorithm was used, the algorithm will not
be listed. The default is \code{FALSE}.}

\item{assignDir}{a character string naming a directory to save intermediate
ASSIGN results if \code{algorithm} specifies \code{"ASSIGN"}. The default is
\code{NULL}, in which case intermediate results will not be saved.}

\item{outputFormat}{a character string specifying the output data format.
Possible values are \code{"SummarizedExperiment"}, \code{"matrix"}, or
\code{"data.frame"}. The default is to return the same type as the
\code{input} object.}

\item{parallel.sz}{an integer identifying the number of processors to use
when running the calculations in parallel for the GSVA and ssGSEA algorithms.
If \code{parallel.sz = 0}, all cores are used. The default is \code{0}.}

\item{ASSIGNiter}{an integer indicating the number of iterations to use in
the MCMC for the ASSIGN algorithm. The default is \code{100,000}.}

\item{ASSIGNburnin}{an integer indicating the number of burn-in iterations
to use in the MCMC for the ASSIGN algorithm. These iterations are discarded
when computing the posterior means of the model parameters. The default is
\code{50,000}.}

\item{ssgsea_norm}{logical, passed to \code{GSVA::gsva()}. When parameter
\code{algorithm = "ssGSEA"}, the profiler runs the ssGSEA method from
Barbie et al. (2009) normalizing the scores by the absolute difference
between the minimum and the maximum, as described in their paper.
When \code{ssgsea_norm = FALSE}, this last normalization step is skipped.
The default is \code{TRUE}.}

\item{update_genes}{logical, denotes whether gene names from \code{signatures}
and the rownames of \code{input} should be checked for accuracy using
\code{HGNChelper::checkGeneSymbols()}. The mapping assumes
genes are from humans and will keep unmapped genes as the original
input gene name. Default is \code{TRUE}.}
}
\value{
A \code{SummarizedExperiment} object, \code{data.frame}, or
\code{matrix} of signature profiling results. The returned object will be
of the format specified in \code{outputFormat}.
If \code{input} is a \code{SummarizedExperiment} and
\code{outputFormat = "SummarizedExperiment"}, then the output will retain
any input information stored in the input \code{colData}. In general, if
\code{outputFormat = "SummarizedExperiment"} then columns in the \code{colData}
will include the scores for each desired signature with samples on the rows.
If \code{input} is a \code{data.frame} or \code{matrix}, then the returned
object will have signatures on the rows and samples on the columns.
}
\description{
Using some subset of the signatures listed in \code{TBsignatures} and
specified scoring algorithms, this function runs gene signature profiling
on an input gene expression dataset. It allows for scores to be computed for
these signatures which can be compared using various visualization tools also
provided in the TBSignatureProfiler package.
}
\examples{
## Using a data.frame input/output
 # Create some toy data to test Zak_RISK_16 signature, using five samples with low
 # expression & five samples with high expression of the signature genes.
df_testdata <- as.data.frame(rbind(matrix(c(rnorm(80), rnorm(80) + 5), 16, 10,
                             dimnames = list(TBsignatures$Zak_RISK_16,
                             paste0("sample", seq_len(10)))),
                      matrix(rnorm(1000), 100, 10,
                             dimnames = list(paste0("gene", seq_len(100)),
                             paste0("sample", seq_len(10))))))
res <- runTBsigProfiler(input = df_testdata,
                        signatures = TBsignatures["Zak_RISK_16"],
                        algorithm = c("GSVA", "ssGSEA"),
                        combineSigAndAlgorithm = FALSE,
                        parallel.sz = 1)
subset(res, res$pathway == "Zak_RISK_16")

## Using a SummarizedExperiment input/output
 # The TB_indian SummarizedExperiment data is included in the package.
GSVA_res <- runTBsigProfiler(input = TB_indian,
                             useAssay = "logcounts",
                             signatures = TBsignatures["Zak_RISK_16"],
                             algorithm = c("GSVA"),
                             combineSigAndAlgorithm = FALSE,
                             parallel.sz = 1)
GSVA_res$Zak_RISK_16
}
\references{
Barbie, D.A., Tamayo, P., Boehm, J.S., Kim, S.Y., Moody, S.E., Dunn, I.F., Schinzel, A.C.,
Sandy, P., Meylan, E., Scholl, C., et al. (2009). Systematic RNA interference reveals
that oncogenic KRAS-driven cancers require TBK1. \emph{Nature} \strong{462}, 108-112.
doi: \href{https://doi.org/10.1038/nature08460}{10.1038/nature08460}.

Foroutan, M. et al. (2018). Single sample scoring of molecular phenotypes.
\emph{BMC Bioinformatics}, \bold{19}. doi:
\href{https://doi.org/10.1186/s12859-018-2435-4}{10.1186/s12859-018-2435-4}.

Lee, E. et al. (2008). Inferring pathway activity toward precise disease
classification. \emph{PLoS Comp Biol}, 4(11):e1000217. doi:
\href{https://doi.org/10.1371/journal.pcbi.1000217}{10.1371/journal.pcbi.1000217}.

Shen, Y. et al. (2015). ASSIGN: context-specific genomic profiling of
multiple heterogeneous biological pathways. \emph{Bioinformatics}, \bold{31},
1745-1753. doi:
\href{https://doi.org/10.1093/bioinformatics/btv031}{10.1093/bioinformatics/btv031}.

Subramanian, A. et al. (2005). Gene set enrichment analysis: A knowledge-based
approach for interpreting genome-wide expression profiles. \emph{PNAS},
\strong{102}, 15545-15550. doi:
\href{https://doi.org/10.1073/pnas.0506580102}{10.1073/pnas.0506580102}.

Tomfohr, J. et al. (2005). Pathway level analysis of gene expression using
singular value decomposition. \emph{BMC Bioinformatics}, 6:225. doi:
\href{https://doi.org/10.1186/1471-2105-6-225}{10.1186/1471-2105-6-225}.
}
