% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/transformGamPoi.R
\name{transformGamPoi}
\alias{transformGamPoi}
\title{Variance Stabilizing Transformation for Gamma Poisson Data}
\usage{
transformGamPoi(
  data,
  transformation = c("acosh", "shifted_log", "randomized_quantile_residuals",
    "pearson_residuals", "analytic_pearson_residuals"),
  overdispersion = 0.05,
  size_factors = TRUE,
  ...,
  on_disk = NULL,
  verbose = FALSE
)
}
\arguments{
\item{data}{any matrix-like object (e.g. matrix, dgCMatrix, DelayedArray, HDF5Matrix)
with one column per sample and row per gene. It can also be an object of type \code{glmGamPoi},
in which case it is directly used to calculate the variance-stabilized values.}

\item{transformation}{one of \code{c("acosh", "shifted_log", "randomized_quantile_residuals", "pearson_residuals", "analytic_pearson_residuals")}.
See \code{\link{acosh_transform}}, \code{\link{shifted_log_transform}}, or \code{\link{residual_transform}} for more information.}

\item{overdispersion}{the simplest count model is the Poisson model. However, the Poisson model
assumes that \eqn{variance = mean}. For many applications this is too rigid and the Gamma-Poisson
allows a more flexible mean-variance relation (\eqn{variance = mean + mean^2 * overdispersion}). \cr
\code{overdispersion} can either be
\itemize{
\item a single boolean that indicates if an overdispersion is estimated for each gene.
\item a numeric vector of length \code{nrow(data)} fixing the overdispersion to those values.
\item the string \code{"global"} to indicate that one dispersion is fit across all genes.
}
Note that \code{overdispersion = 0} and \code{overdispersion = FALSE} are equivalent and both reduce
the Gamma-Poisson to the classical Poisson model. Default: \code{0.05} which is roughly the
overdispersion observed on ostensibly homogeneous cell lines.}

\item{size_factors}{in large scale experiments, each sample is typically of different size
(for example different sequencing depths). A size factor is an internal mechanism of GLMs to
correct for this effect.\cr
\code{size_factors} is either a numeric vector with positive entries whose length equals the number of columns in the data
that specifies the size factors that are used.
Or it can be a string that specifies the method that is used to estimate the size factors
(one of \code{c("normed_sum", "deconvolution", "poscounts")}).
Note that \code{"normed_sum"} and \code{"poscounts"} are fairly
simple methods and can lead to suboptimal results. For the best performance, I recommend using
\code{size_factors = "deconvolution"} which calls \code{scran::calculateSumFactors()}. However, you need
to separately install the \code{scran} package from Bioconductor for this method to work.
Also note that \code{size_factors = 1} and \code{size_factors = FALSE} are equivalent. If only a single gene is given,
no size factor is estimated (i.e. \code{size_factors = 1}). Default: \code{"normed_sum"}.}

\item{...}{additional parameters passed to \code{\link{acosh_transform}}, \code{\link{shifted_log_transform}}, or \code{\link{residual_transform}}}

\item{on_disk}{a boolean that indicates if the dataset is loaded into memory or if it is kept on disk
to reduce the memory usage. Processing in memory can be significantly faster than on disk.
Default: \code{NULL} which means that the data is only processed in memory if \code{data} is an in-memory
data structure.}

\item{verbose}{boolean that decides if information about the individual steps is printed.
Default: \code{FALSE}}
}
\value{
a matrix (or a vector if the input is a vector) with the transformed values.
}
\description{
Variance Stabilizing Transformation for Gamma Poisson Data
}
\examples{
  # Load a single cell dataset
  sce <- TENxPBMCData::TENxPBMCData("pbmc4k")
  # Reduce size for this example
  set.seed(1)
  sce_red <- sce[sample(which(rowSums2(counts(sce)) > 0), 1000),
                 sample(ncol(sce), 200)]

  assay(sce_red, "acosh") <- transformGamPoi(sce_red, "acosh")
  assay(sce_red, "shifted_log") <- transformGamPoi(sce_red, "shifted_log")

  # Residual Based Variance Stabilizing Transformation
  rq <- transformGamPoi(sce_red, transformation = "randomized_quantile", on_disk = FALSE,
                        verbose = TRUE)
  pearson <- transformGamPoi(sce_red, transformation = "pearson", on_disk = FALSE, verbose = TRUE)

  plot(rowMeans2(counts(sce_red)), rowVars(assay(sce_red, "acosh")), log = "x")
  points(rowMeans2(counts(sce_red)), rowVars(assay(sce_red, "shifted_log")), col = "red")
  points(rowMeans2(counts(sce_red)), rowVars(rq), col = "blue")


  # Plot first two principal components
  acosh_pca <- prcomp(t(assay(sce_red, "acosh")), rank. = 2)
  rq_pca <- prcomp(t(rq), rank. = 2)
  pearson_pca <- prcomp(t(pearson), rank. = 2)

  plot(acosh_pca$x, asp = 1)
  points(rq_pca$x, col = "blue")
  points(pearson_pca$x, col = "green")

}
\references{
Ahlmann-Eltze, Constantin, and Wolfgang Huber. "Transformation and Preprocessing of Single-Cell
RNA-Seq Data." bioRxiv (2021).

Ahlmann-Eltze, Constantin, and Wolfgang Huber. "glmGamPoi: Fitting Gamma-Poisson Generalized Linear
Models on Single Cell Count Data." Bioinformatics (2020).

Dunn, Peter K., and Gordon K. Smyth. "Randomized quantile residuals." Journal of Computational and
Graphical Statistics 5.3 (1996): 236-244.

Hafemeister, Christoph, and Rahul Satija. "Normalization and variance stabilization of single-cell
RNA-seq data using regularized negative binomial regression." Genome biology 20.1 (2019): 1-15.

Hafemeister, Christoph, and Rahul Satija. "Analyzing scRNA-seq data with the sctransform and offset
models." (2020).

Lause, Jan, Philipp Berens, and Dmitry Kobak. "Analytic Pearson residuals for normalization of
single-cell RNA-seq UMI data." Genome Biology (2021).
}
\seealso{
\code{\link{acosh_transform}}, \code{\link{shifted_log_transform}}, and \code{\link{residual_transform}}
}
