% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prepSim.R
\name{prepSim}
\alias{prepSim}
\title{SCE preparation for \code{\link{simData}}}
\usage{
prepSim(
  x,
  min_count = 1,
  min_cells = 10,
  min_genes = 100,
  min_size = 100,
  group_keep = NULL,
  verbose = TRUE
)
}
\arguments{
\item{x}{a \code{\link[SingleCellExperiment]{SingleCellExperiment}}.}

\item{min_count, min_cells}{used for filtering of genes; only genes with 
a count > \code{min_count} in >= \code{min_cells} cells will be retained.}

\item{min_genes}{used for filtering of cells;
only cells with a count > 0 in >= \code{min_genes} genes will be retained.}

\item{min_size}{used for filtering subpopulation-sample combinations;
only instances with >= \code{min_size} cells will be retained.
Specifying \code{min_size = NULL} skips this step.}

\item{group_keep}{character string; if \code{nlevels(x$group_id) > 1},
specifies which group of samples to keep (see details). The default
\code{NULL} retains samples from \code{levels(x$group_id)[1]}; 
if \code{colData(x)$group_id} is not specified, all samples will be kept.}

\item{verbose}{logical; should information on progress be reported?}
}
\value{
a \code{\link[SingleCellExperiment]{SingleCellExperiment}} 
  containing, for each cell, library size (\code{colData(x)$offset})
  and, for each gene, dispersion and sample-specific mean estimates 
  (\code{rowData(x)$dispersion} and \code{$beta.sample_id}, respectively).
}
\description{
\code{prepSim} prepares an input SCE for simulation 
  with \code{muscat}'s \code{\link{simData}} function by 
\enumerate{
  \item basic filtering of genes and cells
  \item (optional) filtering of subpopulation-sample instances
  \item estimation of cell parameters (library sizes) and gene parameters 
  (dispersions and sample-specific means).
}
}
\details{
For each gene \eqn{g}, \code{prepSim} fits a model to estimate 
  sample-specific means \eqn{\beta_g^s}, for each sample \eqn{s}, 
  and dispersion parameters \eqn{\phi_g} using \code{edgeR}'s 
  \code{\link[edgeR]{estimateDisp}} function with default parameters. 
  Thus, the reference count data is modeled as negative binomial (NB) distributed: 
  \deqn{Y_{gc} \sim NB(\mu_{gc}, \phi_g)}
  for gene \eqn{g} and cell \eqn{c}, where the mean 
  \eqn{\mu_{gc} = \exp(\beta_{g}^{s(c)}) \cdot \lambda_c}. Here, 
  \eqn{\beta_{g}^{s(c)}} is the relative abundance of gene \eqn{g} 
  in sample \eqn{s(c)}, \eqn{\lambda_c} is the library size 
  (total number of counts) of cell \eqn{c}, and \eqn{\phi_g} is the dispersion.
}
\examples{
# estimate simulation parameters
data(example_sce)
ref <- prepSim(example_sce)

# tabulate number of genes/cells before vs. after
ns <- cbind(
  before = dim(example_sce), 
  after = dim(ref)) 
rownames(ns) <- c("#genes", "#cells")
ns

library(SingleCellExperiment)
head(rowData(ref)) # gene parameters
head(colData(ref)) # cell parameters

}
\references{
Crowell, HL, Soneson, C, Germain, P-L, Calini, D, 
Collin, L, Raposo, C, Malhotra, D & Robinson, MD: 
On the discovery of population-specific state transitions from 
multi-sample multi-condition single-cell RNA sequencing data. 
\emph{bioRxiv} \strong{713412} (2019). 
doi: \url{https://doi.org/10.1101/713412}
}
\author{
Helena L Crowell
}
