% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/aggregateData.R
\name{aggregateData}
\alias{aggregateData}
\title{Aggregation of single-cell to pseudobulk data}
\usage{
aggregateData(
  x,
  assay = NULL,
  by = c("cluster_id", "sample_id"),
  fun = c("sum", "mean", "median", "prop.detected", "num.detected"),
  scale = FALSE,
  verbose = TRUE,
  BPPARAM = SerialParam(progressbar = verbose)
)
}
\arguments{
\item{x}{a \code{\link[SingleCellExperiment]{SingleCellExperiment}}.}

\item{assay}{character string specifying the assay slot to use as 
input data. Defaults to the 1st available (\code{assayNames(x)[1]}).}

\item{by}{character vector specifying which 
\code{colData(x)} columns to summarize by (at most 2!).}

\item{fun}{a character string.
Specifies the function to use as summary statistic.
Passed to \code{\link[scuttle]{summarizeAssayByGroup}}.}

\item{scale}{logical. Should pseudo-bulks be scaled
by the effective library size and multiplied by 1M?}

\item{verbose}{logical. Should information on progress be reported?}

\item{BPPARAM}{a \code{\link[BiocParallel]{BiocParallelParam}}
object specifying how aggregation should be parallelized.}
}
\value{
a \code{\link[SingleCellExperiment]{SingleCellExperiment}}.
\itemize{
\item{If \code{length(by) == 2}, each sheet (\code{assay}) contains 
  pseudobulks for each of \code{by[1]}, e.g., for each cluster when 
  \code{by = "cluster_id"}. Rows correspond to genes, columns to 
  \code{by[2]}, e.g., samples when \code{by = "sample_id"}.}
\item{If \code{length(by) == 1}, the returned SCE will contain only 
  a single \code{assay} with rows = genes and columns = \code{by}.}}
  
  Aggregation parameters (\code{assay, by, fun, scale}) are stored in 
  \code{metadata()$agg_pars}, and the number of cells that were aggregated 
  is accessible in \code{int_colData()$n_cells}.
}
\description{
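Aggregates single-cell data to pseudobulk data by summarizing an assay 
of a \code{SingleCellExperiment} across the groups of cells defined by 
the \code{colData} columns given in \code{by}.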
}
\examples{
# pseudobulk counts by cluster-sample
data(example_sce)
pb <- aggregateData(example_sce)

# inspect the aggregated object (pb), not the single-cell input
library(SingleCellExperiment)
assayNames(pb)  # one sheet per cluster
head(assay(pb)) # n_genes x n_samples

# scaled CPM: store CPM values as an additional
# assay of the input, then aggregate that assay
cpm <- edgeR::cpm(assay(example_sce))
assays(example_sce)$cpm <- cpm
pb <- aggregateData(example_sce, assay = "cpm", scale = TRUE)
head(assay(pb)) 

# aggregate by cluster only
pb <- aggregateData(example_sce, by = "cluster_id")
length(assays(pb)) # single assay
head(assay(pb))    # n_genes x n_clusters

}
\references{
Crowell, HL, Soneson, C, Germain, P-L, Calini, D, 
Collin, L, Raposo, C, Malhotra, D & Robinson, MD: 
On the discovery of population-specific state transitions from 
multi-sample multi-condition single-cell RNA sequencing data. 
\emph{bioRxiv} \strong{713412} (2018). 
doi: \url{https://doi.org/10.1101/713412}
}
\author{
Helena L Crowell & Mark D Robinson
}
