% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/registration_pseudobulk.R
\name{registration_pseudobulk}
\alias{registration_pseudobulk}
\title{Spatial registration: pseudobulk}
\usage{
registration_pseudobulk(
  sce,
  var_registration,
  var_sample_id,
  covars = NULL,
  min_ncells = 10,
  pseudobulk_rds_file = NULL,
  filter_expr = TRUE,
  mito_gene = NULL
)
}
\arguments{
\item{sce}{A
\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment-class}
object or one that inherits its properties.}

\item{var_registration}{A \code{character(1)} specifying the \code{colData(sce)}
variable of interest that will be used for computing the relevant
statistics. This should be a categorical variable, with all categories
syntactically valid (could be used as an R variable, no special characters or
leading numbers), e.g. 'L1.2', 'celltype2' not 'L1/2' or '2'.}

\item{var_sample_id}{A \code{character(1)} specifying the \code{colData(sce)} variable
with the sample ID.}

\item{covars}{A \code{character()} with names of sample-level covariates.}

\item{min_ncells}{An \code{integer(1)} greater than 0 specifying the minimum
number of cells (for scRNA-seq) or spots (for spatial) that are combined
when pseudo-bulking. Pseudo-bulked samples with fewer than \code{min_ncells} in
\code{sce_pseudo$ncells} will be dropped.}

\item{pseudobulk_rds_file}{A \code{character(1)} specifying the path for saving
an RDS file with the pseudo-bulked object. It's useful to specify this since
pseudo-bulking can take hours to run on large datasets.}

\item{filter_expr}{A \code{logical(1)} specifying whether to filter pseudo-bulked
counts with \code{edgeR::filterByExpr}. Defaults to \code{TRUE}; filtering is recommended for
the spatial registration workflow.}

\item{mito_gene}{An optional \code{logical()} vector indicating which genes are
mitochondrial, used to calculate the pseudo-bulked mitochondrial expression rates
\code{expr_chrM} and \code{pseudo_expr_chrM}. Its length must match \code{nrow(sce)}.}
}
\value{
A pseudo-bulked \link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment-class} object. The \code{logcounts()} assay contains \code{log2-CPM}
values calculated with \code{edgeR::cpm(log = TRUE)}. See
\url{https://github.com/LieberInstitute/spatialLIBD/issues/106} and
\url{https://support.bioconductor.org/p/9161754} for more details about the
math behind \code{scuttle::logNormCounts()}, \code{edgeR::cpm()}, and their
differences.
}
\description{
Pseudo-bulk the gene expression, filter lowly-expressed genes, and normalize.
This is the first step for spatial registration and for statistical modeling.
}
\examples{
## Fix the RNG seed so the simulated example data is reproducible
set.seed(20220907)

## Simulate a small SingleCellExperiment to work with
sce <- scuttle::mockSCE()

## Randomly assign every cell to one of five samples (A through E)
sample_ids <- LETTERS[1:5]
sce$sample_id <- sample(sample_ids, size = ncol(sce), replace = TRUE)

## Draw one age per sample and propagate it to the cells as a
## sample-level covariate
ages <- stats::setNames(
    rnorm(length(sample_ids), mean = 20, sd = 4),
    sample_ids
)
sce$age <- ages[sce$sample_id]

## Attach gene-level annotation (IDs and names) to the rows
gene_index <- seq_len(nrow(sce))
rowData(sce)$gene_id <- paste0("ENSG", gene_index)
rowData(sce)$gene_name <- paste0("gene", gene_index)

## Pseudo-bulk the cells using Cell_Cycle as the registration variable
sce_pseudo <- registration_pseudobulk(
    sce,
    var_registration = "Cell_Cycle",
    var_sample_id = "sample_id",
    covars = "age",
    min_ncells = NULL
)

## Inspect the sample-level and gene-level metadata of the result
colData(sce_pseudo)
rowData(sce_pseudo)
}
\seealso{
Other spatial registration and statistical modeling functions: 
\code{\link{registration_block_cor}()},
\code{\link{registration_model}()},
\code{\link{registration_stats_anova}()},
\code{\link{registration_stats_enrichment}()},
\code{\link{registration_stats_pairwise}()},
\code{\link{registration_wrapper}()}
}
\concept{spatial registration and statistical modeling functions}
