% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/create_RCTD.R
\name{createRctd}
\alias{createRctd}
\title{Preprocess data before RCTD}
\usage{
createRctd(
  spatial_experiment,
  reference_experiment,
  cell_type_col = "cell_type",
  require_int = TRUE,
  gene_cutoff = 0.000125,
  fc_cutoff = 0.5,
  gene_cutoff_reg = 2e-04,
  fc_cutoff_reg = 0.75,
  gene_obs_min = 3,
  pixel_count_min = 10,
  UMI_min = 100,
  UMI_max = 2e+07,
  UMI_min_sigma = 300,
  ref_UMI_min = 100,
  ref_n_cells_min = 25,
  ref_n_cells_max = 10000,
  cell_type_profiles = NULL,
  class_df = NULL,
  cell_type_names = NULL
)
}
\arguments{
\item{spatial_experiment}{\code{\link[SummarizedExperiment]{SummarizedExperiment}} object (or any
derivative object, including \code{\link[SpatialExperiment]{SpatialExperiment}})
containing spatial transcriptomics data to be deconvolved. The object must
contain:
\itemize{
  \item An \code{assay} matrix of gene expression counts (genes as rows,
  pixels as columns) with unique gene names as row names and unique pixel
  barcodes as column names.
  \item Optionally, a \code{spatialCoords} matrix containing x and y
  coordinates for each pixel. If \code{spatial_experiment} does not have
  \code{spatialCoords}, dummy coordinates will be used.
  \item Optionally, a \code{colData} column named \code{nUMI} containing
  total UMI counts for each pixel. If not provided, \code{nUMI} will be
  calculated as the column sums of the counts matrix.
}}

\item{reference_experiment}{\code{\link[SummarizedExperiment]{SummarizedExperiment}} object containing
annotated RNA-seq data (e.g., from snRNA-seq, scRNA-seq, or cell
type-specific bulk RNA-seq), used to learn cell type profiles. The object
must contain:
\itemize{
  \item An \code{assay} matrix of gene expression counts (genes as rows,
  cells as columns) with unique gene names as row names and unique cell
  barcodes as column names.
  \item A \code{colData} column containing cell type annotations for each
  cell (column name specified by \code{cell_type_col}).
  \item Optionally, a \code{colData} column named \code{nUMI} containing
  total UMI counts for each cell. If not provided, \code{nUMI} will be
  calculated as the column sums of the counts matrix.
}}

\item{cell_type_col}{character, name of the column in
\code{colData(reference_experiment)} containing cell type annotations
(default: \code{"cell_type"})}

\item{require_int}{logical, whether counts and nUMI are required to be
integers (default: \code{TRUE})}

\item{gene_cutoff}{numeric, minimum normalized gene expression for genes to
be included in the platform effect normalization step (default: 0.000125)}

\item{fc_cutoff}{numeric, minimum log fold change (across cell types) for
genes to be included in the platform effect normalization step (default:
0.5)}

\item{gene_cutoff_reg}{numeric, minimum normalized gene expression for genes
to be included in the RCTD step (default: 0.0002)}

\item{fc_cutoff_reg}{numeric, minimum log fold change (across cell types) for
genes to be included in the RCTD step (default: 0.75)}

\item{gene_obs_min}{numeric, minimum number of times a gene must appear in
the spatial transcriptomics data to be included in the analysis
(default: 3)}

\item{pixel_count_min}{numeric, minimum total gene count for a pixel to be
included in the analysis (default: 10)}

\item{UMI_min}{numeric, minimum UMI count per pixel (default: 100)}

\item{UMI_max}{numeric, maximum UMI count per pixel (default: 20,000,000)}

\item{UMI_min_sigma}{numeric, minimum UMI count for pixels used in platform
effect normalization (default: 300)}

\item{ref_UMI_min}{numeric, minimum UMI count for cells to be included in the
reference (default: 100)}

\item{ref_n_cells_min}{numeric, minimum number of cells per cell type in the
reference (default: 25)}

\item{ref_n_cells_max}{numeric, maximum number of cells per cell type in the
reference; cell types exceeding this number will be downsampled (default:
10,000)}

\item{cell_type_profiles}{matrix of precomputed cell type expression profiles
(genes as rows, cell types as columns), optional. If this option is used,
gene names and cell type names must be present in the \code{dimnames}, and
\code{reference_experiment} will be ignored.}

\item{class_df}{data frame mapping cell types to classes, optional. If
specified, RCTD will report confidence on the class level.}

\item{cell_type_names}{character vector of cell type names to include,
optional}
}
\value{
A list with four elements:
  \itemize{
    \item \code{spatial_experiment}: Preprocessed
    \code{\link[SummarizedExperiment]{SummarizedExperiment}} object
    containing spatial transcriptomics data with filtered pixels and genes
    \item \code{cell_type_info}: List containing cell type information,
    including expression profiles and metadata
    \item \code{internal_vars}: List of internal variables used by RCTD,
    including differentially expressed gene lists and class information
    \item \code{config}: List of configuration parameters used for RCTD
  }
}
\description{
Performs initial preprocessing steps on a spatial transcriptomics dataset
and a reference dataset prior to running RCTD. This function filters pixels
and genes based on UMI counts and other thresholds, and identifies
differentially expressed genes. The output of this function should be passed
to \code{\link{runRctd}} to perform the cell type deconvolution.
}
\examples{
data(rctdSim)

# Spatial transcriptomics data
library(SpatialExperiment)
spatial_spe <- SpatialExperiment(
    assay = rctdSim$spatial_rna_counts,
    spatialCoords = rctdSim$spatial_rna_coords
)

# Reference data
library(SummarizedExperiment)
reference_se <- SummarizedExperiment(
    assays = list(counts = rctdSim$reference_counts),
    colData = rctdSim$reference_cell_types
)

# Filter spatial transcriptomics data and aggregate reference data
rctd_data <- createRctd(spatial_spe, reference_se)

# Run RCTD on filtered data
results <- runRctd(rctd_data, rctd_mode = "doublet", max_cores = 1)

# Access the cell type proportions (cell types as rows, pixels as columns)
assay(results, "weights")

# Check spot classifications for doublet mode
colData(results)$spot_class

# Access spatial coordinates
head(spatialCoords(results))

}
