% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compute_PB_counts.R
\name{compute_PB_counts}
\alias{compute_PB_counts}
\title{Compute pseudo-bulk (PB) counts}
\usage{
compute_PB_counts(
  sce,
  EC_list,
  design,
  sample_col_name = "sample",
  group_col_name = "group",
  sce_cluster_name = "cell_type",
  min_cells_per_cluster = 100,
  min_counts_per_gene_per_group = 20,
  min_counts_ECs = 0
)
}
\arguments{
\item{sce}{a \code{SingleCellExperiment} object, computed via \code{\link{load_USA}}.}

\item{EC_list}{a \code{list}, computed via \code{\link{load_EC}}.}

\item{design}{a \code{\linkS4class{data.frame}} indicating the design of the experiment with one row for each sample;
'design' must contain a column with the sample id and one with the group id.}

\item{sample_col_name}{a character ("sample" by default), indicating the column name of the 'design' element which stores the sample id.}

\item{group_col_name}{a character ("group" by default), indicating the column name of the 'design' element which stores the group id.}

\item{sce_cluster_name}{a character ("cell_type" by default), indicating the name of the 'colData(sce)' element, 
which stores the cluster id of each cell (by default, colData(sce)$cell_type).}

\item{min_cells_per_cluster}{cell cluster (e.g., cell-type) filter.
'min_cells_per_cluster' is the minimum number of cells, across all samples and groups, for a cell cluster to be considered.
Cell clusters with fewer than 'min_cells_per_cluster' cells will not be analyzed.}

\item{min_counts_per_gene_per_group}{minimum number of counts per gene, in each cell cluster, across all samples of every group.
In each cell cluster, only genes with at least 'min_counts_per_gene_per_group' counts in both groups of samples will be analyzed.}

\item{min_counts_ECs}{equivalence classes (ECs) filter.
'min_counts_ECs' indicates the minimum number of counts (across all cells in a cell cluster) for each equivalence class;
by default all ECs are considered (min_counts_ECs = 0).
ECs with 'min_counts_ECs' counts or fewer will be discarded.
Increasing 'min_counts_ECs' will marginally decrease the computational cost, at the cost of a marginal loss in performance.}
}
\value{
A \code{list} of objects required to perform differential testing by \code{\link{DifferentialRegulation}}.
}
\description{
\code{compute_PB_counts} computes the pseudo-bulk (PB) counts,
needed to perform differential testing by \code{\link{DifferentialRegulation}}.
}
\examples{
# load internal data to the package:
data_dir = system.file("extdata", package = "DifferentialRegulation")

# specify sample ids:
sample_ids = paste0("organoid", c(1:3, 16:18))
# set the directory of each sample's input data (obtained via alevin-fry):
base_dir = file.path(data_dir, "alevin-fry", sample_ids)
file.exists(base_dir)

# set paths to USA counts, cell id and gene id:
# Note that alevin-fry needs to be run with '--use-mtx' option
# to store counts in a 'quants_mat.mtx' file.
path_to_counts = file.path(base_dir,"/alevin/quants_mat.mtx")
path_to_cell_id = file.path(base_dir,"/alevin/quants_mat_rows.txt")
path_to_gene_id = file.path(base_dir,"/alevin/quants_mat_cols.txt")

# load USA counts:
sce = load_USA(path_to_counts,
               path_to_cell_id,
               path_to_gene_id,
               sample_ids)
 
# define the design of the study:
design = data.frame(sample = sample_ids,
                    group = c( rep("3 mon", 3), rep("6 mon", 3) ))
design

# cell types should be assigned to each cell;
# here we load pre-computed cell types:
path_to_DF = file.path(data_dir,"DF_cell_types.txt")
DF_cell_types = read.csv(path_to_DF, sep = "\t", header = TRUE)
matches = match(colnames(sce), DF_cell_types$cell_id)
sce$cell_type = DF_cell_types$cell_type[matches]

# set paths to EC counts and ECs:
path_to_EC_counts = file.path(base_dir,"/alevin/geqc_counts.mtx")
path_to_EC = file.path(base_dir,"/alevin/gene_eqclass.txt.gz")

# load EC counts:
EC_list = load_EC(path_to_EC_counts,
                  path_to_EC,
                  path_to_cell_id,
                  path_to_gene_id,
                  sample_ids)

PB_counts = compute_PB_counts(sce = sce,
                              EC_list = EC_list,
                              design =  design,
                              sample_col_name = "sample",
                              group_col_name = "group",
                              sce_cluster_name = "cell_type",
                              min_cells_per_cluster = 100, 
                              min_counts_per_gene_per_group = 20)

}
\seealso{
\code{\link{load_EC}}, \code{\link{load_USA}}, \code{\link{DifferentialRegulation}}, \code{\link{plot_pi}}
}
\author{
Simone Tiberi \email{simone.tiberi@unibo.it}
}
