% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/find_de_neighborhoods.R
\name{find_de_neighborhoods}
\alias{find_de_neighborhoods}
\title{Find differential expression neighborhoods}
\usage{
find_de_neighborhoods(
  fit,
  group_by,
  contrast = fit$contrast,
  selection_procedure = c("zscore", "contrast"),
  directions = c("random", "contrast", "axis_parallel"),
  min_neighborhood_size = 50,
  de_mat = SummarizedExperiment::assays(fit)[["DE"]],
  test_data = fit$test_data,
  test_data_col_data = NULL,
  test_method = c("glmGamPoi", "edgeR", "limma", "none"),
  continuous_assay_name = fit$use_assay,
  count_assay_name = "counts",
  size_factor_method = NULL,
  design = fit$design,
  alignment_design = fit$alignment_design,
  add_diff_in_diff = TRUE,
  make_neighborhoods_consistent = FALSE,
  skip_confounded_neighborhoods = FALSE,
  control_parameters = NULL,
  verbose = TRUE
)
}
\arguments{
\item{fit}{the \code{lemur_fit} generated by \code{lemur()}}

\item{group_by}{If \code{test_data} is provided, \code{group_by} defines
how the pseudobulks are formed. This is typically the variable in the column
data that represents the independent unit of replication of the experiment
(e.g., the mouse or patient ID). The argument has to be wrapped in \code{vars(...)}.}

\item{contrast}{a specification of which contrast to fit. This defaults to the
\code{contrast} argument that was used for \code{test_de} and is stored in \code{fit$contrast}.}

\item{selection_procedure}{specify the algorithm that is used to select the
neighborhoods for each gene. Broadly, \code{selection_procedure = "zscore"} is faster
but less precise than \code{selection_procedure = "contrast"}.}

\item{directions}{a string to define the algorithm to select the direction onto
which the cells are projected before searching for the neighborhood.
\code{directions = "random"} produces denser neighborhoods, whereas \code{directions = "contrast"}
usually has more power. \cr
Alternatively, this can also be a matrix with one direction for each gene
(i.e., a matrix of size \code{nrow(fit) * fit$n_embedding}).}

\item{min_neighborhood_size}{the minimum number of cells per neighborhood. Default: \code{50}.}

\item{de_mat}{the matrix with the differential expression values. It is only relevant if
\code{selection_procedure = "zscore"} or \code{directions = "random"}. Defaults
to an assay called \code{"DE"} that is produced by \code{lemur::test_de()}.}

\item{test_data}{a \code{SummarizedExperiment} object or a named list of matrices. The
data is used to test if the neighborhoods inferred on the training data contain a
reliable significant change. If \code{test_method} is \code{"glmGamPoi"} or \code{"edgeR"} a test
using raw counts is conducted and two matching assays are needed: (1) the continuous
assay (with \code{continuous_assay_name}) is projected onto the LEMUR fit to find the latent
position of each cell and (2) the count assay (\code{count_assay_name}) is used for
forming the pseudobulk. If \code{test_method == "limma"}, only the continuous assay is needed. \cr
The argument defaults to the test data that was split off when calling \code{lemur()}.}

\item{test_data_col_data}{additional column data for the \code{test_data} argument.}

\item{test_method}{choice of test for the pseudobulked differential expression.
\href{https://bioconductor.org/packages/glmGamPoi/}{glmGamPoi} and
\href{https://bioconductor.org/packages/edgeR/}{edgeR} work on a count assay.
\href{http://bioconductor.org/packages/limma/}{limma} works on the continuous assay.}

\item{continuous_assay_name, count_assay_name}{the assay or list names of \code{test_data}.}

\item{size_factor_method}{Set the procedure to calculate the size factor after pseudobulking. This argument
is only relevant if \code{test_method} is \code{"glmGamPoi"} or \code{"edgeR"}. If \code{fit} is subsetted, using a
vector with the sequencing depth per cell ensures reasonable results.
Default: \code{NULL} which means that \code{colSums(assay(fit$test_data, count_assay_name))} is used.}

\item{design, alignment_design}{the design and the alignment design to use for the fit. Default: \code{fit$design} and \code{fit$alignment_design}, respectively.}

\item{add_diff_in_diff}{a boolean to specify if the log-fold change (plus significance) of
the DE in the neighborhood against the DE in the complement of the neighborhood is calculated.
If \code{TRUE}, the result includes three additional columns starting with \code{"did_"} short for
difference-in-difference. Default: \code{TRUE}.}

\item{make_neighborhoods_consistent}{Include cells from outside the neighborhood if they appear
at least 10 times among the k-nearest neighbors of the cells inside the neighborhood. Secondly,
remove cells from the neighborhood which appear fewer than 10 times among the k-nearest neighbors of the
other cells in the neighborhood. Default: \code{FALSE}.}

\item{skip_confounded_neighborhoods}{Sometimes the inferred neighborhoods are not limited to
a single cell state; this becomes problematic if the cells of the conditions compared in the contrast
are unequally distributed between the cell states. Default: \code{FALSE}}

\item{control_parameters}{named list with additional parameters passed to underlying functions.}

\item{verbose}{Should the method print information during the fitting? Default: \code{TRUE}.}
}
\value{
a data frame with one entry per gene
\describe{
\item{\code{name}}{The gene name.}
\item{\code{neighborhood}}{A list column where each element is a vector with the cell names included
in that neighborhood.}
\item{\code{n_cells}}{the number of cells in the neighborhood (\code{lengths(neighborhood)}).}
\item{\code{sel_statistic}}{The statistic that is maximized by the \code{selection_procedure}.}
\item{\code{pval}, \code{adj_pval}, \code{t_statistic}, \code{lfc}}{The p-value, Benjamini-Hochberg adjusted p-value (FDR), the
t-statistic, and the log2 fold change of the differential expression test defined by \code{contrast} for the
cells inside the neighborhood (calculated using \code{test_method}). Only present if \code{test_data} is not \code{NULL}.}
\item{\code{did_pval}, \code{did_adj_pval}, \code{did_lfc}}{A measure of whether the differential expression of the cells
inside the neighborhood is significantly different from the differential expression of the cells outside
the neighborhood. Only present if \code{add_diff_in_diff = TRUE}.}
}
}
\description{
Find differential expression neighborhoods
}
\examples{
data(glioblastoma_example_data)
fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition,
             n_emb = 5, verbose = FALSE)
# Optional alignment
# fit <- align_harmony(fit)
fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl"))
nei <- find_de_neighborhoods(fit, group_by = vars(patient_id))
head(nei)

}
