% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/COTAN-getters.R, R/COTAN-modifiers.R,
%   R/establishGenesClusters.R, R/genesStatistics.R
\name{getGDI,COTAN-method}
\alias{getGDI,COTAN-method}
\alias{getGDI}
\alias{storeGDI,COTAN-method}
\alias{storeGDI}
\alias{GenesStatistics}
\alias{genesCoexSpace}
\alias{establishGenesClusters}
\alias{calculateGenesCE}
\alias{calculateGDIGivenS}
\alias{calculateGDIGivenCorr}
\alias{calculateGDI}
\alias{calculatePValue}
\alias{calculatePDI}
\title{Calculations of genes statistics}
\usage{
\S4method{getGDI}{COTAN}(objCOTAN)

\S4method{storeGDI}{COTAN}(objCOTAN, genesGDI)

genesCoexSpace(objCOTAN, primaryMarkers, numGenesPerMarker = 25L)

establishGenesClusters(
  objCOTAN,
  groupMarkers,
  numGenesPerMarker = 25L,
  kCuts = 6L,
  distance = "cosine",
  hclustMethod = "ward.D2"
)

calculateGenesCE(objCOTAN)

calculateGDIGivenS(S, rowsFraction = 0.05, cores = 1L, chunkSize = 1024L)

calculateGDIGivenCorr(corr, numDegreesOfFreedom, rowsFraction = 0.05)

calculateGDI(
  objCOTAN,
  statType = "S",
  rowsFraction = 0.05,
  cores = 1L,
  chunkSize = 1024L
)

calculatePValue(
  objCOTAN,
  statType = "S",
  geneSubsetCol = vector(mode = "character"),
  geneSubsetRow = vector(mode = "character")
)

calculatePDI(
  objCOTAN,
  statType = "S",
  geneSubsetCol = vector(mode = "character"),
  geneSubsetRow = vector(mode = "character")
)
}
\arguments{
\item{objCOTAN}{a \code{COTAN} object}

\item{genesGDI}{the named genes' \code{GDI} \code{array} to store or the output
\code{data.frame} of the function \code{\link[=calculateGDI]{calculateGDI()}}}

\item{primaryMarkers}{A vector of primary marker names.}

\item{numGenesPerMarker}{the number of correlated genes to keep as other
markers (default 25)}

\item{groupMarkers}{a named \code{list} with an element for each group comprised
of one or more marker genes}

\item{kCuts}{the number of estimated \emph{clusters} (this defines the height for
the tree cut)}

\item{distance}{type of distance to use. Default is \code{"cosine"}. Can be chosen
among those supported by \code{\link[parallelDist:parDist]{parallelDist::parDist()}}}

\item{hclustMethod}{default is "ward.D2" but can be any method defined by
\code{\link[stats:hclust]{stats::hclust()}} function}

\item{S}{a \code{matrix} object}

\item{rowsFraction}{The fraction of rows that will be averaged to calculate
the \code{GDI}. Defaults to \eqn{5\%}}

\item{cores}{number of cores to use. Default is 1.}

\item{chunkSize}{number of elements to solve in batch in a single core.
Default is 1024.}

\item{corr}{a \code{matrix} object, possibly a subset of the columns of the full
symmetric matrix}

\item{numDegreesOfFreedom}{an \code{int} that determines the number of degrees of
freedom to use in the \eqn{\chi^{2}} test}

\item{statType}{Which statistics to use to compute the p-values. By default
it will use the "S" (Pearson's \eqn{\chi^{2}} test) otherwise the "G"
(G-test)}

\item{geneSubsetCol}{an array of genes. It will be put in columns. If left
empty the function will do it genome-wide.}

\item{geneSubsetRow}{an array of genes. It will be put in rows. If left empty
the function will do it genome-wide.}
}
\value{
\code{getGDI()} returns the genes' \code{GDI} array if available or \code{NULL}
otherwise

\code{storeGDI()} returns the given \code{COTAN} object with updated \code{GDI}
genes' information

\code{genesCoexSpace()} returns a \code{list} with:
\itemize{
\item \code{"SecondaryMarkers"} a named \code{list} that, for each secondary marker,
gives the \code{list} of primary markers that selected it
\item \code{"GCS"} the relevant subset of \code{COEX} \code{matrix}
\item \code{"rankGenes"} a \code{data.frame} with the rank of each gene according to its
\emph{p-value}
}

\code{establishGenesClusters()} returns a \code{list} of:
\itemize{
\item \code{"g.space"} the genes' \code{COEX} space \code{data.frame}
\item \code{"plot.eig"} the eigenvalues plot
\item \code{"pca_clusters"} the \code{PCA} components \code{data.frame}
\item \code{"tree_plot"} the tree plot for the genes' \code{COEX} space
}

\code{calculateGenesCE()} returns a named \code{array} with the \emph{cross-entropy}
of each gene

\code{calculateGDIGivenS()} returns a \code{vector} with the \code{GDI} data for
each column of the input

\code{calculateGDIGivenCorr()} returns a \code{vector} with the \code{GDI} data for
each column of the input

\code{calculateGDI()} returns a \code{data.frame} with:
\itemize{
\item \code{"sum.raw.norm"} the sum of the normalized data rows
\item \code{"GDI"} the \code{GDI} data
\item \code{"exp.cells"} the percentage of cells expressing the gene
}

\code{calculatePValue()} returns a \emph{p-value} \code{matrix} as \code{dspMatrix}

\code{calculatePDI()} returns a \emph{Pair Differential Index} \code{matrix} as
\code{dspMatrix}
}
\description{
A collection of functions returning various statistics
associated with the genes. In particular the \emph{discrepancy} between the
expected probabilities of zero and their actual occurrences, both at the single
gene level and when looking at pairs of genes

To make the \code{GDI} more specific, it may be desirable to restrict
the set of genes against which \code{GDI} is computed to a selected subset, with
the recommendation to include a consistent fraction of cell-identity genes,
and possibly focusing on markers specific for the biological question of
interest (for instance neural cortex layering markers). In this case we
denote it as \emph{Local Differentiation Index} (\code{LDI}) relative to the selected
subset.
}
\details{
\code{getGDI()} extracts the genes' \code{GDI} array as it was stored by the
method \code{\link[=storeGDI]{storeGDI()}}

\code{storeGDI()} stores an already calculated genes' \code{GDI} \code{array} in a
\code{COTAN} object. It can be retrieved using the method \code{\link[=getGDI]{getGDI()}}

\code{genesCoexSpace()} calculates genes groups based on the primary
markers and uses them to prepare the genes' \code{COEX} space \code{data.frame}.

\code{establishGenesClusters()} performs the genes' clustering based on a
pool of gene markers, using the genes' \code{COEX} space

\code{calculateGenesCE()} is used to calculate the discrepancy between
the expected probability of zero and the observed zeros across all cells
for each gene as \emph{cross-entropy}: \eqn{-\sum_{c}{\left(\mathbb{1}_{X_c = 0}
   \log(p_c) + \mathbb{1}_{X_c \neq 0} \log(1 - p_c)\right)}} where \eqn{X_c} is the
observed count and \eqn{p_c} the probability of zero

\code{calculateGDIGivenS()} produces a \code{vector} with the \code{GDI} for each
column based on the \code{S} matrix (\emph{Pearson's \emph{\eqn{\chi^{2}}} test})

\code{calculateGDIGivenCorr()} produces a \code{vector} with the \code{GDI} for
each column based on the given correlation matrix, using the \emph{Pearson's
\emph{\eqn{\chi^{2}}} test}

\code{calculateGDI()} produces a \code{data.frame} with the \code{GDI} for each
gene based on the \code{COEX} matrix

\code{calculatePValue()} computes the p-values for genes in the \code{COTAN}
object. It can be used genome-wide or by setting some specific genes of
interest. By default it computes the \emph{p-values} using the \code{S} statistics
(\eqn{\chi^{2}})

\code{calculatePDI()} computes the p-values for genes in the \code{COTAN}
object using \code{\link[=calculatePValue]{calculatePValue()}} and takes their
\eqn{\log{({-\log{(\cdot)}})}} to calculate the genes' \emph{Pair Differential
Index}
}
\examples{
data("test.dataset")
objCOTAN <- COTAN(raw = test.dataset)
objCOTAN <- proceedToCoex(objCOTAN, cores = 6L, saveObj = FALSE)

markers <- getGenes(objCOTAN)[sample(getNumGenes(objCOTAN), 10)]
GCS <- genesCoexSpace(objCOTAN, primaryMarkers = markers,
                      numGenesPerMarker = 15)

groupMarkers <- list(G1 = c("g-000010", "g-000020", "g-000138"),
                     G2 = c("g-000300"),
                     G3 = c("g-000510", "g-000530", "g-000550",
                            "g-000570", "g-000590"))

resList <-  establishGenesClusters(objCOTAN, groupMarkers = groupMarkers,
                                   numGenesPerMarker = 11)

}
