% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/testDA_censoredGLMM.R
\name{testDA_censoredGLMM}
\alias{testDA_censoredGLMM}
\title{Test for differential abundance: method 'censcyt-DA-censored-GLMM'}
\usage{
testDA_censoredGLMM(
  d_counts,
  formula,
  contrast,
  mi_reps = 10,
  imputation_method = c("km", "km_exp", "km_wei", "km_os", "rs", "mrl", "cc", "pmm"),
  min_cells = 3,
  min_samples = NULL,
  normalize = FALSE,
  norm_factors = "TMM",
  BPPARAM = BiocParallel::SerialParam(),
  verbose = FALSE
)
}
\arguments{
\item{d_counts}{\code{\link{SummarizedExperiment}} object containing cluster cell
counts, from \code{\link[diffcyt]{calcCounts}}.}

\item{formula}{Model formula object, see \code{\link[diffcyt]{testDA_GLMM}} and for more 
details \code{\link[diffcyt]{createFormula}}. Be aware of the special format required 
for the censored covariate: instead of just the covariate name (e.g. 'X'), the 
name of the column containing the event indicator (e.g. 'I', with 'I' = 1 if 
'X' is observed and 'I' = 0 if 'X' is censored) needs to be specified as well. 
The notation in the formula is then 'Surv(X,I)'.}

\item{contrast}{Contrast matrix, created with \code{\link[diffcyt]{createContrast}}. See
\code{\link[diffcyt]{createContrast}} for details.}

\item{mi_reps}{Number of imputations used in multiple imputation. Default = 10.}

\item{imputation_method}{Which method should be used in the imputation step. One of
'km', 'km_exp', 'km_wei', 'km_os', 'rs', 'mrl', 'cc', 'pmm'. See details. Default = 'km'.}

\item{min_cells}{Filtering parameter. Default = 3. Clusters are kept for differential
testing if they have at least \code{min_cells} cells in at least \code{min_samples}
samples.}

\item{min_samples}{Filtering parameter. Default = \code{number of samples / 2}, which
is appropriate for two-group comparisons (of equal size). Clusters are kept for
differential testing if they have at least \code{min_cells} cells in at least
\code{min_samples} samples.}

\item{normalize}{Whether to include optional normalization factors to adjust for
composition effects (see the details of \code{\link[diffcyt]{testDA_GLMM}}). Default = FALSE.}

\item{norm_factors}{Normalization factors to use, if \code{normalize = TRUE}. Default =
\code{"TMM"}, in which case normalization factors are calculated automatically using
the 'trimmed mean of M-values' (TMM) method from the \code{edgeR} package.
Alternatively, a vector of values can be provided (the values should multiply to 1).}

\item{BPPARAM}{Parallelization option, specified as a 
\code{\link{BiocParallelParam}} object. If 'BiocParallel' is not available,
no parallelization is used.
E.g. \code{\link[BiocParallel]{MulticoreParam-class}}(workers=2) for parallelization 
with two cores. Default is \code{\link[BiocParallel]{SerialParam-class}}()
(no parallelization).}

\item{verbose}{Logical.}
}
\value{
Returns a new \code{\link{SummarizedExperiment}} object, with differential test
  results stored in the \code{rowData} slot. Results include raw p-values
  (\code{p_val}) and adjusted p-values (\code{p_adj}), which can be used to rank
  clusters by evidence for differential abundance. The results can be accessed with the
  \code{\link{rowData}} accessor function.
}
\description{
Calculate tests for differential abundance of cell populations using method
'censcyt-DA-censored-GLMM'
}
\details{
Calculates tests for differential abundance of clusters, using generalized linear mixed
models (GLMMs) where a covariate is subject to right censoring.


 The same underlying testing as described in \code{\link[diffcyt]{testDA_GLMM}} is 
 applied here. The main difference is that multiple imputation is used to 
 handle a censored covariate. In short, multiple imputation consists of three
 steps: imputation, analysis and pooling. In the imputation step multiple complete
 data sets are generated by imputation. The imputed data is then analysed in 
 the second step and the results are combined in the third step. See also \code{\link[mice]{pool}}.
 The imputation in the first step is specific for censored data in contrast to 
 the 'normal' use of multiple imputation where data is missing. 
 Alternatively the samples with censored data can be removed (complete case analysis) 
 or the censored values can be treated as missing (predictive mean matching).
 
 Possible imputation methods in argument 'imputation_method' are:
\describe{
  \item{'km'}{Kaplan Meier imputation is similar to 'rs' (Risk set imputation) 
              but the random draw is according to the survival function of
              the respective risk set. The largest value is treated as observed
              to obtain a complete survival function. (Taylor et al. 2002)}
  \item{'km_exp'}{The same as 'km' but if the largest value is censored the 
             tail of the survival function is modeled as an exponential 
             distribution where the rate parameter is obtained by fixing
             the distribution to the last observed value. 
             See (Moeschberger and Klein, 1985).}
  \item{'km_wei'}{The same as 'km' but if the largest value is censored the 
             tail of the survival function is modeled as a Weibull 
             distribution where the parameters are obtained by MLE fitting on
             the whole data. See (Moeschberger and Klein, 1985).}
  \item{'km_os'}{The same as 'km' but if the largest value is censored the 
             tail of the survival function is modeled by order statistics. 
             See (Moeschberger and Klein, 1985).}
  \item{'rs'}{Risk Set imputation replaces the censored values with a random
              draw from the risk set of the respective censored value. (Taylor et al. 2002)}
  \item{'mrl'}{Mean Residual Life (Conditional multiple imputation, See Atem
               et al. 2017) is a multiple imputation procedure that bootstraps 
               the data and imputes the censored values by replacing them with their 
               respective mean residual life.}
  \item{'cc'}{Complete case (listwise deletion) analysis removes incomplete samples.}
  \item{'pmm'}{predictive mean matching treats censored values as missing and
               uses predictive mean matching from \code{\link[mice]{mice}}.}
}
}
\examples{
# End-to-end example: simulate counts, censor the covariate, build the model
# formula and contrast, then run the censored GLMM test.

# create small data set with 2 differential clusters (nr_diff = 2) and 10 samples.
# Only the counts element of the returned list is kept as d_counts.
# NOTE(review): presumably return_summarized_experiment = TRUE makes counts a
# SummarizedExperiment, as required by the d_counts argument - confirm.
d_counts <- simulate_multicluster(alphas = runif(10,1e4,1e5),
                                  sizes = runif(10,1e4,1e5),
                                  nr_diff = 2,
                                  group=2,
                                  return_summarized_experiment = TRUE)$counts

# extract covariates data.frame
experiment_info <- SummarizedExperiment::colData(d_counts)
# add censoring: draw an event indicator per sample
# (status = 1: observed, with probability 0.7; status = 0: censored, with probability 0.3)
experiment_info$status <- sample(c(0,1),size=10,replace = TRUE,prob = c(0.3,0.7))
# for the censored samples (10 - sum(status) of them) replace the covariate by a
# uniform draw between 0 and its original value, i.e. a value below the original one
experiment_info$covariate[experiment_info$status == 0] <-
  runif(10-sum(experiment_info$status),
        min=0,
        max=experiment_info$covariate[experiment_info$status == 0])

# create model formula object; event_indicator names the column holding the
# event indicator of the censored covariate (see the formula argument)
da_formula <- createFormula(experiment_info,
                            cols_fixed = c("covariate", "group_covariate"),
                            cols_random = "sample",event_indicator = "status")

# create contrast matrix
# NOTE(review): presumably the entries correspond to intercept, covariate and
# group_covariate, so that the censored covariate is tested - confirm.
contrast <- diffcyt::createContrast(c(0, 1, 0))

# run testing with imputation method 'km'
# (mi_reps = 2 uses fewer imputations than the default of 10)
outs <- testDA_censoredGLMM(d_counts = d_counts, formula = da_formula,
                            contrast = contrast, mi_reps = 2, imputation_method = "km")
# show the test results
diffcyt::topTable(outs)
# differential clusters:
# (rows of d_counts whose paired entry in rowData is not NA)
which(!is.na(SummarizedExperiment::rowData(d_counts)$paired))

}
\references{
{
 A Comparison of Several Methods of Estimating the Survival Function When 
 There is Extreme Right Censoring (M. L. Moeschberger and John P. Klein, 1985)
 
 Improved conditional imputation for linear regression with a randomly 
 censored predictor (Atem et al. 2017)
 
 Survival estimation and testing via multiple imputation (Taylor et al. 2002)
 }
}
