#' Test significant change in Barcode proportion or occurrence
#'  between sample groups
#'
#' `testBarcodeSignif()` tests differential proportion (diffProp) or
#'  occurrence (diffOcc) for each Barcode between sample groups,
#'  with the option account for multiple factors using regression models.
#'  The results can be visualized by [plotBarcodePValue],
#'  [plotSignifBarcodeHeatmap], [plotBarcodeMA], and [plotSignifBarcodeProportion].
#'
#' @param barbieQ A `barbieQ` object created by the [createBarbieQ] function.
#' @param method A string specifying what is to be tested.
#'  Options: 'diffProp' and 'diffOcc'. Defaults to 'diffProp'.
#' @param sampleMetadata A `matrix` or `data.frame` of sample conditions,
#'  where each factor is represented in a separate column. Defaults to NULL,
#'  in which case sample conditions are inherited from `barbieQ$metadata`.
#' @param sampleGroup A string representing the name of a factor from the
#'  sample conditions passed by `barbieQ` or `sampleMetadata`, or a vector of
#'  sample conditions, indicating the primary factor to be tested.
#'  Defaults to the first factor in the sample conditions.
#' @param contrastFormula A string indicating the contrast between sample conditions.
#'  Defaults to contrast bewteen the original levels of conditions in the specified factor.
#' @param designFormula A formula to compute the `designMatrix`, generated by
#'  the [stats::as.formula] function. Defaults to include all factors
#'  in the sample conditions.
#' @param designMatrix A numeric matrix standardizing `sampleMetadata`, generated by
#'  the [stats::model.matrix] function. Defaults to be generated by
#'  `designFormula`.
#' @param block A vector (array) indicating sample duplicates. Defaults to
#'  no duplicates among the samples.
#' @param transformation A string specifying the transformation method when
#'  testing 'diffProp'. Options include: 'asin-sqrt', 'logit', and 'none'.
#'  Defaults to 'asin-sqrt'.
#' @param regularization A string specifying the regularization method when
#'  testing 'diffOcc'. Options: 'firth' and 'none'. Defaults to 'firth'.
#'
#' @return A `barbieQ` object updated with a `testBarcodes` component, adding
#'  a `list` named by the test name, containing:
#'   * `results`: a `data.frame` of statistical test results,
#'    including p-values, etc.
#'   * `methods`: a `list` indicating the specific statistical test method used.
#'   * `targets`: a numeric `matrix` of the standardized design matrix
#'    used in the test.
#'
#' @export
#'
#' @importFrom limma makeContrasts
#' @importFrom magrittr %>%
#' @importFrom dplyr setdiff
#' @importFrom dplyr recode
#' @importFrom stats as.formula
#' @importFrom stats model.matrix
#' @importFrom stats setNames
#' @importClassesFrom SummarizedExperiment SummarizedExperiment
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom SummarizedExperiment colData
#' @importFrom SummarizedExperiment rowData
#' @importFrom SummarizedExperiment assays
#' @importFrom S4Vectors metadata
#'
#' @examples
#' Block <- c(1, 1, 2, 3, 3, 4, 1, 1, 2, 3, 3, 4)
#' Treat <- factor(rep(seq_len(2), each = 6))
#' Time <- rep(rep(seq_len(2), each = 3), 2)
#' nbarcodes <- 50
#' nsamples <- 12
#' count <- abs(matrix(rnorm(nbarcodes * nsamples), nbarcodes, nsamples))
#' rownames(count) <- paste0('Barcode', seq_len(nbarcodes))
#' barbieQ <- createBarbieQ(count, data.frame(Treat = Treat, Time = Time))
#' testBarcodeSignif(barbieQ, sampleGroup = 'Treat')
testBarcodeSignif <- function(barbieQ, method = "diffProp", sampleMetadata = NULL, sampleGroup = NULL,
    contrastFormula = NULL, designFormula = NULL, designMatrix = NULL,
    block = NULL, transformation = "asin-sqrt", regularization = "firth") {

    ## check method: confirm method is chosen from 'diffProp' and 'diffOcc'
    method <- match.arg(method, c("diffProp", "diffOcc"))
    ## extract sampleMetadata and primary effector based on arguments
    sampleMetadata <- extractSampleMetadataAndPrimaryFactor(barbieQ = barbieQ, sampleMetadata = sampleMetadata,
        sampleGroup = sampleGroup)
    primaryFactor <- S4Vectors::metadata(sampleMetadata)$primaryFactor

    ## confirm all effectors (columns) in 'sampleMetadata' are factor() or numeric()
    ## convert columns that are neither factor nor numeric into factor
    nonFac <- vapply(sampleMetadata, function(x) !(is.factor(x) | is.numeric(x)), logical(1))
    for (col in seq(nonFac)[nonFac]) {
        sampleMetadata[, col] <- factor(sampleMetadata[, col])
    }
    ## remove factor columns with only one level - will be problematic in model.matrix
    oneLevelFactors <- vapply(sampleMetadata, function(x) {
        is.factor(x) && length(unique(x)) == 1
    }, logical(1))
    if (any(oneLevelFactors)) {
        sampleMetadata <- sampleMetadata[, !oneLevelFactors]
        message("removing factors with only one level from sampleMetadata: ", paste0(colnames(sampleMetadata)[oneLevelFactors],
            collapse = ", "))
    }

    ## import 'design' using tidy evaluation if designFormula not specified, taking
    ## all effectors from 'sampleMetadata' into account prioritize the column of
    ## sampleGroup to be compared
    if (is.null(designFormula)) {
        otherCols <- dplyr::setdiff(colnames(sampleMetadata), primaryFactor)
        ## create the formula string based on the presence of other columns
        if (length(otherCols) > 0L) {
            formulaStr <- paste("~0 +", primaryFactor, "+", paste(otherCols, collapse = " + "))
        } else {
            formulaStr <- paste("~0 +", primaryFactor)
        }
        ## convert to formula
        designFormula <- stats::as.formula(formulaStr)
    }

    ## check designFormula format
    if (!inherits(designFormula, "formula")) {
        stop("The `designFormula` argument must be a valid `formula`.")
    } else {
        ## check if all variables in designFormula are present in 'sampleMetadata'
        missingTerms <- dplyr::setdiff(all.vars(designFormula), colnames(sampleMetadata))
        if (length(missingTerms) > 0) {
            stop("The following variables in the `designFormula` are missing from `sampleMetadata`:",
                paste(missingTerms, collapse = ", "))
        }
    }

    ## if designMatrix not specified, generate it by designFormula
    if (is.null(designMatrix)) {
        designMatrix <- stats::model.matrix(designFormula, data = sampleMetadata)
    } else {
        ## check designMatrix format and dimension
        if (is.matrix(designMatrix) || is.data.frame(designMatrix)) {
            if (nrow(designMatrix) != nrow(sampleMetadata)) {
                stop("nrow (sample size) don't match between `designMatrix` and `sampleMetadata`.")
            } else {
                ## using specified `designMatrix` and deleting the default
                ## `designFormula` to avoid confusion
                designFormula <- "NA"
            }
        } else {
            stop("`designMatrix` should always be a matrix. use fucntion `model.matrix` to create it.")
        }
    }
    rownames(designMatrix) <- colnames(barbieQ)

    ## make designMatrix full rank by deleting columns of nested effectors, ie.
    ## linearly related vectors compute QR decomposition of the designMatrix
    q <- qr(designMatrix)
    keep <- rep(TRUE, ncol(designMatrix))
    ## select the indices in the pivot vector after the rank of the matrix the columns
    ## of matrix that are linearly dependent (those that do not contribute to the
    ## rank)
    keep[q$pivot[-seq(q$rank)]] <- FALSE
    designMatrix <- designMatrix[, keep, drop = FALSE]
    ## message the users if any linearly related vectors are deleted
    if (any(!keep)) {
        message("deleting ", sum(!keep), " nested factor(s) because `designMatrix` must be full rank.")
    }

    ## check block groups if it's specified
    if (!(is.null(block))) {
        if (length(block) != ncol(barbieQ)) {
            stop("the length of `block` doesn't match the sample size in `sampleMetadata`.")
        }
    }

    ## computing `myContrast` 'primaryFactor' indicates which column relates to
    ## 'sampleGroup': either a imported 'sampleGroup' column or a column name
    ## specified by 'sampleGroup' like 'Treat' check `contrastFormula`
    if (is.null(contrastFormula)) {
        if (is.factor(sampleMetadata[, primaryFactor])) {
            ## case when sampleGroup column is factor
            contrastLevels <- levels(sampleMetadata[, primaryFactor])
            contrastFormula <- paste0(primaryFactor, contrastLevels[2], " - ", primaryFactor,
                contrastLevels[1])
        } else if (is.numeric(sampleMetadata[, primaryFactor])) {
            ## case when sampleGroup column is numeric
            contrastFormula <- primaryFactor
            contrastLevels <- paste0(primaryFactor, c("downtrend", "uptrend"))
        }
        message("setting up contrastFormula: ", contrastFormula)
    }
    ## create `myContrast` based on `contrastFormula`
    mycontrasts <- tryCatch({
        limma::makeContrasts(contrasts = contrastFormula, levels = colnames(designMatrix))
    }, error = function(e) {
        stop("Invalid `contrastFormula`: ", e$message)
    })
    ## extract contrastLevels from formula
    levelHigh <- paste(rownames(mycontrasts)[mycontrasts > 0], collapse = " + ")
    levelLow <- paste(rownames(mycontrasts)[mycontrasts < 0], collapse = " + ")
    if (levelLow == "") {
        ## numeric variable only has one contrast level
        contrastLevels <- matrix(paste0(primaryFactor, c("downtrend", "uptrend")), ncol = 2)
        colnames(contrastLevels) <- c("levelLow", "levelHigh")
    } else {
        contrastLevels <- cbind(levelLow, levelHigh)
    }

    ## dispatch test functions based on the specified method default setting is
    ## 'diffProp'
    if (method == "diffProp") {
        statsDf <- testDiffProp(proportion = SummarizedExperiment::assays(barbieQ)$proportion,
            count = SummarizedExperiment::assay(barbieQ),
            transformation = transformation, mycontrasts = mycontrasts[, 1], designMatrix = designMatrix,
            block = block)

        ## save as a DFrame
        statsDf <- S4Vectors::DataFrame(statsDf)
        S4Vectors::metadata(statsDf)$method <- method
        S4Vectors::metadata(statsDf)$design <- designMatrix
        S4Vectors::metadata(statsDf)$contrasts <- mycontrasts
        S4Vectors::metadata(statsDf)$contrastGroups <- contrastLevels[1, ]
        S4Vectors::metadata(statsDf)$transformation <- transformation
        S4Vectors::metadata(statsDf)$adj.P.method <- "BH"

    } else if (method == "diffOcc") {
        ## logistic regression, default regularization is 'firth'
        statsList <- testDiffOcc(SummarizedExperiment::assays(barbieQ)$occurrence, regularization = regularization,
            mycontrasts = mycontrasts[, 1], designMatrix = designMatrix)
        statsDf <- statsList$statsDf

        ## save as a DFrame
        statsDf <- S4Vectors::DataFrame(statsDf)
        S4Vectors::metadata(statsDf)$method <- method
        S4Vectors::metadata(statsDf)$pseudo.design <- designMatrix
        S4Vectors::metadata(statsDf)$design <- statsList$design
        S4Vectors::metadata(statsDf)$pseudo.contrasts <- mycontrasts
        S4Vectors::metadata(statsDf)$contrastVariables <- statsList$variables
        S4Vectors::metadata(statsDf)$contrastGroups <- contrastLevels[1, ]
        S4Vectors::metadata(statsDf)$regularization <- paste0(regularization, " regularization on likelihood")
        S4Vectors::metadata(statsDf)$adj.P.method <- "BH"

    }

    ## compute increasing tendency to which group
    statsDf$tendencyTo <- dplyr::recode(as.character(statsDf$direction), `1` = contrastLevels[1,
        "levelHigh"], `-1` = contrastLevels[1, "levelLow"], `0` = "n.s.") %>%
        as.vector()

    ## save testing results to rowData of `barbieQ`
    SummarizedExperiment::rowData(barbieQ)$testingBarcode <- statsDf

    ## assign colors for the test results
    S4Vectors::metadata(barbieQ)$factorColors$testingBarcode <- stats::setNames(c("#33AAFF",
        "#FF5959", "#FFC000"), c(contrastLevels[1, "levelLow"], contrastLevels[1, "levelHigh"],
        "n.s."))

    return(barbieQ)
}
