#' @title Build predictor and response blocks with superpathway input
#'
#' @description
#' Builds the predictor block and matrix response for its fit in asmbPLS-DA
#'
#' @param object A superpathway input list object
#' @rdname matrixToBlock-method
#'
#' @returns A list containing the predictor block, response matrix, dimension of
#' each block and observed gene sets with respect to gene_sets_celltype for the
#' original pseudobulk_lognorm matrix, for its use in asmbPLS-DA fit
#'
#' @import checkmate stats
#' @export
#' @examples
#' file <- system.file("extdata", "example_superpathway_input.rda",
#' package = "singIST")
#' load(file)
#' data <- example_superpathway_input
#' matrixToBlock(data)
matrixToBlock <- function(object){
    check_superpathway_input(superpathway_info = object$superpathway_info,
                             hyperparameters_info = object$hyperparameters_info,
                             pseudobulk_lognorm = object$pseudobulk_lognorm,
                             sample_id = object$sample_id,
                             sample_class = object$sample_class,
                             base_class = object$base_class,
                             target_class = object$target_class
                             )
    matrix <- object$pseudobulk_lognorm
    split_all <- strsplit(rownames(object$pseudobulk_lognorm), "_")
    all_samples <- vapply(split_all, `[`, 2, FUN.VALUE = "")
    matrix <- add_missing_psb_rows(mat=matrix,
                                   celltypes=object$superpathway_info$celltypes,
                                   sample_ids = unique(all_samples))
    aux <-  base::do.call(base::rbind, base::strsplit(rownames(matrix),"_"))[,1]
    keep <- aux %in% object$superpathway_info$celltypes
    matrix <- matrix[keep, ]
    gene_sets_celltype <- object$superpathway_info$gene_sets_celltype
    observed_gene_sets <- base::lapply(gene_sets_celltype, function(x)
        intersect(x, colnames(matrix)))
    block_dim <- vapply(observed_gene_sets, length, FUN.VALUE = numeric(1))
    if(all(block_dim > 0)){
        split <- base::do.call(base::rbind, base::strsplit(rownames(matrix),
                                                           "_"))
        block_celltypes <- split[, 1]
        block_sample_id <- split[, 2]
        block_predictor <- matrix(ncol = sum(block_dim),
                                  nrow = length(unique(block_sample_id)))
        rownames(block_predictor) <- unique(block_sample_id)
        colnames(block_predictor) <- as.vector(unlist(Map(
            function(prefix, value)
                paste0(prefix, "*", value),unique(block_celltypes),
            observed_gene_sets)))
        # We use Reduce() to update block_predictor accumulatively
        block_predictor <- base::Reduce(function(bm, i){
            update_block(unique(block_celltypes)[i], observed_gene_sets[[i]],
                         bm, matrix = matrix)
        }, seq_along(unique(block_celltypes)), init = block_predictor)
    }else{
        stop("Check that at least one gene exists in your pseudobulk_lognorm
              matrix for each cell-type gene sets.")
    }
    categories_class <- base::factor(object$sample_class)
    categories_class <- stats::relevel(categories_class,ref = object$base_class)
    matrix_response <- stats::model.matrix(~ categories_class -1)
    if(object$hyperparameters_info$outcome_type == "binary"){
        matrix_response <- matrix_response[ , 2:ncol(matrix_response),
                                            drop = FALSE]
    }else{
        matrix_response <- matrix_response[ , , drop = FALSE]
    }
    block_predictor <- block_predictor[order(rownames(block_predictor)), ]
    output <- list(block_predictor = block_predictor,
                   matrix_response = matrix_response, block_dim = block_dim,
                   observed_gene_sets = observed_gene_sets)
    return(output)
}

#' @title Compute performance metrics of predicted asmbPLSDA
#'
#' @param Y_predict Predicted matrix from asmbPLSDA
#' @param Y_true True class used to fit asmbPLSDA
#' @param outcome.type Outcome type either `"binary"` or `"multiclass"`
#'
#' @returns
#' A vector with accuracy, balanced accuracy, precision, recall and F1 metric
#' @export
#' @examples
#' Results_comparison_measure(c(1,0,1,0,1), c(0,0,1,1,1),
#' outcome.type = "binary")
Results_comparison_measure <- function(Y_predict,
                                        Y_true,
                                        outcome.type= c("binary","multiclass")){
    Y_col <- length(as.vector(Y_true))
    n_match <- n_TP <- n_TN <- n_FP <- n_FN <-
    balanced_accuracy_multicalss <- n_recall_multiclass <- 0
    for (i in seq_len(Y_col)) {
        temp_accu <- which(as.vector(Y_predict)[i] == as.vector(Y_true)[i])
        temp_TP <- which(as.vector(Y_predict)[i] == 1 &&
                            as.vector(Y_true)[i] == 1)
        temp_TN <- which(as.vector(Y_predict)[i] == 0 &&
                            as.vector(Y_true)[i] == 0)
        temp_FP <- which(as.vector(Y_predict)[i] == 1 &&
                            as.vector(Y_true)[i] == 0)
        temp_FN <- which(as.vector(Y_predict)[i] == 0 &&
                            as.vector(Y_true)[i] == 1)
        n_temp_TP <- length(temp_TP)
        n_temp_TN <- length(temp_TN)
        n_temp_FP <- length(temp_FP)
        n_temp_FN <- length(temp_FN)
        n_match <- n_match + temp_accu
        n_TP <- n_TP + n_temp_TP
        n_TN <- n_TN + n_temp_TN
        n_FP <- n_FP + n_temp_FP
        n_FN <- n_FN + n_temp_FN
        n_recall_multiclass <- n_temp_TP/(n_temp_TP + n_temp_FN)
        balanced_accuracy_multicalss <- balanced_accuracy_multicalss +
            n_recall_multiclass
    }
    accuracy <- (n_TP + n_TN)/(n_TP + n_TN + n_FP + n_FN)
    precision <- n_TP/(n_TP + n_FP)
    recall <- n_TP/(n_TP + n_FN) # Sensitivity also
    specificity <- n_TN/(n_TN + n_FP)
    F1 <- 2 * precision * recall/(precision + recall)
    balanced_accuracy <- 0
    balanced_accuracy_binary <- (recall + specificity)/2
    balanced_accuracy_multicalss <- balanced_accuracy_multicalss/Y_col
    if (outcome.type == "binary") {
        balanced_accuracy <- balanced_accuracy_binary
    }else if(outcome.type == "multiclass"){
        balanced_accuracy <- balanced_accuracy_multicalss
    }
    if(outcome.type == "binary" && length(unique(Y_predict))==2 && is.nan(F1)){
        F1 <- 0 # recall + precision = 0 bad classification all classes present
    }
    output <- c("accuracy" = accuracy, "balanced_accuracy" = balanced_accuracy,
                "precision" = precision, "recall" = recall, "F1" = F1)
    return(output)
}

#' @title Leave-one-out Cross-validation
#'
#' @param X.matrix Predictor block matrix from \code{matrixToBlock}
#' @param Y.matrix Response matrix from \code{matrixToBlock}
#' @param PLS_term An integer with the number of PLS components to use passed
#' from hyperparameter list
#' @param X.dim A list with the observed gene set size for each cell type
#' from \code{matrixToBlock}
#' @param quantile.comb.table A matrix with the quantile comb table passed
#' from hyperparameters list object
#' @param outcome.type A character indicating `binary` or `multiclass` passed
#' from hyperparameters list object
#' @param Method A parameter passed from \code{fitOptimal}
#' @param measure A parameter passed from \code{fitOptimal}
#' @param parallel A parameter passed from \code{fitOptimal}
#' @param expected.measure.increase A parameter passed from \code{fitOptimal}
#' @param center A parameter passed from \code{fitOptimal}
#' @param scale A parameter passed from \code{fitOptimal}
#' @param maxiter A parameter passed from \code{fitOptimal}
#' @returns
#' A list containing the optimal quantiles for each PLS component and the
#' optimal number of PLS components.
#' @export
#'
#' @examples
#' file <- system.file("extdata", "example_superpathway_input.rda",
#' package = "singIST")
#' load(file)
#' data <- example_superpathway_input
#' matrices <- matrixToBlock(data)
#' X.matrix <- matrices$block_predictor
#' Y.matrix <- matrices$matrix_response
#' X.dim <- matrices$block_dim
#' quantile.comb.table <- data$hyperparameters_info$quantile_comb_table
#' outcome.type <- data$hyperparameters_info$outcome_type
#' asmbPLSDA.cv.loo(X.matrix, Y.matrix, PLS_term = 1, X.dim,quantile.comb.table,
#' Method = NULL, measure = "B_accuracy", parallel = TRUE,
#' outcome.type = outcome.type, expected.measure.increase = 0.005,
#' center = TRUE, scale = TRUE,maxiter = 100)
asmbPLSDA.cv.loo <- function(X.matrix, Y.matrix, PLS_term = 1, X.dim,
                            quantile.comb.table, outcome.type =
                            c("binary", "multiclass"), Method = NULL,
                            measure = "B_accuracy", parallel = FALSE,
                            expected.measure.increase = 0.005,
                            center = TRUE,scale = TRUE, maxiter = 100){
    n_group <- ncol(Y.matrix)
    measure_selected <- get_measure_index(measure)
    K <- nrow(Y.matrix)
    n_quantile_comb <- nrow(quantile.comb.table)
    quantile_table_CV <- matrix(data = 0, nrow = PLS_term,
                                ncol = (length(X.dim) + 5))
    for (i in seq_len(PLS_term)) {
        results_CV_summary_n <- matrix(0, nrow = n_quantile_comb, ncol = K)
        F_matrix_validation_bind <- matrix(0, nrow = n_quantile_comb, ncol = K)
        if (parallel) {
            results <- execute_parallel_cv(
                K, results_CV_summary_n, F_matrix_validation_bind,
                X.matrix, Y.matrix, i, X.dim, quantile.comb.table, outcome.type,
                quantile_table_CV, Method, measure, expected.measure.increase,
                center, scale, maxiter)
            results_CV_summary_n <- results$results_CV_summary_n
            F_matrix_validation_bind <- results$F_matrix_validation_bind
        } else {
            results <- execute_sequential_cv(
                K, n_quantile_comb, results_CV_summary_n,
                F_matrix_validation_bind,X.matrix, Y.matrix, i, X.dim,
                quantile.comb.table, outcome.type, quantile_table_CV,measure,
                expected.measure.increase, center, scale, maxiter, Method)
            results_CV_summary_n <- results$results_CV_summary_n
            F_matrix_validation_bind <- results$F_matrix_validation_bind
        }
        measure_acc <- performance_measures(n_quantile_comb,
                                            results_CV_summary_n,
                                            F_matrix_validation_bind,
                                            outcome.type,
                                            measure_selected)
        index_max_measure <- which.max(measure_acc)
        quantile_table_CV[i, seq_len(length(X.dim))] <-
            quantile.comb.table[index_max_measure, ]
        quantile_table_CV <- compute_final_measures(
            K, X.matrix, Y.matrix, i, X.dim, quantile_table_CV, outcome.type,
            center, scale, maxiter, Method)
    }
    optimal_nPLS <- select_optimal_PLS(PLS_term, quantile_table_CV, X.dim,
                                    measure_selected, expected.measure.increase)
    return(list("quantile_table_CV" = quantile_table_CV,
                "optimal_nPLS" = optimal_nPLS))
}

#' @title K‐fold × Repeated Cross‐Validation for asmbPLS-DA
#'
#' @description
#' Implements stratified K‐fold cross‐validation with repetitions, mirroring the
#' structure of \code{asmbPLSDA.cv.loo} but using K k and ncv instead of LOO.
#'
#' @param X.matrix Predictor matrix (n×p)
#' @param Y.matrix Response one‐hot matrix (n×q)
#' @param PLS_term Integer: maximum number of PLS components
#' @param X.dim Vector: feature counts per block
#' @param quantile.comb.table Matrix (C×length(X.dim)): quantile combinations
#' @param k Integer: number of CV k (K)
#' @param ncv Integer: number of ncv
#' @param outcome.type "binary" or "multiclass"
#' @param Method Prediction method
#' @param measure "B_accuracy","accuracy","precision","recall","F1"
#' @param parallel Logical: TRUE to parallelize per-fold
#' @param expected.measure.increase Numeric: min performance gain to add PLS
#' @param center Logical: center predictors
#' @param scale Logical: scale predictors
#' @param maxiter Integer: max iterations for asmbPLSDA.fit
#' @return A list with:
#'   \item{quantile_table_CV}{Matrix (PLS_term × (blocks + metrics)) of optimal
#'   quantiles and CV metrics}
#'   \item{optimal_nPLS}{Integer: selected number of PLS components}
#'   \item{splits}{List of length (k*ncv) of train/validation splits}
#' @export
#' @examples
#' # example code
#' file <- system.file("extdata", "example_superpathway_input.rda",
#' package = "singIST")
#' load(file)
#' data <- example_superpathway_input
#' matrices <- matrixToBlock(data)
#' X.matrix <- matrices$block_predictor
#' Y.matrix <- matrices$matrix_response
#' X.dim <- matrices$block_dim
#' quantile.comb.table <- data$hyperparameters_info$quantile_comb_table
#' quantile.comb.table <- rbind(quantile.comb.table, c(0.1, 0.2)) # Add 2 cases
#' outcome.type <- data$hyperparameters_info$outcome_type
#' asmbPLSDA.cv.kcv(X.matrix, Y.matrix, PLS_term = 1,
#' X.dim,quantile.comb.table,Method = NULL, measure = "B_accuracy",
#' parallel = TRUE, outcome.type = outcome.type,
#' expected.measure.increase = 0.005, center = TRUE, scale = TRUE,
#' maxiter = 100)
asmbPLSDA.cv.kcv <- function(
        X.matrix, Y.matrix, PLS_term = 2, X.dim,
        quantile.comb.table, k = 4, ncv = 10,
        outcome.type = c("binary","multiclass"),
        Method = NULL, measure = "B_accuracy",
        parallel = FALSE,
        expected.measure.increase = 0.005,
        center = TRUE, scale = TRUE, maxiter = 100
) {
    metrics <- c("accuracy","balanced_accuracy","precision","recall","F1")
    m_idx <- get_measure_index(measure)
    C <- nrow(quantile.comb.table)
    splits <- make_splits_R(Y.matrix, k, ncv)
    cv_results <- array(NA_real_, c(C, k*ncv, length(metrics)),
                        dimnames=list(NULL,NULL,metrics))
    qtCV <- matrix(NA_real_, nrow=PLS_term, ncol=length(X.dim)+length(metrics),
                    dimnames=list(NULL, c(paste0("block", seq_along(X.dim)),
                    metrics)))
    prev_qc <- NULL
    for(i in seq_len(PLS_term)) {
        for(s in seq_along(splits)){
            X.matrix_imp <- impute_split_mfa(X.matrix, splits[[s]], X.dim)
            for(l in seq_len(C)) {
                qc_mat <- if(i>1) rbind(prev_qc, quantile.comb.table[l,]) else
                    matrix(quantile.comb.table[l,],1)
                cv_results[l,s,] <- eval_split_combo_R(
                    X.matrix_imp, Y.matrix, splits[[s]], qc_mat, i,
                    X.dim, Method, measure, outcome.type,
                    center, scale, maxiter, metrics
                )
            }
        }
        avg_sel <- rowMeans(cv_results[,,m_idx], na.rm=TRUE)
        best_l <- which.max(avg_sel)
        qtCV[i, seq_len(length(X.dim))] <- quantile.comb.table[best_l,]
        qtCV[i, length(X.dim)+1] <- avg_sel[best_l]
        qtCV[i, (length(X.dim)+1):ncol(qtCV)] <- colMeans(cv_results[best_l,,],
                                                            na.rm=TRUE)
        prev_qc <- qtCV[seq_len(i), seq_len(length(X.dim)), drop=FALSE]
    }
    optPLS <- select_optimal_PLS(
        PLS_term = PLS_term, quantile_table_CV = qtCV,
        X.dim = X.dim, measure_selected = m_idx,
        expected.measure.increase = expected.measure.increase
    )
    list(quantile_table_CV = qtCV, optimal_nPLS = optPLS, splits = splits)
}

#' @title Compute Cell Importance Projection (CIP) and Gene Importance
#' Projection (GIP)
#' @description
#' Computes CIP and GIP metrics from a superpathway fit model list for the
#' target class
#'
#' @param object A superpathway fit model list
#'
#' @returns
#' A list with the CIP and GIP metrics for all cell types. The metrics are
#' computed for the target class.
#' @import checkmate stringr
#' @export
#' @examples
#' file <- system.file("extdata", "example_superpathway_fit_model.rda",
#' package = "singIST")
#' load(file)
#' data <- example_superpathway_fit_model
#' CIP_GIP(data)
CIP_GIP <- function(object){
    check_fit_model(object$superpathway_input, object$hyperparameters_fit,
                    object$model_fit, object$model_validation)
    # Identify target class position
    class_position <- stringr::str_replace(
        colnames(object$model_fit$response_matrix), ".*categories_class", "")
    target_class_position <- which(
        class_position == object$superpathway_input$target_class)
    model <- object$model_fit$`asmbPLS-DA`
    # Loadings
    w_super <- model$X_super_weight^2
    q <- model$Y_weight[target_class_position, , drop = FALSE]
    nblocks <- length(object$model_fit$observed_gene_sets)
    # Compute CIP
    CIP <- (w_super %*% t(q))/sum(q)
    rownames(CIP) <- object$superpathway_input$superpathway_info$celltypes
    # Compute GIP
    GIP <- vector("list", nblocks)
    names(GIP) <- object$superpathway_input$superpathway_info$celltypes
    for(b in seq_len(nblocks)){
        w <- model$X_weight[[b]]^2
        GIP[[b]] <- (w %*% t(q))/sum(q)
        rownames(GIP[[b]]) <- rownames(w)
    }
    return(list("GIP" = GIP, "CIP" = CIP))
}

#' @title Mann-Whitney Wilcoxon test p-value
#'
#' @param ref_distr A vector with the reference distribution
#' @param null_distr A vector with the null distribution
#' @param ... Other parameters to be passed onto wilcox.test
#' @returns
#' A pvalue with the Mann-Whitney Wilcoxon test with the "greater" as the
#' alternative hypothesis
#'
#' @import stats
#' @export
#' @examples
#' ref_distr <- rnorm(100, mean = 30, sd = 2)
#' null_distr <- rnorm(100, mean = 0, sd = 1)
#' wilcox_CIP_GIP(ref_distr, null_distr)
wilcox_CIP_GIP <- function(ref_distr, null_distr, ...){
    return(stats::wilcox.test(
        ref_distr, null_distr, alternative = "greater", ...)$p.value)
}

#' @title Permutation test for asmbPLSDA global significance for LOO
#'
#' @description
#' Performs permutation testing for asmbPLS-DA to evaluate model validity.
#'
#' @param object A superpathway fit model list.
#' @param npermut Number of permutations (default: 100).
#' @param nbObsPermut Number of samples to permute per iteration
#' (default: NULL).
#' @param Nc Number of samples dropped per permutation (default: 1).
#' @param CV_error Cross-validation error of the fitted model.
#' @param measure Accuracy measure (`"F1"`, `"accuracy"`, `"B_accuracy"`,
#' `"precision"`, `"recall"`, default: `"B_accuracy"`).
#' @param Method Decision rule for prediction (default: NULL).
#' @param maxiter Maximum iterations (default: 100).
#'
#' @return A list with permutation statistics, p-value, and confidence
#' intervals.
#' @export
#' @examples
#' file <- system.file("extdata", "example_superpathway_fit_model.rda",
#' package = "singIST")
#' load(file)
#' data <- example_superpathway_fit_model
#' permut_asmbplsda(data, npermut = 5, Nc = 1,
#' CV_error = 1)
permut_asmbplsda <- function(object, npermut = 100, nbObsPermut = NULL,
                             Nc = 1, CV_error, measure = "B_accuracy",
                             Method = NULL, maxiter = 100) {
    Y.matrix <- object$model_fit$`asmbPLS-DA`$Y_group
    X.matrix <- object$model_fit$predictor_block
    nr <- nrow(Y.matrix)
    q <- ncol(Y.matrix)
    res <- initialize_results(npermut, q)
    for (j in seq_len(npermut + 1)) {
        Ypermut <- permute_Y_matrix(Y.matrix, nr, nbObsPermut, j)
        res <- compute_permutation_stats(res, Y.matrix, Ypermut, j, q, nr)
        s <- select_samples(object, nr, Nc)
        X_train <- X.matrix[-s, , drop = FALSE]
        X_val <- X.matrix[s, , drop = FALSE]
        Y_train <- Ypermut[-s, , drop = FALSE]
        Y_val <- Ypermut[s, , drop = FALSE]
        Modelpermut <- fit_permuted_model(object, X_train, Y_train, maxiter)
        res <- evaluate_performance(res, Modelpermut, X_train, X_val,
                                    Y.matrix, s, measure, j, nr, Method, object)
    }
    null_errors <- as.vector(
        res$prct.Ychange.values[ , ncol(res$prct.Ychange.values)])
    res$pvalue <- compute_pvalue(null_errors, CV_error)
    res$IC <- compute_IC95(null_errors)
    return(res)
}

#' @title Permutation test for asmbPLS-DA global significance (LOO or KCV)
#' @description
#' If `splits=NULL`, runs LOOCV‐based permutation. Otherwise treats `splits` as
#' a list of train/validate splits (e.g. from make_splits_R()) and does a fixed
#' splits K‐fold×repeats permutation test.
#' @param object A superpathway fit model list
#' @param npermut Number of permutations (default: 100)
#' @param nbObsPermut Number of samples to permute per iteration
#' (default: NULL).
#' @param Nc Number of samples dropped per permutation (default: 1 if LOOCV).
#' @param splits  Optional list of splits; if NULL uses LOOCV branch
#' @param CV_error Error obtained from optimal model CV process
#' @param measure Accuracy measure (`"F1"`, `"accuracy"`, `"B_accuracy"`,
#' `"precision"`, `"recall"`, default: `"B_accuracy"`).
#' @param Method Decision rule for prediction (default: NULL).
#' @param maxiter Maximum iterations (default: 100).
#' @param ...     Other args passed to LOOCV or to evaluate_performance
#' @return A list with null distribution, p-value, and (for KCV) splits
#' @export
permut_asmbplsda_kcv <- function(object, npermut = 100, splits  = NULL,
                                    measure = "B_accuracy", nbObsPermut = NULL,
                                    Nc = 1, Method  = NULL, maxiter = 100, 
                                    CV_error = NULL,...) {
    X_blocks <- object$model_fit$predictor_block
    Y_group  <- object$model_fit$`asmbPLS-DA`$Y_group
    if (is.null(splits)) {
        return(permut_asmbplsda(object, npermut = npermut,
                                nbObsPermut = nbObsPermut,
                                Nc = Nc, CV_error, measure = measure,
                                Method = Method, maxiter = maxiter, ...))}
    null_scores   <- numeric(npermut)
    for (i in seq_len(npermut)) {
        Yp <- Y_group[sample(nrow(Y_group)), , drop = FALSE]
        folds <- make_splits_R(Yp, k = object$hyperparameters_fit$folds_CV, 
                               ncv = object$hyperparameters_fit$repetition_CV)
        total_splits <- length(folds)
        fold_scores <- numeric(total_splits)
        for (j in seq_along(splits)) {
            tr <- splits[[j]]$train
            va <- splits[[j]]$validate
            X_tr <- X_blocks[tr, , drop = FALSE]
            Y_tr <- Yp[  tr, , drop = FALSE]
            X_va <- X_blocks[va, , drop = FALSE]
            Y_va <- Yp[  va, , drop = FALSE]
            if(ncol(Y_tr) == 1 && length(unique(as.numeric(Y_tr[, 1]))) < 2){
                fold_scores[j] <- NA_real_
                next
            }
            fit_p  <- asmbPLS::asmbPLSDA.fit(X.matrix = X_tr,Y.matrix = Y_tr,
                    PLS.comp = object$hyperparameters_fit$number_PLS,
                    X.dim = lengths(object$model_fit$observed_gene_sets),
                    quantile.comb=
                        object$hyperparameters_fit$quantile_comb_table,
                    outcome.type = object$hyperparameters_fit$outcome_type,
                    center = TRUE, scale = TRUE, maxiter = maxiter)
            pred <- asmbPLS::asmbPLSDA.predict(
                fit_p, X_va, PLS.comp = object$hyperparameters_fit$number_PLS,
                method = Method)$Y_pred
            res  <- Results_comparison_measure(
                as.numeric(pred), as.numeric(Y_va[,1]),
                object$hyperparameters_fit$outcome_type)
            fold_scores[j] <- res[get_measure_index(measure)]
        }
        null_scores[i] <- mean(fold_scores, na.rm = TRUE)
    }
    pvalue <- compute_pvalue(null_scores, CV_error)
    IC <- compute_IC95(null_scores)
    list(null_distribution = null_scores, pvalue = pvalue, IC = IC, 
         observed = CV_error, splits = splits)
}


#' @title Cell and Gene Importance Projections statistical significance
#'
#' @description
#' Computes Cell and Gene Importance Projection observed distribution from
#' fitted asmbPLSDA, and its associated null distributions by permuting the
#' block of predictor matrices. Returns a pvalue of the Mann-Whitney Wilcoxon
#' between the observed and null distribution for each CIP and GIP.
#'
#' @param object A superpathway fit model list
#' @param npermut Number of permutations on response block matrices
#' @param maxiter An integer indicating the maximum number of iterations.
#' If `NULL` the default is 100.
#' @param type Either `jackknife` or `subsampling`. If `jackknife` then the CIP
#' and GIP observed distribution is generated by a jackknife procedure. If
#' `subsampling` the CIP and GIP observed distribution is generated by
#' subsampling the number of samples without replacement, each subsample is
#' guaranteed to contain at least 2 samples per class. If a LOOCV was performed
#' or one has small sample size it is recommended to select `jackknife`,
#' otherwise select `subsampling`.
#' @param nsubsampling Number of subsamples to generate CIP and GIP observed
#' distributions. By default 100.
#' @param ... Other parameters to be passed onto \link{wilcox_CIP_GIP}
#' @import asmbPLS checkmate
#' @returns
#' A list containing; observed distributions of CIP and GIP (variability_param);
#' its associated null distributions generated by permutations (NULL_CIP_GIP);
#' the unadjusted pvalue of Mann-Whitney Wilcoxon for CIP distribution
#' (CIP_pvalue); and for GIP distribution (GIP_pvalue).
#' @export
#' @examples
#' file <- system.file("extdata", "example_superpathway_fit_model.rda",
#' package = "singIST")
#' load(file)
#' data <- example_superpathway_fit_model
#' CIP_GIP_test(data, npermut = 3, type = "jackknife")
CIP_GIP_test <- function(object, npermut = 100, maxiter = 100,
                        type = c("jackknife", "subsampling"),
                        nsubsampling = 100, ...) {
    checkmate::assert_choice(type, choices = c("jackknife", "subsampling"))
    # Extract data from the object
    K <- nrow(object$model_fit$`asmbPLS-DA`$Y_group)
    M <- length(unique(object$superpathway_input$sample_class))
    X.matrix <- object$model_fit$predictor_block
    Y.matrix <- object$model_fit$response_matrix
    X.dim <- lengths(object$model_fit$observed_gene_sets)
    # Initialize result containers
    CIP_GIP_variability <- NULL_VAR_INF <- list()
    # Calculate CIP/GIP distributions based on resampling type
    if (type == "jackknife") {
        CIP_GIP_variability <- jackknife_CIP_GIP(object, X.matrix, Y.matrix, K,
                                                    maxiter, X.dim)
    } else {
        CIP_GIP_variability <- subsampling_CIP_GIP(object, X.matrix, Y.matrix,
                                                    K, M, nsubsampling, maxiter,
                                                    X.dim)
    }
    # Generate null distributions by permutation
    NULL_VAR_INF <- generate_null_distributions(object, X.matrix, Y.matrix,
                                                npermut, K, X.dim, maxiter)
    # Compute p-values for CIP and GIP distributions
    GIP_pvalue <- calculate_pvalues(CIP_GIP_variability$GIP, NULL_VAR_INF$GIP,
                                    wilcox_CIP_GIP, ...)
    CIP_pvalue <- lapply(seq_along(CIP_GIP_variability$GIP), function(i, ...){
        variability_cell_CIP <- CIP_GIP_variability$CIP[i,]
        null_cell_CIP <- NULL_VAR_INF$CIP[i,]
        tests <- wilcox_CIP_GIP(variability_cell_CIP, null_cell_CIP, ...)
        output <- tests
        return(output)
    })
    # Return results
    return(list("variability_param" = CIP_GIP_variability,
                "NULL_CIP_GIP" = NULL_VAR_INF, "CIP_pvalue" = CIP_pvalue,
                "GIP_pvalue" = GIP_pvalue))
}

#' @title Impute split via MFA
#'
#' @description
#' Given a data matrix and a train/validate split, performs MFA-based imputation
#' on the training set if needed, then applies the learned parameters to the
#' validation set. Returns the full matrix of imputed rows in original order.
#'
#' @param X.matrix Numeric matrix of features (rows = samples, cols = variables)
#' @param split A list with elements 'train' and 'validate' containing row
#' indices
#' @param X.dim Vector of block sizes for MFA
#' @param ncp Integer: number of MFA components (default=2)
#' @param center Logical: center data (default=TRUE)
#' @param scale Logical: scale data (default=TRUE)
#' @param maxiter Integer: max iterations for MFA fitting (default=100)
#' @return A matrix with the same dimensions and row order as the rows in
#' 'split', imputed.
#' @noRd
impute_split_mfa <- function(
        X.matrix, split, X.dim,
        ncp = 2
) {
    # Extract training and validation
    E_tr <- X.matrix[split$train, , drop = FALSE]
    E_va <- X.matrix[split$validate, , drop = FALSE]
    train_miss <- any(is.na(E_tr))
    val_miss <- any(is.na(E_va))
    if (train_miss || val_miss) {
        # Clean and fit MFA on training
        clean <- clean_mfa_data(E_tr)
        if (train_miss) {
            X.dim.new <- update_group_sizes(X.dim, clean$keep_cols)
            imp_tr <- fit_mfa_imputer(clean$X_clean, X.dim.new, ncp = ncp)
            E_tr <- restore_removed_columns(
                as.matrix(imp_tr$imputed), E_tr, clean$keep_cols
            )
        }else{
            imp_tr <- FactoMineR::MFA(
                base = E_tr,
                group = X.dim,
                type = rep("s", length(X.dim)),
                ncp = 2,
                graph = FALSE
            )
            imp_tr$mu <- colMeans(E_tr)
            imp_tr$loadings <-
                imp_tr$global.pca$svd$V[,seq_len(ncp),drop = FALSE]
            clean$keep_cols <- rep(TRUE, ncol(E_tr))
        }
        if (val_miss) {
            imp_va <- predict_mfa_imputer(
                E_va[, clean$keep_cols, drop = FALSE],
                mu = imp_tr$mu,
                loadings = imp_tr$loadings
            )
            E_va <- restore_removed_columns(imp_va, E_va, clean$keep_cols)
        }
    }
    # Combine back into full matrix in original row order
    imputed <- X.matrix
    imputed[split$train, ] <- E_tr
    imputed[split$validate, ] <- E_va
    as.matrix(imputed)
}

#' @title Cross validation and fit of asmbPLSDA
#'
#' @description
#' Performs Cross Validation of the provided superpathway input, fits the
#' optimal model and computes its validation metrics. The Cross Validation can
#' either be Leave One-Out Cross Validation (LOOCV) or K-Fold Cross Validation
#' (KCV). A LOOCV is performed if the number of folds was set to 1 or if the
#' number of samples per class is less than 3 for any class. A K-Fold Cross
#' Validation (KCV) is performed if the number of folds is greater or equal
#' than 3 and the number of samples per class is always greater than the number
#' of folds. If the number of samples is low for some of the classes LOOCV is
#' recommended. If KCV is performed, missing values are automatically imputed
#' in the K-CV process. The training set is imputed via missMDA::imputeMFA(),
#' and an FactoMineR::MFA() is trained on the imputed training set from which
#' we extract the mean of each gene and the estimated loadings. We then estimate
#' the validation set by projecting the samples onto MFA space of the training
#' set. Gene whose variance is 0 are excluded from the imputation, if a gene
#' has null variance and full of 0 values, the NA were imputed to 0.
#'
#' @param object A superpathway input list to fit optimal asmbPLSDA.
#' @param parallel A boolean indicating whether to parallelize (`TRUE`)
#' for LOOCV on quantile combination or not (`FALSE`). Note this option is only
#' available for LOOCV and not KCV. Default is `FALSE`.
#' @param measure Accuracy measure to be used to select optimal asmbPLSDA model.
#' Default is F1 measure. Options are: F1, accuracy, B_accuracy, precision
#' and recall.
#' @param Method Decision rule used for prediction. For binary outcome
#' `fixed_cutoff` (default), `Euclidean_distance_X`, and
#' `Mahalanobis_distance_X`. For categorical otcome with more than 2 levels,
#' the methods include `Max_Y` (default), `Euclidean_distance_X`,
#' `Mahalanobis_distance_X`, `Euclidean_distance_Y`, and
#' `PCA_Mahalanobis_distance_Y`. If `NULL` the default method is used for the
#' respective outcome binary.
#' @param expected_measure_increase A double indicating the measure you expect
#' to decrease by percent after including one more PLS component, this will
#' affect the selection of optimal number of PLS components. If `NULL` the
#' default is 0.005 (0.5%).
#' @param maxiter An integer indicating the maximum number of iterations.
#' If `NULL` the default is 100.
#' @param global_significance_full A boolean indicating whether to return a list
#' with information of each permutation for the global
#' significance test of asmbPLSDA. By default `FALSE`. Note that if the number
#' of permutations that is set is large, storing this information can
#' be a burden on memory.
#' @param CIP.GIP_significance_full A boolean indicating whether to return a
#' list with the observed and null distributions of CIP and GIP or only the
#' pvalue and adjusted pvalue. By default `FALSE`. Note that if the number of
#' permutations that is set is large, storing this information can be a burden
#' on memory.
#' @param npermut Number of permutations for the tests. By default 100.
#' Parameter passed onto \link{permut_asmbplsda} and \link{CIP_GIP_test}.
#' @param nbObsPermut An integer indicating the number of samples to permute
#' in each permutation. By default `NULL`. If `NULL` the number of samples to
#' permute at each permutation is randomly chosen (for each permutation).
#' Parameter passed onto \link{permut_asmbplsda}.
#' @param type Either `jackknife` or `subsampling`. If `jackknife` then the CIP
#' and GIP observed distribution is
#' generated by a jackknife procedure. If `subsampling` the CIP and GIP observed
#' distribution is generated by subsampling the number of samples without
#' replacement, each subsample is guaranteed to contain at least 2 samples per
#' class. If a LOOCV was performed or one has small sample size it is
#' recommended to select `jackknife`, otherwise select `subsampling`.
#' Passed onto \link{CIP_GIP_test}.
#' @param nsubsampling Number of subsamples to generate CIP and GIP observed
#' distributions. By default 100. Passed onto \link{CIP_GIP_test}.
#' @param ... Other parameters to be passed onto \link{wilcox_CIP_GIP}, wilcox
#' test of GIP statistical tests
#' @import asmbPLS checkmate
#' @rdname fitOptimal
#' @export
#' 
#' @returns
#' A superpathway fit model list object with; a
#' superpathway input list object used for CV and model fit;
#' a hyperparameters list object with the hyperparameters used to fit
#' the optimal model (includes optimal quantiles and PLS components from the
#' CV step); a list with the fitted model information including: predictor
#' and response matrices, observed gene sets, from \code{matrixToBlock},
#' and asmbPLSDA output; a list with the validaton metrics of fitted model.
#' @examples
#' # fitOptimal with jackknife for CIP/GIP statistics and 10 permutations
#' # for the global significance test of the optimal model
#' file <- system.file("extdata", "example_superpathway_input.rda",
#' package = "singIST")
#' load(file)
#' data <- example_superpathway_input
#' fitOptimal(data, npermut = 10, type = "jackknife")
#' # fitOptimal with subsampling for CIP/GIP statistics with
#' # 10 subsamples and 50 permutations for the global significance test of the
#' # optimal model
#' fitOptimal(data, npermut = 50, type = "subsampling",
#' nsubsampling = 10)
fitOptimal <- function(
        object, parallel = FALSE, measure = "B_accuracy", Method = NULL,
        expected_measure_increase = 0.005, maxiter = 100,
        global_significance_full = FALSE, CIP.GIP_significance_full = FALSE,
        npermut = 100, nbObsPermut = NULL, type = "jackknife",
        nsubsampling = 100, ...) {
    output <- create_fit_model(object,object$hyperparameters_info,list(),list())
    measure_selected <- get_measure_index(measure)
    if (min(as.vector(base::table(object$sample_class))) <= 1) {
        stop("At least one class has 1 or fewer samples.")
    }
    model_block_matrices <- matrixToBlock(object)
    nFC <- ifelse(is.null(object$hyperparameters_info$folds_CV), 5,
                    object$hyperparameters_info$folds_CV)
    if(!(min(as.vector(base::table(object$sample_class))) >= nFC)){
        if(!(min(as.vector(base::table(object$sample_class)))>= as.integer(3))){
            message("Cannot run KCV with initial Folds, running LOOCV instead")
            nFC <- as.integer(1)
        }else{
            message("Cannot run KCV with initial Folds, run KCV with 3-Folds")
            nFC <- as.integer(3)
        }
    }
    optimal_hyperparameters <- perform_cv(object, model_block_matrices, nFC,
        measure, parallel, expected_measure_increase, maxiter, Method)
    output$hyperparameters_fit$number_PLS <- as.integer(
        optimal_hyperparameters$optimal_nPLS)
    output$hyperparameters_fit$quantile_comb_table <-
        optimal_hyperparameters$quantile_table_CV
    output$hyperparameters_fit$folds_CV <- as.integer(nFC)
    model_block_matrices$block_predictor <-
        impute_X(model_block_matrices$block_predictor)
    optimal_fit <- asmbPLS::asmbPLSDA.fit(
        X.matrix = model_block_matrices$block_predictor,
        Y.matrix = model_block_matrices$matrix_response,
        PLS.comp = output$hyperparameters_fit$number_PLS,
        X.dim = model_block_matrices$block_dim, center = TRUE, scale = TRUE,
        quantile.comb = output$hyperparameters_fit$quantile_comb_table,
        outcome.type = output$hyperparameters_fit$outcome_type)
    output$model_fit <- list(
        "predictor_block" = model_block_matrices$block_predictor,
        "response_matrix" = model_block_matrices$matrix_response,
        "observed_gene_sets" = model_block_matrices$observed_gene_sets,
        "asmbPLS-DA" = optimal_fit)
    output <- compute_validation_metrics(output, optimal_hyperparameters,
            model_block_matrices, npermut, nbObsPermut, maxiter,
            global_significance_full, CIP.GIP_significance_full, type,
            nsubsampling, measure, Method)
    return(output)
}
