# File-level cache of processed test datasets: an environment holding one
# (initially empty) list per modality. The getters in this file store each
# processing level in the relevant list under the level's name.
cache <- new.env()
assign("rna", list(), envir=cache)
assign("adt", list(), envir=cache)
assign("crispr", list(), envir=cache)

#' Get datasets for testing
#'
#' Get single-cell datasets from the \pkg{scRNAseq} package with varying levels of processing.
#' This is primarily intended for testing other \pkg{scrapper} functions, e.g., in their Examples section.
#'
#' @param at String specifying the level of processing.
#' For \code{"start"}, no processing was performed.
#' Otherwise, the dataset is returned after quality control (\code{"qc"}),
#' normalization (\code{"norm"}),
#' feature selection (\code{"hvg"}),
#' PCA (\code{"pca"})
#' or graph-based clustering (\code{"cluster"}).
#' Not all levels are available for every dataset:
#' \code{getTestAdtData.se} stops at \code{"pca"} and \code{getTestCrisprData.se} stops at \code{"qc"}.
#'
#' @return A \link[SingleCellExperiment]{SingleCellExperiment} containing a dataset at the specified level of processing.
#' 
#' @details
#' For \code{getTestRnaData.se}, this is a scRNA-seq dataset of the mouse brain,
#' where the main experiment contains RNA counts and the alternative experiments contain ERCC and repeat element counts.
#' This is obtained with \code{\link[scRNAseq]{fetchDataset}("zeisel-brain-2015", "2023-12-14")}.
#'
#' For \code{getTestAdtData.se}, this is a CITE-seq dataset of human PBMCs,
#' where the main experiment contains RNA counts and the alternative experiment contains ADT counts.
#' This is obtained with \code{\link[scRNAseq]{fetchDataset}("kotliarov-pbmc-2020", "2024-04-18")}.
#' Only the first 5000 cells are loaded for speed.
#'
#' For \code{getTestCrisprData.se}, this is a Perturb-seq dataset of a pancreatic beta cell line,
#' where the main experiment contains RNA counts and the alternative experiment contains CRISPR guide counts.
#' This is obtained with \code{\link[scRNAseq]{fetchDataset}("cao-pancreas-2025", "2025-10-10", "rqc")}.
#' Only the first 5000 cells are loaded for speed.
#'
#' @author Aaron Lun
#' @examples
#' getTestRnaData.se()
#' getTestAdtData.se()
#' getTestCrisprData.se()
#'
#' @seealso
#' \code{\link[scRNAseq]{fetchDataset}}, used to obtain each dataset.
#' 
#' @export
#' @name getTestData.se
getTestRnaData.se <- function(at = c("start", "qc", "norm", "hvg", "pca", "cluster")) {
    # Restrict 'at' to one of the supported levels (defaults to "start").
    at <- match.arg(at)

    # Ordered processing steps. Each step receives the result of the previous
    # step and produces the object that is cached under the step's name.
    steps <- list(
        start = function(prev) {
            # The first step has no predecessor; 'prev' is ignored.
            scRNAseq::fetchDataset("zeisel-brain-2015", "2023-12-14", realize.assays=TRUE)
        },
        qc = function(prev) {
            checked <- quickRnaQc.se(prev, subsets=list(mito=startsWith(rownames(prev), "mt-")), altexp.proportions="ERCC")
            # Only retain the cells flagged in the 'keep' column.
            checked[,checked$keep]
        },
        norm = function(prev) {
            normalizeRnaCounts.se(prev)
        },
        hvg = function(prev) {
            chooseRnaHvgs.se(prev, more.var.args=list(use.min.width=TRUE))
        },
        pca = function(prev) {
            runPca.se(prev, features=SummarizedExperiment::rowData(prev)$hvg)
        },
        cluster = function(prev) {
            clusterGraph.se(prev)
        }
    )

    # Walk through the steps in order, computing and caching any step that is
    # not yet in 'cache$rna', and stop as soon as the requested level is reached.
    result <- NULL
    for (step in names(steps)) {
        if (!(step %in% names(cache$rna))) {
            cache$rna[[step]] <- steps[[step]](result)
        }
        result <- cache$rna[[step]]
        if (step == at) {
            return(result)
        }
    }
}

#' @export
#' @rdname getTestData.se
#' @importFrom methods as
#' @importClassesFrom Matrix dgCMatrix
getTestAdtData.se <- function(at = c("start", "qc", "norm", "hvg", "pca")) {
    # Each level is derived from the previous one on first request and stored
    # in 'cache$adt' under its name, so that later calls reuse the stored object.
    at <- match.arg(at)

    if (!("start" %in% names(cache$adt))) {
        raw.sce <- scRNAseq::fetchDataset("kotliarov-pbmc-2020", "2024-04-18")
        raw.sce <- raw.sce[,1:5000] # Cutting it down a bit for speed.

        # Coerce the assays of the main and ADT experiments to dgCMatrix.
        SummarizedExperiment::assay(raw.sce) <- as(SummarizedExperiment::assay(raw.sce), "dgCMatrix")

        raw.ae <- SingleCellExperiment::altExp(raw.sce, "ADT")
        SummarizedExperiment::assay(raw.ae) <- as(SummarizedExperiment::assay(raw.ae), "dgCMatrix")
        SingleCellExperiment::altExp(raw.sce, "ADT") <- raw.ae

        cache$adt$start <- raw.sce
    }
    sce <- cache$adt$start
    if (at == "start") {
        return(sce)
    }

    if (!("qc" %in% names(cache$adt))) {
        sce <- quickRnaQc.se(sce, subsets=list(mito=startsWith(rownames(sce), "MT-")))

        # 'alt.se' is already the ADT experiment, so its row annotations are
        # fetched directly. rowData() has no experiment-name argument; its
        # second formal is the 'use.names' flag.
        alt.se <- SingleCellExperiment::altExp(sce, "ADT")
        alt.se <- quickAdtQc.se(alt.se, subsets=list(igg=SummarizedExperiment::rowData(alt.se)$isotype))
        SingleCellExperiment::altExp(sce, "ADT") <- alt.se

        # Retain only the cells that are kept by both the RNA and ADT QC.
        sce <- sce[,sce$keep & alt.se$keep]
        cache$adt$qc <- sce
    }
    sce <- cache$adt$qc
    if (at == "qc") {
        return(sce)
    }

    if (!("norm" %in% names(cache$adt))) {
        # Normalize the main experiment and the ADT experiment separately.
        sce <- normalizeRnaCounts.se(sce)
        SingleCellExperiment::altExp(sce, "ADT") <- normalizeAdtCounts.se(SingleCellExperiment::altExp(sce, "ADT"))
        cache$adt$norm <- sce
    }
    sce <- cache$adt$norm
    if (at == "norm") {
        return(sce)
    }

    if (!("hvg" %in% names(cache$adt))) {
        # Feature selection is only applied to the main experiment.
        sce <- chooseRnaHvgs.se(sce)
        cache$adt$hvg <- sce
    }
    sce <- cache$adt$hvg
    if (at == "hvg") {
        return(sce)
    }

    if (!("pca" %in% names(cache$adt))) {
        # PCA on the main experiment uses the 'hvg' row annotation as features;
        # the ADT experiment is run with 'features=NULL'.
        sce <- runPca.se(sce, features=SummarizedExperiment::rowData(sce)$hvg)
        SingleCellExperiment::altExp(sce, "ADT") <- runPca.se(SingleCellExperiment::altExp(sce, "ADT"), features=NULL)
        cache$adt$pca <- sce
    }
    sce <- cache$adt$pca
    if (at == "pca") {
        return(sce)
    }
}

#' @export
#' @rdname getTestData.se
#' @importFrom methods as
#' @importClassesFrom Matrix dgCMatrix
getTestCrisprData.se <- function(at = c("start", "qc")) {
    # Restrict 'at' to one of the supported levels (defaults to "start").
    at <- match.arg(at)

    # Name of the alternative experiment holding the guide counts.
    guide.name <- "CRISPR Guide Capture"

    # Reports whether a level has already been stored in 'cache$crispr'.
    is.cached <- function(level) level %in% names(cache$crispr)

    if (!is.cached("start")) {
        fetched <- scRNAseq::fetchDataset("cao-pancreas-2025", "2025-10-10", "rqc")
        fetched <- fetched[,1:5000] # Cutting it down a bit for speed.

        # Coerce the assays of the main and guide experiments to dgCMatrix.
        main.counts <- SummarizedExperiment::assay(fetched)
        SummarizedExperiment::assay(fetched) <- as(main.counts, "dgCMatrix")

        guides <- SingleCellExperiment::altExp(fetched, guide.name)
        guide.counts <- SummarizedExperiment::assay(guides)
        SummarizedExperiment::assay(guides) <- as(guide.counts, "dgCMatrix")
        SingleCellExperiment::altExp(fetched, guide.name) <- guides

        cache$crispr$start <- fetched
    }
    if (at == "start") {
        return(cache$crispr$start)
    }

    if (!is.cached("qc")) {
        unfiltered <- cache$crispr$start
        unfiltered <- quickRnaQc.se(unfiltered, subsets=list(mito=startsWith(rownames(unfiltered), "MT-")))
        SingleCellExperiment::altExp(unfiltered, guide.name) <- quickCrisprQc.se(SingleCellExperiment::altExp(unfiltered, guide.name))

        # Retain only the cells that are kept by both the RNA and CRISPR QC.
        retain <- unfiltered$keep & SingleCellExperiment::altExp(unfiltered, guide.name)$keep
        cache$crispr$qc <- unfiltered[,retain]
    }

    # match.arg() only allows "start" or "qc", so this is the "qc" result.
    cache$crispr$qc
}
