#' @include helpers.R
#' @include hidden_aliases.R
#' @include ChromBackend.R
NULL

#' @title Chromatographic Data Backend for Spectra Objects
#'
#' @name ChromBackendSpectra
#'
#' @description
#' The `ChromBackendSpectra` class extends `ChromBackendMemory`, inheriting
#' all its slots and methods while providing additional functionality for
#' summarizing chromatographic data from [Spectra::Spectra()] objects.
#'
#' It can be initialized with a `Spectra` object, which is stored in the
#' `spectra` slot of the backend. Users can also provide a `data.frame`
#' containing chromatographic metadata, stored in `@chromData`. This metadata
#' filters the `Spectra` object and generates `peaksData`. If `chromData` is
#' not provided, a default `data.frame` is created from the `Spectra` data.
#' An "rtMin", "rtMax", "mzMin", and "mzMax" column will be created by
#' condensing the `Spectra` data corresponding to each unique combination of
#' the `factorize.by` variables.
#'
#' By "factorization" we mean the process of grouping spectra
#' into chromatograms based on specified variables. For example, using
#' `factorize.by = c("msLevel", "dataOrigin")` means that all MS1 spectra from
#' file "A" form one chromatogram, all MS2 spectra from file "A" form another,
#' and so on. Each unique combination of the factorization variables creates
#' a separate chromatogram. This is essential for organizing spectral data into
#' meaningful chromatographic traces that can be visualized and analyzed.
#'
#' The *dataOrigin* core chromatogram variable should reflect the *dataOrigin*
#' of the `Spectra` object. The `factorize.by` parameter defines the variables
#' for grouping `Spectra` data into chromatographic data. The default is
#' `c("msLevel", "dataOrigin")`, which will define separate chromatograms for
#' each combination of `msLevel` and `dataOrigin`. These variables must be in
#' both the `spectraData()` of the `Spectra` and `chromData` (if provided).
#'
#' The `summarize.method` parameter defines how spectral data intensity is
#' summarized:
#' - **"sum"**: Sums intensity to create a Total Ion Chromatogram (TIC).
#' - **"max"**: Takes max intensity for a Base Peak Chromatogram (BPC).
#'
#' If `chromData` or its factorization columns are modified, the `factorize()`
#' method must be called to update `chromSpectraIndex`.
#'
#' @details
#' No `peaksData` is stored until the user calls a function that generates it
#' (e.g., `rtime()`, `peaksData()`, `intensity()`). The `@peaksData` slot
#' replacement is unsupported — modifications are temporary to optimize memory.
#' The `@inMemory` slot indicates this with `TRUE`.
#'
#' **Spectra Sort Index**: The `ChromBackendSpectra` backend maintains a
#' `spectraSortIndex` slot that stores a sort order for the internal `Spectra`
#' object based on `dataOrigin` and `rtime`. To optimize performance, the sort
#' index is only computed and stored when the spectra are unsorted; if already
#' sorted (which is typical for most real-world data), `spectraSortIndex` remains
#' empty (`integer()`). This avoids unnecessary subsetting operations. The sort
#' index is automatically recalculated whenever the `factorize()` method is called,
#' ensuring it remains valid and consistent. This approach avoids the need to
#' physically reorder disk-backed `Spectra` objects, which would require loading
#' all data into memory.
#'
#' **Factorize and Subsetting**: The `factorize()` method updates the
#' `chromSpectraIndex` in both `chromData` and the `@spectra` to reflect
#' the current grouping, and recalculates `spectraSortIndex` to maintain the
#' correct sort order. The `[` subsetting operator properly handles subsetting
#' of both `@chromData`, `@peaksData`, and `@spectra`, while updating the
#' `spectraSortIndex` to reference valid positions in the subsetted data.
#'
#' `ChromBackendSpectra` should reuse `ChromBackendMemory` methods whenever
#' possible to keep implementations simple.
#'
#' @param chromData A `data.frame` with chromatographic data for use in
#'        `backendInitialize()`. If missing, a default is generated. Columns
#'        like `rtMin`, `rtMax`, `mzMin`, and `mzMax` must be provided and not
#'        contain `NA` values. Use `-Inf/Inf` for unspecified values. The
#'        `"dataOrigin"` column must match the `Spectra` object's
#'        `"dataOrigin"`.
#'
#' @param factorize.by A `character` vector of `spectraVariables` for grouping
#'        `Spectra` data into chromatographic data (i.e., creating separate
#'        chromatograms for each unique combination of these variables).
#'        Default: `c("msLevel", "dataOrigin")`, which creates one chromatogram
#'        per MS level per data file.
#'        If `chromData` is provided, it **must** also contain these columns.
#'
#' @param object A `ChromBackendSpectra` object.
#'
#' @param spectra A `Spectra` object.
#'
#' @param spectraVariables A `character` vector specifying which variables
#'        from the `Spectra` object should be added to the chromData. These
#'        will be mapped using the `chromSpectraIndex` variable.
#'
#' @param summarize.method A `character` string specifying intensity summary:
#'        `"sum"` (default) or `"max"`.
#'
#' @param ... Additional parameters.
#'
#' @author Philippine Louail, Johannes Rainer.
#'
#' @exportClass ChromBackendSpectra
#'
#' @return Refer to the individual function description for information on the
#'         return value.
#'
#' @importClassesFrom Spectra Spectra
#' @importFrom Spectra Spectra spectraVariables spectraData concatenateSpectra
#'
#' @examples
#' library(Spectra)
#' library(MsBackendMetaboLights)
#'
#' ## Get Spectra data from MetaboLights
#' be <- backendInitialize(MsBackendMetaboLights(),
#'     mtblsId = "MTBLS39",
#'     filePattern = c("63B.cdf")
#' )
#' s <- Spectra(be)
#'
#' s <- setBackend(s, MsBackendMemory())
#'
#' ## Initialize ChromBackendSpectra
#' be_empty <- new("ChromBackendSpectra")
#' be <- backendInitialize(be_empty, s)
#'
#' ## replace the msLevel data
#' msLevel(be) <- c(1L, 2L, 3L)
#'
#' ## re-factorize the data
#' be <- factorize(be)
#'
#' ## Create BPC : we summarize the intensity present in the Spectra object
#' ## by the maximum value, thus creating a Base Peak Chromatogram.
#' be <- backendInitialize(be_empty, s, summarize.method = "max")
#'
#' ## Can now see the details of this bpc by looking at the chromData of our
#' ## object
#' chromData(be)
#'
#' ## Another possibilities is to create eics from the Spectra object.
#' ## Here we create an EIC with a specific m/z and retention time window.
#' df <- data.frame(mzMin = 100.01, mzMax = 100.02 , rtMin = 50, rtMax = 100)
#' be <- backendInitialize(be_empty, s, summarize.method = "sum")
#' chromData(be) <- cbind(chromData(be), df)
#'
#' ## now when we call the peaksData function, we will get the intensity
#' ## of the spectra object that are in the m/z and retention time window
#' ## defined in the chromData.
#' peaksData(be)
#'
NULL

#' @noRd
ChromBackendSpectra <- setClass(
  "ChromBackendSpectra",
  contains = "ChromBackendMemory",
  slots = c(
    inMemory = "logical",
    spectra = "Spectra",
    summaryFun = "function",
    spectraSortIndex = "integer"
  ),
  prototype = prototype(
    chromData = fillCoreChromVariables(data.frame()),
    peaksData = list(.EMPTY_PEAKS_DATA),
    readonly = TRUE,
    spectra = Spectra(),
    version = "0.1",
    inMemory = FALSE,
    summaryFun = sumi,
    spectraSortIndex = integer()
  )
)

#' @rdname ChromBackendSpectra
#' @importFrom methods new
#' @export ChromBackendSpectra
ChromBackendSpectra <- function() {
  new("ChromBackendSpectra")
}

#' @rdname ChromBackendSpectra
#' @importFrom methods callNextMethod
#' @importFrom MsCoreUtils rbindFill sumi maxi
setMethod(
  "backendInitialize",
  "ChromBackendSpectra",
  function(
    object,
    spectra = Spectra(),
    factorize.by = c("msLevel", "dataOrigin"),
    summarize.method = c("sum", "max"),
    chromData = fillCoreChromVariables(),
    spectraVariables = character(),
    ...
  ) {
    summarize.method <- match.arg(summarize.method)
    object@summaryFun <- if (summarize.method == "sum") sumi else maxi
    if (!is(spectra, "Spectra")) {
      stop("'spectra' must be a 'Spectra' object.")
    }
    if (!length(spectra)) {
      return(object)
    }
    if (!all(factorize.by %in% spectraVariables(spectra))) {
      stop("All 'factorize.by' variables must exist in 'spectra'.")
    }
    if (!is.data.frame(chromData)) {
      stop("'chromData' must be a 'data.frame'.")
    }
    if (!nrow(chromData)) {
      chromData <- fillCoreChromVariables(data.frame())
    } else {
      validChromData(chromData)
    }
    if (!all(factorize.by %in% colnames(chromData))) {
      stop(
        "All 'factorize.by' variables must exist ",
        "in 'chromData'. If no chromData was provided, ",
        "it needs to be part of the `coreChromVariables()` ",
        "available."
      )
    }
    ## Spectra object are not expected to be ordered by rtime,
    ## so we store a sort index instead of concatenating.
    ## This allows us to keep disk-backed backends intact.
    ## Only store sort index if data is actually unsorted (optimization).
    sort_idx <- order(
      spectra$dataOrigin,
      spectra$rtime
    )
    if (!identical(sort_idx, seq_along(spectra))) {
      object@spectraSortIndex <- sort_idx
    }
    object@chromData <- chromData
    object@spectra <- spectra
    object <- factorize(object, factorize.by = factorize.by)
    ## map additional spectraVariables if any
    if (length(spectraVariables)) {
      object <- .map_spectra_vars(object, spectraVariables = spectraVariables)
    }
    callNextMethod(object, chromData = .chromData(object))
  }
)

#' @rdname hidden_aliases
#' @importFrom methods callNextMethod
setMethod("show", "ChromBackendSpectra", function(object) {
  callNextMethod()
  cat("\nThe Spectra object contains", length(object@spectra), "spectra\n")
  if (.inMemory(object)) cat("\nPeaks data is cached in memory\n")
})

#' @rdname ChromBackendSpectra
#' @export
chromSpectraIndex <- function(object) {
  if (!is(object, "ChromBackendSpectra")) {
    stop("The object must be a 'ChromBackendSpectra' object.")
  }
  cd <- chromData(object, columns = "chromSpectraIndex", drop = TRUE)
  if (!is.factor(cd)) {
    cd <- factor(cd)
  }
  cd <- droplevels(cd)
  cd
}

#' @rdname hidden_aliases
setMethod(
  "factorize",
  "ChromBackendSpectra",
  function(object, factorize.by = c("msLevel", "dataOrigin"), ...) {
    if (
      !all(
        factorize.by %in%
          spectraVariables(.spectra(object))
      )
    ) {
      stop("All 'factorize.by' variables must be in the ", "Spectra object.")
    }
    spectra_f <- interaction(
      as.list(
        spectraData(.spectra(object))[,
          factorize.by,
          drop = FALSE
        ]
      ),
      drop = TRUE,
      sep = "_"
    )
    cd <- .chromData(object)

    if (nrow(cd)) {
      ## chromData exists: validate and align spectra to it
      if (!all(factorize.by %in% chromVariables(object))) {
        stop("All 'factorize.by' variables must be in chromData.")
      }
      cd$chromSpectraIndex <- interaction(
        cd[, factorize.by, drop = FALSE],
        drop = TRUE,
        sep = "_"
      )
      object@spectra$chromSpectraIndex <- factor(
        as.character(spectra_f),
        levels = levels(cd$chromSpectraIndex)
      )
      ## Apply sort index for processing if needed
      if (length(object@spectraSortIndex)) {
        sorted_spectra <- .spectra(object)[object@spectraSortIndex]
        sorted_spectra_f <- spectra_f[object@spectraSortIndex]
      } else {
        sorted_spectra <- .spectra(object)
        sorted_spectra_f <- spectra_f
      }
      object@chromData <- .ensure_rt_mz_columns(
        cd,
        sorted_spectra,
        sorted_spectra_f
      )
    } else {
      ## chromData is empty: create it from spectra
      object@spectra$chromSpectraIndex <- spectra_f
      full_sp <- do.call(
        rbindFill,
        lapply(split(.spectra(object), spectra_f), .spectra_format_chromData)
      )
      rownames(full_sp) <- NULL
      object@chromData <- full_sp
    }
    ## Recalculate sort index: only store if data is unsorted (optimization)
    sort_idx <- order(
      object@spectra$dataOrigin,
      object@spectra$rtime
    )
    if (!identical(sort_idx, seq_along(object@spectra))) {
      object@spectraSortIndex <- sort_idx
    } else {
      object@spectraSortIndex <- integer()
    }
    object
  }
)

#' @rdname hidden_aliases
#' @importMethodsFrom ProtGenerics backendParallelFactor
setMethod(
  "backendParallelFactor",
  "ChromBackendSpectra",
  function(object, ...) {
    factor()
  }
)

#' @rdname hidden_aliases
#' @export
setMethod("isReadOnly", "ChromBackendSpectra", function(object) TRUE)

#' @rdname hidden_aliases
setMethod(
  "peaksData",
  "ChromBackendSpectra",
  function(object, columns = peaksVariables(object), drop = FALSE, ...) {
    if (.inMemory(object) || !length(object)) {
      return(callNextMethod())
    }
    ## Ensure chromSpectraIndex only contains relevant levels needed
    valid_f <- chromSpectraIndex(object)
    ## Apply the sort index to spectra for processing (only if unsorted)
    if (length(object@spectraSortIndex)) {
      sorted_spectra <- .spectra(object)[object@spectraSortIndex]
    } else {
      sorted_spectra <- .spectra(object)
    }
    current_vals <- as.character(sorted_spectra$chromSpectraIndex)
    if (!setequal(unique(current_vals), levels(valid_f))) {
      sorted_spectra$chromSpectraIndex <- factor(
        current_vals,
        levels = levels(valid_f)
      )
    }
    ## Process peaks data
    pd <- mapply(
      .process_peaks_data,
      cd = split(chromData(object), valid_f),
      s = split(
        sorted_spectra,
        sorted_spectra$chromSpectraIndex
      ),
      MoreArgs = list(
        columns = columns,
        fun = object@summaryFun,
        drop = drop
      ),
      SIMPLIFY = FALSE
    )
    unlist(pd, use.names = FALSE, recursive = FALSE)
  }
)

#' @rdname hidden_aliases
setReplaceMethod("peaksData", "ChromBackendSpectra", function(object, value) {
  message(
    "The `peaksData` slot will be modified but the changes will not",
    " affect the Spectra object."
  )
  object <- callNextMethod()
  object@inMemory <- TRUE
  object
})


#' @rdname hidden_aliases
#' @export
setMethod(
  "supportsSetBackend",
  "ChromBackendSpectra",
  function(object, ...) FALSE
)

#' @rdname hidden_aliases
#' @importMethodsFrom S4Vectors [ [[
#' @importFrom MsCoreUtils i2index
#' @importFrom stats setNames
#' @export
setMethod("[", "ChromBackendSpectra", function(x, i, j, ...) {
  if (!length(i)) {
    return(ChromBackendSpectra())
  }

  i <- i2index(i, length = length(x))
  kept_indices <- chromSpectraIndex(x)[i]
  x@chromData <- .chromData(x)[i, , drop = FALSE]
  x@peaksData <- .peaksData(x)[i]
  spectra_keep <- x@spectra$chromSpectraIndex %in% kept_indices
  x@spectra <- x@spectra[spectra_keep]

  ## Update spectraSortIndex: remap old positions to new positions
  if (length(x@spectraSortIndex)) {
    old_positions_kept <- which(spectra_keep)
    ## Create mapping from old position to new position
    ## e.g., if we kept positions c(2, 5, 7), they become c(1, 2, 3)
    position_mapping <- setNames(
      seq_along(old_positions_kept),
      old_positions_kept
    )
    ## Keep only sort indices that reference kept positions
    kept_sort_positions <- x@spectraSortIndex %in% old_positions_kept
    x@spectraSortIndex <- as.integer(
      position_mapping[as.character(x@spectraSortIndex[kept_sort_positions])]
    )
  }

  x@chromData$chromSpectraIndex <- droplevels(x@chromData$chromSpectraIndex)
  x@spectra$chromSpectraIndex <- droplevels(x@spectra$chromSpectraIndex)
  x
})

#' @rdname hidden_aliases
setMethod(
  "chromExtract",
  "ChromBackendSpectra",
  function(object, peak.table, by, ...) {
    required_cols <- c("rtMin", "rtMax", "mzMin", "mzMax", by)
    .validate_chromExtract_input(
      object = object,
      peak.table = peak.table,
      by = by,
      required_cols = required_cols
    )

    matched <- .match_chromdata_peaktable(
      object = object,
      peak.table = peak.table,
      by = by
    )
    cd <- .chromData(matched$object)
    chrom_keys <- matched$chrom_keys
    peak_keys <- matched$peak_keys
    cd_split <- split(cd, chrom_keys) ##  UT need to check that
    pk_split <- split(peak.table, peak_keys)

    ## Check overlapping columns
    overl_cols <- .check_overl_columns(
      peak.table = peak.table,
      object = object,
      required_cols = required_cols
    )

    ## Merge peak.table into chromData safely.
    new_cdata <- mapply(
      function(cd_row, pks) {
        ## could switch to bpmapply ?
        d <- suppressWarnings(cbind(cd_row, pks[!overl_cols]))
        d[, names(peak.table)[overl_cols]] <- pks[, overl_cols]
        d
      },
      cd_row = cd_split,
      pks = pk_split,
      SIMPLIFY = FALSE
    )

    new_cdata <- do.call(rbind, new_cdata)
    rownames(new_cdata) <- NULL

    object@chromData <- new_cdata
    object@peaksData <- replicate(
      nrow(new_cdata),
      .EMPTY_PEAKS_DATA,
      simplify = FALSE
    )
    object
  }
)
