## ----style, echo = FALSE, results = 'asis', message=FALSE---------------------
BiocStyle::markdown()

## ----echo = FALSE, message = FALSE--------------------------------------------
library(Chromatograms)
library(BiocStyle)
register(SerialParam())

## -----------------------------------------------------------------------------
# A data.frame with chromatogram variables.
cdata <- data.frame(
    msLevel = c(1L, 1L),
    mz = c(112.2, 123.3),
    chromIndex = c(1L, 2L)
)

# Retention time and intensity values for each chromatogram.
pdata <- list(
    data.frame(
        rtime = c(11, 12.4, 12.8, 13.2, 14.6, 15.1, 16.5),
        intensity = c(50.5, 123.3, 153.6, 2354.3, 243.4, 123.4, 83.2)
    ),
    data.frame(
        rtime = c(45.1, 46.2, 53, 54.2, 55.3, 56.4, 57.5),
        intensity = c(100, 180.1, 300.45, 1400, 1200.3, 300.2, 150.1)
    )
)

# Create and initialize the backend
be <- backendInitialize(ChromBackendMemory(),
    chromData = cdata, peaksData = pdata
)

# Create Chromatograms object
chr <- Chromatograms(be)
chr

## -----------------------------------------------------------------------------
MRM_file <- system.file("proteomics", "MRM-standmix-5.mzML.gz",
    package = "msdata"
)

be <- backendInitialize(ChromBackendMzR(),
    files = MRM_file,
    BPPARAM = SerialParam()
)

chr_mzr <- Chromatograms(be)

## -----------------------------------------------------------------------------
length(chr)
length(chr_mzr)

## -----------------------------------------------------------------------------
peaksData(chr)

## -----------------------------------------------------------------------------
peaksData(chr, columns = c("rtime"), drop = TRUE)

chr$rtime

## -----------------------------------------------------------------------------
peaksData(chr) <- list(
    data.frame(
        rtime = c(1, 2, 3, 4, 5, 6, 7),
        intensity = c(1, 2, 3, 4, 5, 6, 7)
    ),
    data.frame(
        rtime = c(1, 2, 3, 4, 5, 6, 7),
        intensity = c(1, 2, 3, 4, 5, 6, 7)
    )
)

## -----------------------------------------------------------------------------
chr$rtime <- list(
    c(8, 9, 10, 11, 12, 13, 14),
    c(8, 9, 10, 11, 12, 13, 14)
)

## -----------------------------------------------------------------------------
chr_filt <- filterPeaksData(chr, variables = "rtime", ranges = c(12, 15))

length(chr_filt)

length(rtime(chr_filt))

## -----------------------------------------------------------------------------
chromData(chr)

## -----------------------------------------------------------------------------
chromData(chr, columns = c("msLevel"))

chr$chromIndex

## -----------------------------------------------------------------------------
chr$msLevel <- c(2L, 2L)

chromData(chr)

## -----------------------------------------------------------------------------
chr$extra <- c("extra1", "extra2")
chromData(chr)

## -----------------------------------------------------------------------------
chr_filt <- filterChromData(chr,
    variables = "chromIndex", ranges = c(1, 2),
    keep = TRUE
)

length(chr_filt)
length(chr)

## ----eval=FALSE---------------------------------------------------------------
# ## Modify metadata
# chr_s$msLevel <- rep(2L, length(chr_s))
# 
# ## Re-factorize to update the groupings
# chr_s <- factorize(chr_s)
# 
# chromData(chr_s)

## ----define-function----------------------------------------------------------
## Define a function that takes the backend as an input, divides the intensity
## by parameter y and returns it. Note that ... is required in
## the function's definition.
divide_intensities <- function(x, y, ...) {
    intensity(x) <- lapply(intensity(x), `/`, y)
    x
}

## Add the function to the procesing queue
chr_2 <- addProcessing(chr, divide_intensities, y = 2)
chr_2

## ----custom-processing--------------------------------------------------------
intensity(chr_2)
intensity(chr)

## ----applyProcessing----------------------------------------------------------
length(chr_2@processingQueue)
chr_2 <- applyProcessing(chr_2)

length(chr_2@processingQueue)
chr_2

## -----------------------------------------------------------------------------
processingChunkFactor(chr)

## -----------------------------------------------------------------------------
processingChunkFactor(chr_mzr) |>
  head()

## -----------------------------------------------------------------------------
processingChunkSize(chr_mzr) <- 10

processingChunkFactor(chr_mzr) |> table()

## -----------------------------------------------------------------------------
print(object.size(chr_mzr), units = "Mb")
chr_mzr <- setBackend(chr_mzr, ChromBackendMemory(), BPPARAM = SerialParam())

chr_mzr

chr_mzr@backend@peaksData[[1]] |> head() # data is now in memory

## -----------------------------------------------------------------------------
print(object.size(chr_mzr), units = "Mb")

## ----message=FALSE------------------------------------------------------------
library(Spectra)
library(IRanges)
sp <- Spectra(
  DataFrame(
    rtime = c(100, 110, 120, 130, 140),
    msLevel = c(1L, 1L, 1L, 1L, 1L),
    dataOrigin = rep("example", 5L),
    mz = NumericList(
      c(100, 101), c(100, 101), c(100, 101), c(100, 101), c(100, 101),
      compress = FALSE
    ),
    intensity = NumericList(
      c(10, 20), c(15, 25), c(30, 5), c(12, 18), c(40, 2),
      compress = FALSE
    )
  ),
  source = MsBackendDataFrame()
)

chr_s <- Chromatograms(sp)

## -----------------------------------------------------------------------------
chr_s

## -----------------------------------------------------------------------------
## Create custom metadata for EIC extraction
custom_cd <- data.frame(
    msLevel = c(1L, 1L),
    dataOrigin = rep(dataOrigin(sp)[1], 2),
    mzMin = c(100, 200),
    mzMax = c(100.5, 200.5)
)

chr_custom <- Chromatograms(sp, chromData = custom_cd)
chr_custom

## -----------------------------------------------------------------------------
chromData(chr_s)$rtmin <- 125
chromData(chr_s)$rtmax <- 180
chromData(chr_s)$mzmin <- 100
chromData(chr_s)$mzmax <- 100.5

## -----------------------------------------------------------------------------
library(RColorBrewer)
col3 <- brewer.pal(3, "Dark2")

plotChromatograms(chr_s, col = col3)

## -----------------------------------------------------------------------------
plotChromatogramsOverlay(chr_s, col = col3)

## -----------------------------------------------------------------------------
## Define peaks of interest with retention time windows
peak_table <- data.frame(
    rtMin = c(8, 11),
    rtMax = c(10, 13),
    msLevel = c(2L, 2L),
    chromIndex = c(1L, 2L)
)

## Extract those regions
chr_extracted <- chromExtract(chr, peak_table,
                              by = c("msLevel", "chromIndex"))

chr_extracted

## -----------------------------------------------------------------------------
chromData(chr_extracted)

## -----------------------------------------------------------------------------
## Define peak table with both retention time and m/z windows
peak_table_mz <- data.frame(
    rtMin = c(125, 125),
    rtMax = c(180, 180),
    mzMin = c(100, 140),
    mzMax = c(100.5, 140.5),
    msLevel = c(1L, 1L),
    dataOrigin = rep(dataOrigin(chr_s)[1], 2),
    featureID = c("feature_1", "feature_2")
)

## Extract EICs for these features
chr_eics <- chromExtract(chr_s, peak_table_mz,
                        by = c("msLevel", "dataOrigin"))

chr_eics

## -----------------------------------------------------------------------------
chromData(chr_eics)

## -----------------------------------------------------------------------------
## Create a specific EIC
eic_table <- data.frame(
    rtMin = 125,
    rtMax = 180,
    mzMin = 100.01,
    mzMax = 100.02,
    msLevel = 1L,
    dataOrigin = dataOrigin(chr_s)[1]
)

chr_eic <- chromExtract(chr_s, eic_table, by = c("msLevel", "dataOrigin"))
chr_eic

## ----fig.height=10, fig.width=8-----------------------------------------------
## Create copies for comparison
chr_linear <- imputePeaksData(chr_eic, method = "linear")
chr_spline <- imputePeaksData(chr_eic, method = "spline")
chr_gaussian <- imputePeaksData(chr_eic, method = "gaussian",
                                window = 5, sd = 2)
chr_loess <- imputePeaksData(chr_eic, method = "loess", span = 0.3)

## Plot all methods for comparison
par(mfrow = c(3, 2), mar = c(4, 4, 2, 1))

## Original data
plotChromatograms(chr_eic, main = "Original EIC")

## Linear interpolation
plotChromatograms(chr_linear, main = "Linear Imputation")

## Spline interpolation
plotChromatograms(chr_spline, main = "Spline Imputation")

## Gaussian smoothing
plotChromatograms(chr_gaussian, main = "Gaussian Smoothing (window=5, sd=2)")

## LOESS smoothing
plotChromatograms(chr_loess, main = "LOESS Smoothing (span=0.3)")

## -----------------------------------------------------------------------------
## For on-disk backends, add imputation to the lazy queue
chr_mzr_imputed <- imputePeaksData(
  chr_mzr,
  method = "gaussian",
  window = 5,
  sd = 2
)

chr_mzr_imputed

## -----------------------------------------------------------------------------
## This reads from disk and applies imputation in one step
peak_data <- peaksData(chr_mzr_imputed[1])

## -----------------------------------------------------------------------------
length(chr_mzr_imputed@processingQueue)

## -----------------------------------------------------------------------------
## For in-memory backends, you can persist the imputation
chr_in_memory <- setBackend(chr_mzr_imputed, ChromBackendMemory())
chr_in_memory <- applyProcessing(chr_in_memory)

# Now imputation is permanently applied
length(chr_in_memory@processingQueue)

## ----si-----------------------------------------------------------------------
sessionInfo()

