% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RFLOMICS-Methods_03_data_explor.R
\name{runDataProcessing}
\alias{runDataProcessing}
\alias{runDataProcessing,RflomicsSE-method}
\alias{runDataProcessing,RflomicsMAE-method}
\alias{runSampleFiltering}
\alias{runSampleFiltering,RflomicsSE-method}
\alias{runSampleFiltering,RflomicsMAE-method}
\alias{runFeatureFiltering}
\alias{runFeatureFiltering,RflomicsSE-method}
\alias{runFeatureFiltering,RflomicsMAE-method}
\alias{runTransformData}
\alias{runTransformData,RflomicsSE-method}
\alias{runTransformData,RflomicsMAE-method}
\alias{runNormalization}
\alias{runNormalization,RflomicsSE-method}
\alias{runNormalization,RflomicsMAE-method}
\alias{runOmicsPCA}
\alias{runOmicsPCA,RflomicsSE-method}
\alias{runOmicsPCA,RflomicsMAE-method}
\alias{checkExpDesignCompleteness}
\alias{checkExpDesignCompleteness,RflomicsSE-method}
\alias{checkExpDesignCompleteness,RflomicsMAE-method}
\alias{getProcessedData}
\alias{getProcessedData,RflomicsSE-method}
\alias{getProcessedData,RflomicsMAE-method}
\alias{getTransSettings}
\alias{getTransSettings,RflomicsSE-method}
\alias{getTransSettings,RflomicsMAE-method}
\alias{getFilterSettings}
\alias{getFilterSettings,RflomicsSE-method}
\alias{getFilterSettings,RflomicsMAE-method}
\alias{getFilteredFeatures}
\alias{getFilteredFeatures,RflomicsSE-method}
\alias{getFilteredFeatures,RflomicsMAE-method}
\alias{getSelectedSamples}
\alias{getSelectedSamples,RflomicsSE-method}
\alias{getSelectedSamples,RflomicsMAE-method}
\alias{getCoeffNorm}
\alias{getCoeffNorm,RflomicsSE-method}
\alias{getCoeffNorm,RflomicsMAE-method}
\alias{getNormSettings}
\alias{getNormSettings,RflomicsSE-method}
\alias{getNormSettings,RflomicsMAE-method}
\alias{plotLibrarySize}
\alias{plotLibrarySize,RflomicsSE-method}
\alias{plotLibrarySize,RflomicsMAE-method}
\alias{plotDataDistribution}
\alias{plotDataDistribution,RflomicsSE-method}
\alias{plotDataDistribution,RflomicsMAE-method}
\alias{plotOmicsPCA}
\alias{plotOmicsPCA,RflomicsSE-method}
\alias{plotOmicsPCA,RflomicsMAE-method}
\alias{plotExpDesignCompleteness}
\alias{plotExpDesignCompleteness,RflomicsSE-method}
\alias{plotExpDesignCompleteness,RflomicsMAE-method}
\alias{isProcessedData}
\alias{isProcessedData,RflomicsSE-method}
\alias{isProcessedData,RflomicsMAE-method}
\title{Data exploration and processing}
\usage{
\S4method{runDataProcessing}{RflomicsSE}(
  object,
  samples = NULL,
  filterStrategy = NULL,
  cpmCutoff = NULL,
  transformMethod = NULL,
  normMethod = NULL,
  imputMethod = NULL,
  userTransMethod = "unknown",
  userNormMethod = "unknown"
)

\S4method{runDataProcessing}{RflomicsMAE}(
  object,
  SE.name,
  samples = NULL,
  filterStrategy = NULL,
  cpmCutoff = NULL,
  transformMethod = NULL,
  normMethod = NULL,
  userTransMethod = "unknown",
  userNormMethod = "unknown"
)

\S4method{runSampleFiltering}{RflomicsSE}(object, samples = NULL)

\S4method{runSampleFiltering}{RflomicsMAE}(object, SE.name, samples = NULL)

\S4method{runFeatureFiltering}{RflomicsSE}(
  object,
  filterMethod = NULL,
  filterStrategy = NULL,
  cpmCutoff = NULL,
  imputMethod = NULL
)

\S4method{runFeatureFiltering}{RflomicsMAE}(
  object,
  SE.name,
  filterMethod = NULL,
  filterStrategy = NULL,
  cpmCutoff = NULL,
  imputMethod = NULL
)

\S4method{runTransformData}{RflomicsSE}(object, transformMethod = NULL, userTransMethod = "unknown")

\S4method{runTransformData}{RflomicsMAE}(
  object,
  SE.name,
  transformMethod = NULL,
  userTransMethod = "unknown"
)

\S4method{runNormalization}{RflomicsSE}(object, normMethod = NULL, userNormMethod = "unknown")

\S4method{runNormalization}{RflomicsMAE}(
  object,
  SE.name,
  normMethod = NULL,
  userNormMethod = "unknown"
)

\S4method{runOmicsPCA}{RflomicsSE}(object, ncomp = 5, raw = FALSE)

\S4method{runOmicsPCA}{RflomicsMAE}(object, SE.name, ncomp = 5, raw = FALSE)

\S4method{checkExpDesignCompleteness}{RflomicsSE}(object, sampleList = NULL)

\S4method{checkExpDesignCompleteness}{RflomicsMAE}(object, omicName, sampleList = NULL)

\S4method{getProcessedData}{RflomicsSE}(
  object,
  filter = FALSE,
  trans = FALSE,
  norm = FALSE,
  log = FALSE
)

\S4method{getProcessedData}{RflomicsMAE}(
  object,
  SE.name,
  filter = FALSE,
  trans = FALSE,
  norm = FALSE,
  log = FALSE
)

\S4method{getTransSettings}{RflomicsSE}(object)

\S4method{getTransSettings}{RflomicsMAE}(object, SE.name)

\S4method{getFilterSettings}{RflomicsSE}(object)

\S4method{getFilterSettings}{RflomicsMAE}(object, SE.name)

\S4method{getFilteredFeatures}{RflomicsSE}(object)

\S4method{getFilteredFeatures}{RflomicsMAE}(object, SE.name)

\S4method{getSelectedSamples}{RflomicsSE}(object)

\S4method{getSelectedSamples}{RflomicsMAE}(object, SE.name)

\S4method{getCoeffNorm}{RflomicsSE}(object)

\S4method{getCoeffNorm}{RflomicsMAE}(object, SE.name)

\S4method{getNormSettings}{RflomicsSE}(object)

\S4method{getNormSettings}{RflomicsMAE}(object, SE.name)

\S4method{plotLibrarySize}{RflomicsSE}(object, raw = FALSE)

\S4method{plotLibrarySize}{RflomicsMAE}(object, SE.name, raw = FALSE)

\S4method{plotDataDistribution}{RflomicsSE}(object, plot = "boxplot", raw = FALSE)

\S4method{plotDataDistribution}{RflomicsMAE}(object, SE.name, plot = "boxplot", raw = FALSE)

\S4method{plotOmicsPCA}{RflomicsSE}(object, raw = TRUE, axes = c(1, 2), groupColor = "groups")

\S4method{plotOmicsPCA}{RflomicsMAE}(
  object,
  SE.name,
  raw = FALSE,
  axes = c(1, 2),
  groupColor = "groups"
)

\S4method{plotExpDesignCompleteness}{RflomicsSE}(object, sampleList = NULL)

\S4method{plotExpDesignCompleteness}{RflomicsMAE}(object, omicName, sampleList = NULL)

\S4method{isProcessedData}{RflomicsSE}(
  object,
  filter = FALSE,
  trans = FALSE,
  norm = FALSE,
  log = FALSE
)

\S4method{isProcessedData}{RflomicsMAE}(
  object,
  SE.name,
  filter = TRUE,
  trans = TRUE,
  norm = TRUE,
  log = FALSE
)
}
\arguments{
\item{object}{An object of class \link{RflomicsSE-class} or
\link{RflomicsMAE-class}.}

\item{samples}{samples to keep.}

\item{filterStrategy}{The filtering strategy
("NbConditions" or "NbReplicates") for RNAseq data.}

\item{cpmCutoff}{The CPM cutoff for RNAseq data.}

\item{transformMethod}{The transformation method to store in the metadata}

\item{normMethod}{Normalization method. Accepted values: TMM for RNAseq, and
median, totalSum, or none for proteomics and metabolomics data.
Default values: TMM for RNAseq data and median for proteomics and metabolomics
data}

\item{imputMethod}{The imputation method ("MVI") for proteomics and
metabolomics data.}

\item{userTransMethod}{method used by user to transform data.}

\item{userNormMethod}{method used by user to normalize data.}

\item{SE.name}{the name of the dataset the function has to be applied to.}

\item{filterMethod}{The filtering model ("CPM") for RNAseq data.}

\item{ncomp}{Number of components to compute. Default is 5.}

\item{raw}{boolean. Does the PCA have to be run on raw data (TRUE) or on
transformed data (FALSE)?}

\item{sampleList}{list of samples to check.}

\item{omicName}{a character string with the name of the dataset}

\item{filter}{boolean. If TRUE, check if data is filtered (low counts/RNAseq)}

\item{trans}{boolean. If TRUE, check if data is transformed}

\item{norm}{boolean. If TRUE, check if data is normalized}

\item{log}{boolean. If TRUE, check if the data has been log-transformed
(RNAseq).}

\item{plot}{plot type ("boxplot" or "density")}

\item{axes}{A vector giving the two axes that have to be drawn for the
factorial map}

\item{groupColor}{The factor used to color the samples. Default is "groups"
(all combinations of factor levels).}
}
\value{
An object of class \link{RflomicsSE-class} or class \link{RflomicsMAE-class}

An object of class \link{RflomicsSE}
The applied normalization method and computed scaling factors
(by samples) are stored as a named list
("normalization") of two elements (respectively "method" and
"coefNorm") in the metadata slot of a
given data set, stored itself in the ExperimentList slot of a
\link{RflomicsSE} object.

An object of class \link{RflomicsSE}
}
\description{
These functions apply data processing (filtering, normalization
and/or transformation, PCA) to RNAseq, proteomics, or metabolomics data.

runDataProcessing() calls the following functions:

\itemize{
\item runSampleFiltering:
  This function applies sample filtering to a dataset.
}

\itemize{
\item runFeatureFiltering: This function allows filtering variables in omics
data. In the case of RNA-seq data, it involves filtering out transcripts with
low counts, while in the case of proteomics and metabolomics data, it applies
the imputation procedure.
}

\itemize{
\item runTransformData:
   This function applies a transformation to the dataset. The transformation
method is chosen according to the dataset omics type
(RNAseq: none, metabolomics/proteomics: log2)
}

\itemize{
\item runNormalization:
 This function applies a normalization to a dataset.
The normalization method is chosen according to the dataset omics type
(RNAseq: TMM, metabolomics/proteomics: median)
}

\itemize{
\item runOmicsPCA:
 This function performs a principal component analysis on omics
data stored in an object of class \link{RflomicsSE-class}.
Results are stored in the metadata slot of the same object. If a
"Normalization" slot is present in the metadata slot, then data are
normalized before running the PCA according to the indicated transform
method.
}
This function performs a principal component analysis on omics
data stored in an object of class \link{RflomicsSE-class}.
Results are stored in the metadata slot of the same object. If a
"Normalization" slot is present in the metadata slot, then data are
normalized before running the PCA according to the indicated transform
method.

\itemize{
   \item checkExpDesignCompleteness: return a string with message.
   This method checks some experimental design characteristics.
   A complete design (all combinations of factor modalities have to be
   present, with at least 2 replicates for each) with
   at least one biological factor and one batch factor is required to use the
   RFLOMICS workflow.}

\itemize{
   \item plotDataDistribution: return boxplot or density plot of expression
   or abundance distribution.}
}
\details{
Low count filtering procedure: By default, transcripts with 0 counts
are removed from the data. The function then computes the counts per million
of reads (CPM) for each gene in each sample and gives, for each gene, the
number of sample(s) which are over the cpmCutoff (NbOfsample_over_cpm).
Then the following filtering strategies are proposed:
\itemize{
\item NbConditions:  keep gene if the NbOfsample_over_cpm >= NbConditions
\item NbReplicates:  keep gene if the NbOfsample_over_cpm >= min(NbReplicates)
\item filterByExpr: the default filtering method implemented
in the edgeR filterByExpr() function.
}

Missing value imputation: This approach, applied to proteomics and
metabolomics data, replaces missing values (0 or NA) with the minimum value
among all non-zero values. Additionally, variables with at least one
condition group without any missing values are retained without further
filtering.
}
\section{Accessors}{



\itemize{
   \item getProcessedData: return an RflomicsSE object with processed data
   (filtering, normalization and/or transformation)}


\itemize{
   \item getTransSettings: return a list of transformation settings
   of a given omics dataset}


\itemize{
   \item getFilterSettings: return a list of the filtering settings of a given
   omics dataset}


\itemize{
   \item getFilteredFeatures: return a vector of filtered features of a given
   omics dataset}


\itemize{
   \item getSelectedSamples: return a vector of selected samples of a given
   omics dataset}


\itemize{
   \item getCoeffNorm: return a named vector with normalization coefficients
   of a given omics dataset}


\itemize{
   \item getNormSettings: return a list of normalization settings
   of a given omics dataset}


\itemize{
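   \item isProcessedData: check whether the data of a given omics dataset
   has been processed (filtered, transformed, normalized and/or
   log-transformed, according to the arguments set to TRUE)}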
}

\section{Plots}{



\itemize{
   \item plotLibrarySize: return barplot of library size by sample.}


\itemize{
   \item plotOmicsPCA:
This function plots the factorial map from a PCA object stored
in a \link{RflomicsSE-class} object. By default, samples are
colored by groups (all combinations of factor levels)}


\itemize{
   \item plotExpDesignCompleteness:
This method checks that experimental design constraints are satisfied and
plots a summary of the design.
A complete design (all combinations of factor modalities have to be present,
with at least 2 replicates for each)
with at least one biological factor and one batch factor is required to use
the RFLOMICS workflow.}
}

\examples{
# load ecoseed data
data(ecoseed.mae)

factorInfo <- data.frame(
  "factorName"   = c("Repeat", "temperature", "imbibition"),
  "factorType"   = c("batch", "Bio", "Bio")
)

# create rflomicsMAE object with ecoseed data
MAE <- RFLOMICS::createRflomicsMAE(
  projectName = "Tests",
  omicsData   = ecoseed.mae,
  omicsTypes  = c("RNAseq","proteomics","metabolomics"),
  factorInfo  = factorInfo)


# Set the statistical model
formulae <- generateModelFormulae(MAE)
MAE <- setModelFormula(MAE, formulae[[1]])  

# set the contrast list
contrastList <- generateExpressionContrast(MAE, "averaged")
MAE <- setSelectedContrasts(MAE, contrastList = contrastList[c(1, 2, 3),])

# Data processing of RNAseq dataset : RNAtest
## using data processing functions for RNAseq data
### filter low RNAseq counts
# MAE <- runFeatureFiltering(MAE, SE.name = "RNAtest",
#                            filterStrategy = "NbReplicates",
#                            cpmCutoff = 1)
# ### filter outlier samples
# MAE <- runSampleFiltering(MAE, SE.name = "RNAtest", 
#                           samples = colnames(MAE[["RNAtest"]])[-1])
# ### data normalisation
# MAE <- runNormalization(MAE, SE.name = "RNAtest", 
#                         normMethod = "TMM")

## use runDataProcessing function that combines the previous three functions
MAE <- runDataProcessing(MAE, SE.name = "RNAtest",
                         samples = colnames(MAE[["RNAtest"]])[-1], 
                         filterStrategy = "NbReplicates", 
                         cpmCutoff = 1, 
                         normMethod = "TMM") 

## check completeness of RNAtest data
checkExpDesignCompleteness(MAE, omicName = "RNAtest")$messages

# Data processing of proteomics dataset : protetest
# ## transform data
# MAE <- runTransformData(MAE, SE.name = "protetest",  transformMethod = "log2")
# ## normalise data
# MAE <- runNormalization(MAE, SE.name = "protetest", normMethod = "median") 

## use runDataProcessing function
MAE <- runDataProcessing(MAE, SE.name = "protetest", 
                         normMethod = "median", 
                         transformMethod = "log2")

# plotExpDesignCompleteness(MAE[["RNAtest"]])

# plot Library Size
# plotLibrarySize(MAE[["RNAtest"]], raw=TRUE)
# plotLibrarySize(MAE[["RNAtest"]], raw=FALSE)

# plot gene expression distribution
# plotDataDistribution(MAE[["RNAtest"]], raw=TRUE, plot = "boxplot")
# plotDataDistribution(MAE[["RNAtest"]], raw=FALSE, plot = "boxplot")

# plot PCA 
# plotOmicsPCA(MAE[["RNAtest"]], raw=TRUE, groupColor = "imbibition")
# plotOmicsPCA(MAE[["RNAtest"]], raw=FALSE, groupColor = "imbibition")

# See runDataProcessing for an example that includes getTransSettings
# See runDataProcessing for an example that includes getFilterSettings
# See runDataProcessing for an example that includes getFilteredFeatures
# See runDataProcessing for an example that includes getSelectedSamples
# See runDataProcessing for an example that includes getCoeffNorm
# See runDataProcessing for an example that includes getNormSettings
# See runDataProcessing for an example that includes plotLibrarySize
# See runDataProcessing for an example that includes plotDataDistribution
# See runDataProcessing for an example that includes plotOmicsPCA
# See runDataProcessing for an example that includes plotExpDesignCompleteness
}
\references{
Lambert, I., Paysant-Le Roux, C., Colella, S. et al.
DiCoExpress: a tool to process multifactorial
RNAseq experiments from quality controls to co-expression analysis through
differential analysis based on contrasts inside GLM models.
Plant Methods 16, 68 (2020).
}
\seealso{
\link{RflomicsMAE-class}
\link{RflomicsSE-class}
\link{getProcessedData}
\link{getTransSettings}
\link{getFilterSettings}
\link{getFilteredFeatures}
\link{getCoeffNorm}
\link{getNormSettings}
\link{plotLibrarySize}
\link{plotDataDistribution}
\link{plotOmicsPCA}
}
