#' @title simData
#'
#' @description simulate Data with orthogonal feature clusters and replicated
#' samples. Each feature cluster corresponds to a different latent factor and
#' contains 10 redundant features. E.g. choosing conditions = 100,
#' n_latent_factors = 5 and replicates = 2
#' will simulate a 50 x 200 data matrix, where the first 100 samples belong to
#' replicate 1 and samples 101-200 belong to replicate 2.
#' \cr
#' \cr
#'
#'
#' @details
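#' simData constructs n_latent_factors latent factors by generating a random
#' matrix \eqn{\mathbf{Q}} whose vectors \eqn{\mathbf{Q}_{i\cdot}}, with
#' \eqn{i \in \{1, \dots, \textrm{n\_latent\_factors}\}}, each hold one value
#' per condition sampled from \eqn{\mathcal{N}(10,1)}. The vectors are made
#' mutually orthogonal by Gram-Schmidt orthonormalization and are then
#' rescaled to their original magnitude, each corresponding to a different
#' latent factor. To simulate a set of redundant feature groups, it generates
#' 10 features \eqn{X_{j\cdot}} for each latent factor
#' \eqn{\mathbf{Q}_{i\cdot}}: the first feature is the latent factor itself
#' and the remaining 9 are obtained by scaling the latent factor by a random
#' factor \eqn{|\delta_j|} with \eqn{\delta_j \sim \mathcal{N}(0,1)} and
#' adding feature specific noise with mean 0 and standard deviation 0.1.
#' Replicates \eqn{c \in \{1, \dots, \textrm{replicates}\}} are copies of the
#' resulting feature matrix: the first replicate is left unchanged, while
#' every further replicate receives its own replicate specific noise
#' \eqn{\pmb{\epsilon}_c} with mean 0 and standard deviation 0.5,
#' approximately preserving orthogonality between feature clusters.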
#'
#' @param conditions number of conditions to generate samples from
#' @param n_latent_factors number of latent factors to generate
#' @param replicates number of replicates to generate
#'
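#' @return \code{SummarizedExperiment} object carrying simulated data in the
#' assay \code{data} (features in rows, samples in columns), with
#' \code{colData} holding a \code{conditions} factor that indicates which
#' condition each sample belongs to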
#'
#' @importFrom MASS mvrnorm
#' @importFrom pracma gramSchmidt
#' @importFrom stats rnorm
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom methods is
#' 
#' @export
#'
#' @examples
#' # simulate 200 samples (100 conditions x 2 replicates) with 20 features
#' # generated by 2 latent factors
#' simData(conditions=100, n_latent_factors=2, replicates=2)
simData <- function(conditions, n_latent_factors, replicates) {
    # Validate that an argument is a single, finite, positive whole number.
    # Failing here gives an error that names the offending argument instead
    # of an unrelated failure further down (e.g. in the matrix algebra).
    check_count <- function(value, arg) {
        if (!is.numeric(value) || length(value) != 1 ||
            !is.finite(value) || value < 1 || value != round(value)) {
            stop("'", arg, "' must be a single positive whole number",
                call.=FALSE)
        }
        invisible(TRUE)
    }
    check_count(n_latent_factors, "n_latent_factors")
    check_count(conditions, "conditions")
    check_count(replicates, "replicates")
    # n_latent_factors mutually orthogonal, non-zero vectors of length
    # `conditions` can only exist if conditions >= n_latent_factors
    if (conditions < n_latent_factors) {
        stop("'conditions' must be greater than or equal to ",
            "'n_latent_factors' to construct orthogonal latent factors",
            call.=FALSE)
    }

    # number of redundant features generated per latent factor
    features_per_factor <- 10
    # total number of features
    p <- n_latent_factors * features_per_factor

    # Randomly sample one target vector (column) per latent factor from a
    # multivariate normal distribution with mean 10 and identity covariance.
    # mvrnorm() returns a plain vector when conditions == 1, so the result is
    # forced back into a conditions x n_latent_factors matrix.
    target <- matrix(
        mvrnorm(conditions, mu=rep(10, n_latent_factors),
                Sigma=diag(1, n_latent_factors)),
        nrow=conditions, ncol=n_latent_factors
    )
    # store magnitude of each vector before orthonormalization
    mag <- sqrt(colSums(target^2))
    # orthonormalize the vectors, then rescale every column back to its
    # original magnitude
    target <- sweep(gramSchmidt(target)$Q, 2, mag, "*")

    # Build the noise-free feature matrix (conditions x p). The first feature
    # of every cluster is the latent factor itself; the remaining ones are
    # randomly scaled, noisy copies of it. The loops are kept (instead of
    # vectorizing) so random numbers are drawn in the same order as before
    # and seeded results stay reproducible.
    base <- array(dim=c(conditions, p))
    for (i in seq_len(n_latent_factors)) {
        first <- (i - 1) * features_per_factor + 1
        base[, first] <- target[, i]
        for (j in seq_len(features_per_factor - 1)) {
            weight <- abs(rnorm(1))
            # add noise to reduce orthogonality between features in the
            # same cluster
            base[, first + j] <- (weight * target[, i]) +
                rnorm(conditions, 0, 0.1)
        }
    }

    # Generate replicates. Noise is drawn for every replicate (keeping the
    # random number stream unchanged) but only added from the second
    # replicate on.
    # NOTE(review): replicate 1 is returned without noise although the
    # function documentation speaks of replicate specific noise - confirm
    # that this is intended.
    replist <- lapply(seq_len(replicates), function(i) {
        noise <- array(rnorm(length(base), mean=0, sd=0.5), dim(base))
        if (i == 1) base else base + noise
    })
    # stack replicates row-wise: rows are samples, columns are features
    data <- do.call(rbind, replist)
    colnames(data) <- paste0(
        "Latent_factor",
        rep(seq_len(n_latent_factors), each=features_per_factor),
        "_Feature",
        rep(seq_len(features_per_factor), times=n_latent_factors)
    )

    # one row of sample annotation per column of the assay; samples are
    # ordered replicate by replicate, conditions 1..conditions within each
    conds <- data.frame(
        conditions=as.factor(rep(seq_len(conditions), replicates))
    )

    # features in rows, samples in columns
    SummarizedExperiment(assays=list(data=t(data)), colData=conds)
}
