#' @title  Plot signals around the start and the end of genomic features and
#' random regions
#'
#' @description   Plot reads or peak Coverage/base/gene of samples given in the
#' query files around start, end and center of genomic features or custom feature
#' given in a .bed file. The upstream and downstream windows can be given
#' separately. If Input files are provided, ratio over Input is computed and
#' displayed as well. A random feature can be generated to serve as a background
#' for contrasting.
#'
#' @param queryFiles a vector of sample file names. The file should be in .bam,
#'  .bed, .wig or .bw format, mixture of formats is allowed
#' @param inputFiles a vector of input sample file names. The file should be in
#'  .bam, .bed, .wig or .bw format, mixture of formats is allowed
#' @param centerFile  a bed file that defines the custom feature, or a feature
#'  in c("utr3", "utr5", "cds", "intron", "exon", "transcript", "gene"),
#'  multiple features are not allowed.
#' @param txdb a TxDb object defined in the GenomicFeatures package. Default
#'  NULL, needed only when genomic features are used in the place of centerFile.
#' @param importParams a list of parameters for \code{handle_input}
#' @param binSize an integer defines bin size for intensity calculation
#' @param ext a vector of four integers defining upstream and downstream
#'  boundaries of the plot window, flanking the start and end of features
#' @param hl a vector of four integers defining upstream and downstream
#'  boundaries of the highlight window, flanking the start and end of features
#' @param insert an integer specifies the length of the center regions to be
#'  included, in addition to the start and end of the feature
#' @param randomize logical, indicating if a randomized feature should be
#'  generated and used as a contrast to the real feature. The randomized
#'  feature is generated by shifting the given feature with a random offset
#'  within the range of ext[1] and ext[4]
#' @param stranded logical, indicating whether the strand of the feature
#'  should be considered
#' @param scale logical, indicating whether the score matrix should be scaled to
#'  the range 0:1, so that samples with different baseline can be compared
#' @param shade logical indicating whether to place a shaded rectangle around
#'  the point of interest
#' @param smooth logical, indicating whether the line should be smoothed with
#'  a spline smoothing algorithm
#' @param rmOutlier a numeric value serving as a multiplier of the MAD in Hampel
#'  filter for outliers identification, 0 indicating not removing outliers. For
#'  Gaussian distribution, use 3, adjust based on data distribution
#' @param transform a string in c("log", "log2", "log10"), default = NA
#'  indicating no transformation of data matrix
#' @param Ylab a string for y-axis label
#' @param outPrefix a string specifying output file prefix for plots
#'  (outPrefix.pdf)
#' @param verbose logical, whether to output additional information
#'  (data used for plotting or statistical test results)
#' @param hw a vector of two elements specifying the height and width of the
#'  output figures
#' @param nc integer, number of cores for parallel processing
#'
#' @return a data frame, returned invisibly, holding the summarized profile
#' data of the last set of plots (ratio-over-input profiles if inputFiles are
#' provided, otherwise the query profiles)
#'
#' @author Shuye Pu
#'
#' @examples
#'
#' gtfFile <- system.file("extdata", "gencode.v19.annotation_chr19.gtf",
#'     package = "GenomicPlot"
#' )
#'
#' txdb <- custom_TxDb_from_GTF(gtfFile, genome = "hg19")
#'
#' bamQueryFiles <- system.file("extdata", "treat_chr19.bam",
#'                              package = "GenomicPlot")
#' names(bamQueryFiles) <- "clip_bam"
#' bamInputFiles <- system.file("extdata", "input_chr19.bam",
#'                              package = "GenomicPlot")
#' names(bamInputFiles) <- "clip_input"
#'
#' bamImportParams <- setImportParams(
#'   offset = -1, fix_width = 0, fix_point = "start", norm = TRUE,
#'   useScore = FALSE, outRle = TRUE, useSizeFactor = FALSE, genome = "hg19"
#' )
#'
#' plot_start_end_with_random(
#'   queryFiles = bamQueryFiles,
#'   inputFiles = bamInputFiles,
#'   txdb = txdb,
#'   centerFile = "intron",
#'   binSize = 10,
#'   importParams = bamImportParams,
#'   ext = c(-100, 100, -100, 100),
#'   hl = c(-20, 20, -20, 20),
#'   insert = 100,
#'   stranded = TRUE,
#'   scale = FALSE,
#'   smooth = TRUE,
#'   verbose = TRUE,
#'   transform = "log2",
#'   outPrefix = NULL,
#'   randomize = TRUE,
#'   nc = 2
#' )
#'
#' @export plot_start_end_with_random
#'
#'

plot_start_end_with_random <- function(queryFiles,
                                       inputFiles = NULL,
                                       txdb = NULL,
                                       centerFile,
                                       importParams = NULL,
                                       binSize = 10,
                                       insert = 0,
                                       verbose = FALSE,
                                       ext = c(-500, 200, -200, 500),
                                       hl = c(-50, 50, -50, 50),
                                       randomize = FALSE,
                                       stranded = TRUE,
                                       scale = FALSE,
                                       smooth = FALSE,
                                       rmOutlier = 0,
                                       outPrefix = NULL,
                                       transform = NA,
                                       shade = TRUE,
                                       nc = 2,
                                       hw = c(8, 8),
                                       Ylab = "Coverage/base/gene") {
    ## ---- argument validation ------------------------------------------------
    ## NOTE: no local variable may be created before `params` is captured
    ## below, otherwise it would end up in the recorded parameter list.
    stopifnot(is.numeric(c(binSize, insert, ext, hl, nc, hw, rmOutlier)))
    stopifnot(transform %in% c("log", "log2", "log10", NA))
    stopifnot(all(file.exists(queryFiles)))
    if (is.null(names(queryFiles)) || any(names(queryFiles) == "")) {
        stop("Each file must have a name attribute!")
    }
    if (!is.null(inputFiles)) {
        ## names(inputFiles) are used as labels to pair inputs with queries
        stopifnot(all(file.exists(inputFiles)))
        if (is.null(names(inputFiles)) || any(names(inputFiles) == "")) {
            stop("Each file must have a name attribute!")
        }
    }
    ## centerFile is tested in a scalar `if` below, so it must be one string
    stopifnot(is.character(centerFile), length(centerFile) == 1)

    if (verbose) message("[plot_start_end_with_random] started ...\n")
    ## functionName is created on purpose so that it is part of `params`
    functionName <- as.character(match.call()[[1]])
    params <- plot_named_list(as.list(environment()))
    force(params)

    ## ---- output device ------------------------------------------------------
    if (!is.null(outPrefix)) {
        while (!is.null(dev.list())) {
            dev.off()
        }
        pdf(paste(outPrefix, "pdf", sep = "."), height = hw[1], width = hw[2])
        ## registered immediately so the device is also closed when an error
        ## occurs anywhere below
        on.exit(dev.off(), add = TRUE)
    }

    ## ---- import query (and input) data --------------------------------------
    if (is.null(inputFiles)) {
        inputLabels <- NULL
        queryInputs <- handle_input(inputFiles = queryFiles, importParams,
                                    verbose = verbose, nc = nc)
    } else {
        if (!length(inputFiles) %in% c(1, length(queryFiles))) {
            stop("the number of inputFiles must be 1 or equal to the number of
                 queryFiles!")
        }
        inputLabels <- names(inputFiles)
        queryInputs <- handle_input(inputFiles = c(queryFiles, inputFiles),
                                    importParams, verbose = verbose, nc = nc)
        if (length(inputFiles) != length(queryFiles)) {
            ## a single input is shared by all queries: replicate it once per
            ## query and give every copy a unique label
            sampleLabels <- names(queryFiles)
            queryInputs <- queryInputs[c(sampleLabels,
                                         rep(inputLabels,
                                             length(sampleLabels)))]
            inputLabels <- paste0(names(inputFiles), seq_along(queryFiles))
            names(queryInputs) <- c(sampleLabels, inputLabels)
        }
    }
    queryLabels <- names(queryInputs)
    has_input <- !is.null(inputFiles)

    ## ---- define the feature whose start/center/end is profiled --------------
    genomic_features <- c("utr3", "utr5", "cds", "intron", "exon",
                          "transcript", "gene")
    if (centerFile %in% genomic_features) {
        if (is.null(txdb)) {
            stop("txdb must be provided when centerFile is a genomic feature!")
        }
        featureName <- centerFile
        feature <- get_genomic_feature_coordinates(
            txdb, featureName, longest = TRUE,
            protein_coding = TRUE)[["GRanges"]]
        ## keep only features wide enough for the inner flanks plus insert
        minimal_width <- ext[2] - ext[3] + insert
        feature <- feature[width(feature) > minimal_width]
    } else if (file.exists(centerFile)) {
        ## the input bed is going to be used as center, so the importParams
        ## has to be modified accordingly
        bedparam <- importParams
        bedparam$fix_width <- 0
        bedparam$norm <- FALSE
        bedparam$useScore <- FALSE
        bedparam$outRle <- FALSE
        feature <- handle_input(inputFiles = centerFile, bedparam,
                                verbose = verbose, nc = nc)[[1]]$query
        featureName <- names(centerFile)
    } else {
        stop("centerfile does not exit or the feature name is not supported!")
    }

    if (verbose) message("Number of features: ", featureName, " ",
                         length(feature), "\n")

    ## ---- plotting windows (built with the ext values as supplied) -----------
    windows <- .start_end_windows(feature, ext, insert, importParams$genome)
    rwindows <- list() # stays empty, i.e. all rwindow entries NULL, if !randomize
    if (randomize) {
        random_points <- sample(ext[1]:ext[4], length(feature), replace = TRUE)
        rfeature <- shift(feature, shift = random_points, use.names = TRUE)
        rwindows <- .start_end_windows(rfeature, ext, insert,
                                       importParams$genome)
    }

    ## to avoid binSize inconsistency, as the final binSize depends on bin_num
    ext[2] <- ext[2] - (ext[2] - ext[1]) %% binSize
    ext[4] <- ext[4] - (ext[4] - ext[3]) %% binSize
    half_insert <- round(insert / 2)

    ## per-locus specification: windows, x-axis range and number of bins
    mat_list <- list()
    mat_list[[paste("Start of", featureName)]] <- list(
        window = windows$start, rwindow = rwindows$start,
        s = ext[1], e = ext[2],
        bin_num = round((ext[2] - ext[1]) / binSize))
    mat_list[[paste("Center of", featureName)]] <- list(
        window = windows$center, rwindow = rwindows$center,
        s = -half_insert, e = half_insert,
        bin_num = round(insert / binSize))
    mat_list[[paste("End of", featureName)]] <- list(
        window = windows$end, rwindow = rwindows$end,
        s = ext[3], e = ext[4],
        bin_num = round((ext[4] - ext[3]) / binSize))

    ## ---- score matrices for every sample at every locus ---------------------
    ## When input samples are present, scaling and transformation are deferred
    ## until the ratio over input has been computed.
    score_window <- function(sample_data, window, bin_num) {
        mat <- parallel_scoreMatrixBin(sample_data$query, window, bin_num,
                                       "mean", sample_data$weight, stranded,
                                       nc = nc)
        if (has_input) {
            process_scoreMatrix(mat, scale = FALSE, rmOutlier = rmOutlier,
                                transform = NA, verbose = verbose)
        } else {
            process_scoreMatrix(mat, scale, rmOutlier, transform = transform,
                                verbose = verbose)
        }
    }

    scoreMatrix_list <- list()
    scoreMatrix_list_random <- list()
    for (locus in names(mat_list)) {
        spec <- mat_list[[locus]]
        if (spec$bin_num <= 0) next # e.g. no center panel when insert = 0

        for (queryLabel in queryLabels) {
            if (verbose) message("Query label: ", queryLabel, "\n")
            sample_data <- queryInputs[[queryLabel]]
            scoreMatrix_list[[queryLabel]][[locus]] <-
                score_window(sample_data, spec$window, spec$bin_num)
            if (randomize) {
                scoreMatrix_list_random[[queryLabel]][[locus]] <-
                    score_window(sample_data, spec$rwindow, spec$bin_num)
            }
        }
    }

    ## ---- profiles of the samples themselves ---------------------------------
    plot_df <- .profile_table(queryLabels, mat_list, scoreMatrix_list,
                              scoreMatrix_list_random, randomize, binSize,
                              smooth, verbose, "Query label: ")

    ## the transformation is only applied here when there is no input
    if (!is.na(transform) && !has_input) {
        Ylab <- paste0(transform, " (", Ylab, ")")
    }

    ## plot individual bed line for one feature
    for (query in unique(plot_df$Query)) {
        aplot_df <- plot_df %>%
            filter(Query == query)

        plots <- draw_stacked_profile(plot_df = aplot_df, cn = "Query",
                                      ext = ext, hl = hl, atitle = featureName,
                                      insert = insert, Ylab = Ylab,
                                      shade = shade, stack = TRUE)

        print(plots)
    }
    ## plot multi bed lines for one feature
    if (length(unique(plot_df$Query)) > 1) {
        plots <- draw_stacked_profile(plot_df = plot_df, cn = "Query",
                                      ext = ext, hl = hl, atitle = featureName,
                                      insert = insert, Ylab = Ylab,
                                      shade = shade, stack = TRUE)

        print(plots)
    }

    ## ---- ratio-over-input profiles ------------------------------------------
    if (has_input) {
        Ylab <- if (is.na(transform)) {
            "Ratio-over-Input"
        } else {
            paste0(transform, " (Ratio-over-Input)")
        }

        ratiolabels <- queryLabels[!queryLabels %in% inputLabels]
        inputMatrix_list <- scoreMatrix_list[inputLabels]
        ratioMatrix_list <- scoreMatrix_list[ratiolabels]

        imlr <- scoreMatrix_list_random[inputLabels] # inputMatrix_list_random
        rmlr <- scoreMatrix_list_random[ratiolabels] # ratioMatrix_list_random

        ## ratio of a signal matrix over its input matrix, restricted to the
        ## rows both have; drop = FALSE keeps a one-row result a matrix
        ratio_matrix <- function(signal, input) {
            shared_rows <- seq_len(min(nrow(signal), nrow(input)))
            ratio <- ratio_over_input(signal[shared_rows, , drop = FALSE],
                                      input[shared_rows, , drop = FALSE],
                                      verbose)
            process_scoreMatrix(ratio, scale, rmOutlier, transform = transform,
                                verbose = verbose)
        }

        for (locus in names(mat_list)) {
            if (mat_list[[locus]]$bin_num <= 0) next

            ## the i-th query is paired with the i-th input label
            for (i in seq_along(ratiolabels)) {
                ratiolabel <- ratiolabels[i]
                inputlabel <- inputLabels[i]

                ratioMatrix_list[[ratiolabel]][[locus]] <- ratio_matrix(
                    ratioMatrix_list[[ratiolabel]][[locus]],
                    inputMatrix_list[[inputlabel]][[locus]])

                ## for random feature
                if (randomize) {
                    rmlr[[ratiolabel]][[locus]] <- ratio_matrix(
                        rmlr[[ratiolabel]][[locus]],
                        imlr[[inputlabel]][[locus]])
                }
            }
        }

        plot_df <- .profile_table(ratiolabels, mat_list, ratioMatrix_list,
                                  rmlr, randomize, binSize, smooth, verbose,
                                  "Ratio label: ")

        ## plot individual bed line for one Location
        for (query in unique(plot_df$Query)) {
            aplot_df <- plot_df %>%
                filter(Query == query)
            plots <- draw_stacked_profile(plot_df = aplot_df, cn = "Query",
                                          xc = "Position", yc = "Intensity",
                                          ext = ext, hl = hl,
                                          atitle = featureName,
                                          insert = insert, Ylab = Ylab,
                                          shade = shade, stack = TRUE)
            print(plots)
        }

        ## plot multi bed lines for one Location
        ## NOTE(review): unlike the query profiles above, this combined plot is
        ## drawn even when there is only one profile - confirm this is intended
        plots <- draw_stacked_profile(plot_df, xc = "Position",
                                      yc = "Intensity", cn = "Query",
                                      ext = ext, hl = hl, atitle = featureName,
                                      insert = insert, Ylab = Ylab,
                                      shade = shade, stack = TRUE)

        print(plots)
    }

    ## record the parameters of this call in the output file; the device
    ## itself is closed by the on.exit handler registered above
    if (!is.null(outPrefix)) {
        print(params)
    }

    if (verbose) message("[plot_start_end_with_random] finished!\n")
    invisible(plot_df)
}

## Internal helper: build the three windows (around start, center and end of
## each range in `feature`) used by plot_start_end_with_random. `ext` holds the
## four flank boundaries, `insert` the total width of the center window.
## Returns a list with elements `start`, `center` and `end`.
.start_end_windows <- function(feature, ext, insert, genome) {
    flank <- function(fix, upstream, downstream) {
        anchor <- resize(feature, width = 1, fix = fix)
        check_constraints(promoters(anchor, upstream = upstream,
                                    downstream = downstream),
                          genome)
    }
    half_insert <- round(insert / 2)
    list(start = flank("start", -ext[1], ext[2]),
         center = flank("center", half_insert, half_insert),
         end = flank("end", -ext[3], ext[4]))
}

## Internal helper: collapse one score matrix (rows = features, columns =
## bins) into a per-bin data frame with columns Intensity (column mean), sd,
## se, Interval (number of rows above the matrix minimum), Position, Query,
## Location, lower and upper (Intensity -/+ se).
.profile_summary <- function(mat, label, locus, start, end, binSize, bin_num,
                             smooth) {
    n_rows <- nrow(mat)
    n_bins <- ncol(mat)

    col_mean <- apply(mat, 2, mean)
    ## sd() of a single value is NA, so a one-row matrix gets zero spread
    col_sd <- if (n_rows == 1) rep(0, n_bins) else apply(mat, 2, sd)
    floor_value <- min(mat)

    profile <- data.frame(
        "Intensity" = col_mean,
        "sd" = col_sd,
        "se" = col_sd / sqrt(n_rows),
        "Interval" = apply(mat, 2, function(x) sum(x > floor_value)),
        "Position" = seq(start, (end - binSize), binSize),
        "Query" = as.factor(rep(label, n_bins)),
        "Location" = as.factor(rep(locus, n_bins)))

    if (smooth) {
        spline_df <- as.integer(bin_num / 5)
        profile$Intensity <- as.vector(
            smooth.spline(profile$Intensity, df = spline_df)$y)
        profile$se <- as.vector(smooth.spline(profile$se, df = spline_df)$y)
    }
    profile$lower <- profile$Intensity - profile$se
    profile$upper <- profile$Intensity + profile$se
    profile
}

## Internal helper: stack the per-bin summaries of all `labels` at all loci of
## `loci` (skipping loci without bins) into one data frame. When `randomize`
## is TRUE each profile is followed by its "<label>:Random" counterpart taken
## from `random_matrices`. Returns NULL when there is nothing to summarize.
.profile_table <- function(labels, loci, matrices, random_matrices, randomize,
                           binSize, smooth, verbose, message_prefix) {
    pieces <- list()
    for (locus in names(loci)) {
        spec <- loci[[locus]]
        if (spec$bin_num <= 0) next

        for (label in labels) {
            if (verbose) message(message_prefix, label, "\n")

            pieces[[length(pieces) + 1L]] <- .profile_summary(
                matrices[[label]][[locus]], label, locus, spec$s, spec$e,
                binSize, spec$bin_num, smooth)

            if (randomize) {
                pieces[[length(pieces) + 1L]] <- .profile_summary(
                    random_matrices[[label]][[locus]],
                    paste0(label, ":Random"), locus, spec$s, spec$e, binSize,
                    spec$bin_num, smooth)
            }
        }
    }
    do.call(rbind, pieces)
}
