
# Declare names that this file references through non-standard evaluation
# (e.g. column names inside subset() and the dplyr verbs) so that
# R CMD check does not report them as undefined global variables.
utils::globalVariables(c("coverage", "HighestPeakReadCoverage",
    "start","outputFrame"))


#' inputCheck
#'
#' Checks user inputs to ensure PIPETS can run with given parameters
#' @importFrom utils read.delim file_test
#' @importFrom GenomicRanges ranges
#' @param inputData Either a string for the filepath of the bed file or the R GRanges object
#' @param readScoreMinimum See PIPETS_Run for full explanation
#' @param OutputFileID String input that will be used to identify the output files
#' @param OutputFileDir String for the path of the output directory; the directory must already exist
#' @param slidingWindowSize See PIPETS_Run for full explanation
#' @param slidingWindowMovementDistance See PIPETS_Run for full explanation
#' @param threshAdjust See PIPETS_Run for full explanation
#' @param threshAdjust_TopStrand See PIPETS_Run for full explanation
#' @param threshAdjust_CompStrand See PIPETS_Run for full explanation
#' @param user_pValue See PIPETS_Run for full explanation
#' @param highOutlierTrim See PIPETS_Run for full explanation
#' @param highOutlierTrim_TopStrand See PIPETS_Run for full explanation
#' @param highOutlierTrim_CompStrand See PIPETS_Run for full explanation
#' @param adjacentPeakDistance See PIPETS_Run for full explanation
#' @param peakCondensingDistance See PIPETS_Run for full explanation
#' @param inputDataFormat PIPETS currently supports "bedFile" (default) and "GRanges" as input formats
#' @return Returns kicker variable that will stop PIPETS if error is detected
#' @noRd
#'
inputCheck <- function(inputData,readScoreMinimum,OutputFileID,
                       OutputFileDir,slidingWindowSize,
                       slidingWindowMovementDistance,threshAdjust,
                       threshAdjust_TopStrand,threshAdjust_CompStrand,
                       user_pValue,highOutlierTrim,highOutlierTrim_TopStrand,
                       highOutlierTrim_CompStrand, adjacentPeakDistance,
                       peakCondensingDistance,inputDataFormat = "bedFile"
                       ){
    # Validates the user supplied run parameters. Every failed check emits a
    # warning and returns 1 immediately; 0 is returned when all checks pass.
    # The checks run in a fixed order, so only the first problem is reported.
    inputDataFormat <- as.character(inputDataFormat)

    if(!inputDataFormat %in% c("bedFile","GRanges")){
        warning("Please select a valid input data format")
        return(1)
    }
    if(!file.exists(as.character(OutputFileDir))){
        warning("Output File Directory Does Not Exist, ensure the format is '/path/to/file/'")
        return(1)
    }

    if(inputDataFormat %in% "bedFile"){
        if(!file_test("-f", as.character(inputData))){
            warning("Input File Not Found")
            return(1)
        }
        # A file that exists but cannot be parsed (e.g. an empty file) must
        # be reported as a failed check instead of crashing the size checks
        # below, so the read error is converted into NULL here.
        test <- tryCatch(
            read.delim(file = as.character(inputData),
                       header = FALSE, stringsAsFactors = FALSE),
            error = function(e) NULL)
        if(is.null(test)){
            warning("Input file could not be read as a tab delimited bed file")
            return(1)
        }
        if(nrow(test) < 100){
            warning("Input file is too short (currently less than 100 reads")
            return(1)
        }
        if(ncol(test) < 6){
            warning("Not enough columns in input file")
            return(1)
        }
    }

    if(inputDataFormat %in% "GRanges"){
        if(!is(inputData,"GRanges")){
            warning("Input Data is not of type 'GRanges'")
            return(1)
        }
        if(length(BiocGenerics::start(inputData)) < 100){
            warning("Granges object has too few total genomic ranges")
            return(1)
        }
        if(length(inputData$score) < (length(inputData)/2)){
            warning("Less than half of the Genomic Ranges sequences have any score values, cannot run")
            return(1)
        }
    }

    # Neither a global value nor any strand specific value was given.
    if(is.na(threshAdjust) && is.na(threshAdjust_TopStrand) &&
       is.na(threshAdjust_CompStrand)){
        warning("Must either provide a threshAdjust value or values for
                both of the strand specicific threshAdjust values")
        return(1)
    }

    if(is.na(highOutlierTrim) && is.na(highOutlierTrim_TopStrand) &&
       is.na(highOutlierTrim_CompStrand)){
        warning("Must either provide a highOutlierTrim value or values for
                both of the strand specicific highOutlierTrim values")
        return(1)
    }

    # Without a global value, both strand specific values are required.
    if(is.na(threshAdjust) &&
       (is.na(threshAdjust_TopStrand) || is.na(threshAdjust_CompStrand))){
        warning("Cannot run strand specific analysis because one or 
                more of the strand specific threshAdjust values is NA")
        return(1)
    }

    if(is.na(highOutlierTrim) &&
       (is.na(highOutlierTrim_TopStrand) || is.na(highOutlierTrim_CompStrand))){
        warning("Cannot run strand specific analysis because one or 
                more of the strand specific highOutlierTrim values is NA")
        return(1)
    }

    # A non-numeric global value is only acceptable when the strand
    # specific values can be used instead.
    if(!is.numeric(threshAdjust) && is.na(threshAdjust_TopStrand) &&
       is.na(threshAdjust_CompStrand)){
        warning("threshAdjust is not a number and strand specific threshAdjust
                values have not been provided so PIPETS cannot run.")
        return(1)
    }

    if(!is.numeric(highOutlierTrim) && is.na(highOutlierTrim_TopStrand) &&
       is.na(highOutlierTrim_CompStrand)){
        warning("highOutlierTrim is not a number and strand specific
        highOutlierTrim values have not been provided so PIPETS cannot run.")
        return(1)
    }

    if(!is.numeric(slidingWindowSize) ||
       !is.numeric(slidingWindowMovementDistance) ||
       !is.numeric(user_pValue) || !is.numeric(adjacentPeakDistance) ||
       !is.numeric(peakCondensingDistance) || !is.numeric(readScoreMinimum)){
        warning("One or more numerical parameters is not a number and PIPETS 
                cannot run")
        return(1)
    }
    if(slidingWindowSize == 0 || slidingWindowMovementDistance == 0 ||
       readScoreMinimum == 0 || adjacentPeakDistance == 0 ||
       peakCondensingDistance == 0){
        warning("One or more parameters is 0 and PIPETS cannot run")
        return(1)
    }

    return(0)
}





#' thresCalc
#'
#' Used to calculate a cutoff threshold for the data
#' @param rf Dataframe containing strand specific reads
#' @param threshAdjust This parameter is used to establish a global cutoff threshold informed by the data. PIPETS sorts the genomic positions of each strand from highest to lowest, and starts with the highest read coverage position and subtracts that value from the total read coverage for that strand. By default, this continues until 75% of the total read coverage has been accounted for. Increasing the percentage (e.x. 0.9) will lower the strictness of the cutoff, thus increasing the total number of significant results.
#' @param highOutlierTrim This parameter is used along with threshAdjust to trim off the influence exerted by high read coverage outliers. By default, it removes the top 1 percent (a fraction of 0.01) of the highest read coverage positions from the calculation of the global threshold (e.x. if there are 200 positions that make up 75% of the total reads, then this parameter will take the top 2 read coverage positions and remove them from the calculation of the global threshold). This parameter can be tuned to account for datasets with outliers that would otherwise severely skew the global threshold.
#' @return Outputs threshold used for cutoff
#' @noRd
#'
thresCalc <- function(rf, threshAdjust,highOutlierTrim){
    # Computes the global coverage cutoff: the mean coverage of the highest
    # covered positions that together stay below `threshAdjust` of the total
    # coverage, after discarding the top `highOutlierTrim` fraction of them.
    sortedCov <- rf$coverage[order(rf$coverage, decreasing = TRUE)]
    nPos <- length(sortedCov)
    if(nPos == 0){
        # No positions at all: there is nothing to average.
        return(NA_real_)
    }

    # Walk down the sorted coverages until the target share is used up.
    # posCount is the number of positions consumed before the budget hits
    # zero; if the budget never runs out every position is used.
    remaining <- sum(sortedCov) * threshAdjust
    posCount <- nPos
    for(x in seq_len(nPos)){
        remaining <- remaining - sortedCov[[x]]
        if(remaining <= 0){
            posCount <- x - 1
            break
        }
    }
    # A single position can exceed the whole budget; keep at least that one
    # so the threshold stays finite.
    posCount <- max(posCount, 1)

    # Number of top positions to discard as outliers, always leaving at
    # least one position to average over.
    rmTopEnd <- round(posCount * highOutlierTrim)
    rmTopEnd <- min(max(rmTopEnd, 0), posCount - 1)

    # Drop exactly rmTopEnd positions so the number of summed values matches
    # the divisor.
    kept <- sortedCov[(rmTopEnd + 1):posCount]
    threshold <- sum(kept)/length(kept)
    return(threshold)
}


#' consecutivePeakCheck
#' Combines Proximal Peaks
#' @param OMF OutputMergeFrame from function this is nested in
#' @param SWR Output of file that is created in nested function
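#' @param pCD peak condensing distance established in full run
#' @param TWH Holding dataframe with the same columns as OMF to which the peaks of the cluster currently being merged are appended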
#' @return Returns significant peaks to be fixed and output after
#' @noRd
#'
consecutivePeakCheck <- function(OMF, SWR, pCD,TWH){
    # Merges peaks of OMF that lie within pCD of each other. The gap between
    # two neighbouring peaks is always measured from the HighestPeakCoord of
    # the earlier peak to the LowestPeakCoord of the later one.
    # Every merged cluster is written to the next row of SWR, followed by an
    # all-NA placeholder row, so the result ends with one NA row.
    nPeaks <- nrow(OMF)
    if(nPeaks == 1){
        # A single peak has nothing to merge with.
        return(OMF)
    }
    if(nPeaks < 1){
        return(invisible(NULL))
    }

    PFC <- 1
    for(x in seq_len(nPeaks)){
        # The current peak always belongs to the cluster being collected.
        TWH <- rbind(TWH,OMF[x,, drop=FALSE])

        # Keep collecting while the next peak is close enough.
        if(x < nPeaks &&
           (OMF$LowestPeakCoord[x+1] - OMF$HighestPeakCoord[x]) <= pCD){
            next
        }

        # The cluster is complete (next peak is too far away or this is the
        # last peak): report its tallest member and its overall extent.
        tallest <- which(TWH$HighestPeakReadCoverage ==
                             max(TWH$HighestPeakReadCoverage))[1]
        SWR$chrom[PFC] <- OMF$chrom[1]
        SWR$strand[PFC] <- OMF$strand[1]
        SWR$HighestPeak[PFC] <- TWH$HighestPeak[tallest]
        SWR$HighestPeakReadCoverage[PFC] <- TWH$HighestPeakReadCoverage[tallest]
        SWR$LowestPeakCoord[PFC] <- min(TWH$LowestPeakCoord)
        SWR$HighestPeakCoord[PFC] <- max(TWH$HighestPeakCoord)

        # Open a placeholder row for the next cluster and empty the holder.
        PFC <- PFC + 1
        SWR[PFC,] <- NA
        TWH <- OMF[0,, drop=FALSE]
    }
    return(SWR)
}




#' Bed_Split
#' First step of PIPETS. Takes input Bed files and splits them by strand while also assigning read coverage to each genomic position
#' @title Split Input Bed Data By Strand
#' @importFrom dplyr arrange distinct group_by transmute %>%
#' @importFrom stats aggregate ppois complete.cases
#' @importFrom utils write.csv write.table read.table read.delim
#' @param inputData Input BED file that is not strand split. For PIPETS, the first column must be the chromosome name, the second column must be the start coordinate, the third column must be the stop coordinate, and the 6th column must be the strand. Columns 4 and 5 must be present but their information will not be used.
#' @param readScoreMinimum The user must input the minimum read score from the input bed files that is used to determine good quality reads. All values equal to and greater than the input are considered. In many modern sequencing runs, a score of 60 is used.
#' @param OutputFileID User provided identifying string for output bed files
#' @return Returns a list containing, in order, the user defined name for the files, the Plus Strand Reads, and the Minus Strand Reads. Also writes out the strand split bed files to the project directory.
#' @examples
#' ## Split input bed file into stranded files without running PIPETS
#' Bed_Split(inputData="Test_Data.bed", readScoreMinimum=42,OutputFileID = "Test1")
#' @noRd
#'
Bed_Split <- function(inputData,readScoreMinimum, OutputFileID){
    # Reads a bed file, keeps reads with score >= readScoreMinimum, splits
    # them by strand and collapses them to one row per position with a
    # coverage count. Writes both strand tables to disk and returns
    # list(file ID, plus strand reads, minus strand reads).
    message("+-----------------------------------+")
    OutputFileName <- OutputFileID
    message("Splitting Input Bed File By Strand")
    rB <- read.delim(file = as.character(inputData),
                     header = FALSE, stringsAsFactors = FALSE)

    # Locate the strand column from the first read: it is the column whose
    # value is exactly "+" or "-". Matching the whole value (rather than any
    # field that merely contains a "-") keeps hyphenated chromosome or read
    # names from being mistaken for the strand. Column 6 is preferred
    # because that is where the bed format places the strand.
    firstRow <- vapply(rB[1,, drop=FALSE],
                       function(v) trimws(as.character(v)), character(1))
    strandCols <- which(firstRow %in% c("+","-"))
    if(length(strandCols) == 0){
        stop("Could not find a strand column ('+' or '-') in the input bed file")
    }
    strandCol <- if(6 %in% strandCols) 6 else strandCols[[1]]

    startBed <- data.frame(chrom = rB[[1]], start = rB[[2]], stop = rB[[3]],
                           score = rB[[5]], coverage = 0,
                           strand = rB[[strandCol]],
                           stringsAsFactors = FALSE)

    # Plus strand: count reads per stop coordinate, then add up the counts
    # of all rows sharing a start and keep one row per start (the row with
    # the highest individual count).
    PSR <- startBed[startBed$strand %in% "+",, drop=FALSE]
    PSR <- PSR[PSR$score >= readScoreMinimum,, drop=FALSE]
    PSC <- as.data.frame(table(PSR$stop))
    PSR <- distinct(PSR, stop, .keep_all = TRUE)
    PSR <- arrange(PSR, stop)
    PSR$coverage <- PSC$Freq[match(PSR$stop,PSC$Var1)]
    sumColumn <- PSR %>% group_by(start) %>% transmute(Total=sum(coverage))
    PSR$tempSum <- sumColumn$Total
    PSR <- PSR[order(PSR$start, -PSR$coverage),]
    PSR <- PSR[!duplicated(PSR$start),]
    PSR$coverage <- PSR$tempSum
    PSR$tempSum <- NULL

    # Minus strand: same procedure with the roles of start and stop swapped.
    MSR <- startBed[startBed$strand %in% "-",, drop=FALSE]
    MSR <- MSR[MSR$score >= readScoreMinimum,, drop=FALSE]
    MSC <- as.data.frame(table(MSR$start))
    MSR <- distinct(MSR, start, .keep_all = TRUE)
    MSR <- arrange(MSR, start)
    MSR$coverage <- MSC$Freq[match(MSR$start,MSC$Var1)]
    sumColumn <- MSR %>% group_by(stop) %>% transmute(Total=sum(coverage))
    MSR$tempSum <- sumColumn$Total
    MSR <- MSR[order(MSR$stop, -MSR$coverage),]
    MSR <- MSR[!duplicated(MSR$stop),]
    MSR$coverage <- MSR$tempSum
    MSR$tempSum <- NULL

    write.table(PSR,
                file = paste(as.character(OutputFileName),"PlusStrandCounts.bed", sep = "_")
                ,quote = FALSE, row.names = FALSE, col.names = FALSE)
    write.table(MSR,
                file = paste(as.character(OutputFileName),
                             "MinusStrandCounts.bed", sep = "_"),
                quote = FALSE, row.names = FALSE, col.names = FALSE)

    # The score column is only written to disk, not returned.
    keepCols <- c("chrom","start","stop","coverage","strand")
    PSR <- PSR[,keepCols]
    MSR <- MSR[,keepCols]
    return(list(OutputFileName,PSR,MSR))
}

#' GRanges_Split
#' First step of PIPETS when GRanges option is selected for input. Takes input granges object, strand splits it and trims off reads that are too short or long, and then outputs strand split bed file to directory and preserves strand split granges objects to be output at the end of the method
#' @title Split Input GRanges Data By Strand
#' @importFrom dplyr arrange distinct group_by transmute %>%
#' @importFrom stats aggregate ppois complete.cases
#' @importFrom utils write.csv write.table read.table read.delim
#' @importFrom GenomicRanges ranges end start makeGRangesFromDataFrame
#' @importFrom BiocGenerics strand
#' @importFrom methods is
#' @param inputData Input granges object. PIPETS requires chromosome, start, stop, and strand information from the granges object
#' @param readScoreMinimum The user must input the minimum read score from the input bed files that is used to determine good quality reads. All values equal to and greater than the input are considered. In many modern sequencing runs, a score of 60 is used.
#' @param OutputFileID User input string that will be used to identify output bed files
#' @return Returns a list containing, in order, the user defined name for the files, the Plus Strand Reads, the Minus Strand Reads, the Plus Strand GRanges object, and the Minus Strand GRanges object. Also writes out the strand split bed files to the project directory.
#' @examples
#' ## Take input bed file, convert to GRanges object, and insert into method
#' testBed <-  read.table(file = "PIPETS_TestData.bed", header = FALSE,stringsAsFactors=FALSE)
#' testBed.gr <- GRanges(seqnames=testBed[,1],ranges=IRanges(start=testBed[,2],end=testBed[,3]),strand=testBed[,6], score=testBed[,5])
#' GRanges_Split(inputData=testBed.gr, readScoreMinimum=42, OutputFileID = "Test1")
#' @noRd
#'
GRanges_Split <- function(inputData,readScoreMinimum, OutputFileID){
    # Splits a GRanges object by strand, keeps ranges with
    # score >= readScoreMinimum and collapses them to one row per position
    # with a coverage count. Writes both strand tables to disk and returns
    # list(file ID, plus reads, minus reads, plus GRanges, minus GRanges).
    # Columns are addressed by name throughout so that extra metadata
    # columns on the input do not shift which column is dropped or kept.
    message("+-----------------------------------+")
    message("Splitting Input GRanges Object By Strand")
    outCols <- c("seqnames","start","end","coverage","strand")
    bedNames <- c("chrom","start","stop","coverage","strand")

    allMinusRanges <- inputData[strand(inputData) == "-",]
    allPlusRanges <- inputData[strand(inputData) == "+",]
    allMinusRanges <- allMinusRanges[allMinusRanges$score >= readScoreMinimum,]
    allPlusRanges <- allPlusRanges[allPlusRanges$score >= readScoreMinimum,]

    # Minus strand: count ranges per start, then add up the counts of all
    # rows sharing an end and keep one row per end.
    allMinusCoverage <- as.data.frame(table(start(allMinusRanges)))
    allMinusReads <- as.data.frame(allMinusRanges)
    allMinusReads <- distinct(allMinusReads, start, .keep_all = TRUE)
    allMinusReads <- arrange(allMinusReads, start)
    allMinusReads$coverage <- allMinusCoverage$Freq[
        match(allMinusReads$start,allMinusCoverage$Var1)]
    sumColumn <- allMinusReads %>% group_by(end) %>% transmute(Total=sum(coverage))
    allMinusReads$tempSum <- sumColumn$Total
    allMinusReads <- allMinusReads[order(allMinusReads$end, -allMinusReads$coverage),]
    allMinusReads <- allMinusReads[!duplicated(allMinusReads$end),]
    allMinusReads$coverage <- allMinusReads$tempSum
    allMinusReads$tempSum <- NULL
    allMinusRanges <- GenomicRanges::makeGRangesFromDataFrame(allMinusReads,keep.extra.columns = TRUE)
    allMinusReads <- allMinusReads[,outCols, drop=FALSE]
    colnames(allMinusReads) <- bedNames
    # Both coordinates are shifted down by one for the bed style output.
    allMinusReads$start <- allMinusReads$start - 1
    allMinusReads$stop <- allMinusReads$stop - 1

    # Plus strand: same procedure with the roles of start and end swapped.
    allPlusCoverage <- as.data.frame(table(end(allPlusRanges)))
    allPlusReads <- as.data.frame(allPlusRanges)
    allPlusReads <- distinct(allPlusReads, end, .keep_all = TRUE)
    allPlusReads <- arrange(allPlusReads, end)
    allPlusReads$coverage <- allPlusCoverage$Freq[
        match(allPlusReads$end,allPlusCoverage$Var1)]
    sumColumn <- allPlusReads %>% group_by(start) %>% transmute(Total=sum(coverage))
    allPlusReads$tempSum <- sumColumn$Total
    allPlusReads <- allPlusReads[order(allPlusReads$start, -allPlusReads$coverage),]
    allPlusReads <- allPlusReads[!duplicated(allPlusReads$start),]
    allPlusReads$coverage <- allPlusReads$tempSum
    allPlusReads$tempSum <- NULL
    allPlusRanges <- GenomicRanges::makeGRangesFromDataFrame(allPlusReads,keep.extra.columns = TRUE)
    allPlusReads <- allPlusReads[,outCols, drop=FALSE]
    colnames(allPlusReads) <- bedNames
    allPlusReads$start <- allPlusReads$start - 1
    allPlusReads$stop <- allPlusReads$stop - 1

    write.table(allPlusReads,file = paste(as.character(OutputFileID),
                                          "PlusStrandCounts.bed", sep = "_")
                ,quote = FALSE, row.names = FALSE, col.names = FALSE)
    write.table(allMinusReads,
                file = paste(as.character(OutputFileID),
                             "MinusStrandCounts.bed", sep = "_"),
                quote = FALSE, row.names = FALSE, col.names = FALSE)
    return(list(OutputFileID,allPlusReads,allMinusReads,allPlusRanges,
                allMinusRanges))
}


#' topConsecutiveCheck
#' Identifies significant positions that are consecutive for top strand
#' @param OF Dataframe with significant positions (after initial Poisson test)
#' @param OMF Dataframe that will be the output, specified in function this is nested in
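#' @param aPD Adjacent Peak distance specified by user in full run
#' @param TPH Holding dataframe with the same columns as OF to which the positions of the peak currently being built are appended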
#' @return Returns list of merged termination peaks
#' @noRd
#'
topConsecutiveCheck <- function(OF, OMF, aPD, TPH){
    # Groups significant positions whose `stop` coordinates lie within aPD
    # of the next position into peaks. Each finished peak is written to the
    # next row of OMF, followed by an all-NA placeholder row, so the result
    # ends with one NA row that callers drop with complete.cases().
    nPos <- nrow(OF)
    if(nPos < 1){
        # Nothing significant: hand back the untouched output frame so the
        # caller still receives a data frame.
        return(OMF)
    }

    peakFrameCoord <- 1
    for(x in seq_len(nPos)){
        # The current position always belongs to the peak being collected.
        TPH <- rbind(TPH,OF[x,, drop=FALSE])

        # Keep collecting while the next position is close enough.
        if(x < nPos && (OF$stop[x+1] - OF$stop[x]) <= aPD){
            next
        }

        # The peak is complete (next position is too far away or this is
        # the last one): report its best covered position and its extent.
        best <- which(TPH$coverage == max(TPH$coverage))[1]
        OMF$chrom[peakFrameCoord] <- OF$chrom[1]
        OMF$strand[peakFrameCoord] <- OF$strand[1]
        OMF$HighestPeak[peakFrameCoord] <- TPH$stop[best]
        OMF$HighestPeakReadCoverage[peakFrameCoord] <- TPH$coverage[best]
        OMF$LowestPeakCoord[peakFrameCoord] <- min(TPH$stop)
        OMF$HighestPeakCoord[peakFrameCoord] <- max(TPH$stop)

        # Open a placeholder row for the next peak and empty the holder.
        peakFrameCoord <- peakFrameCoord + 1
        OMF[peakFrameCoord,] <- NA
        TPH <- OF[0,, drop=FALSE]
    }
    return(OMF)
}


#' compConsecutiveCheck
#' Identifies significant positions that are consecutive for comp strand
#' @param OF Dataframe with significant positions (after initial Poisson test)
#' @param OMF Dataframe that will be the output, specified in function this is nested in
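#' @param aPD Adjacent Peak distance specified by user in full run
#' @param TPH Holding dataframe with the same columns as OF to which the positions of the peak currently being built are appended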
#' @return Returns list of merged termination peaks
#' @noRd
#'
compConsecutiveCheck <- function(OF, OMF, aPD, TPH){
    # Groups significant positions whose `start` coordinates lie within aPD
    # of the next position into peaks. Each finished peak is written to the
    # next row of OMF, followed by an all-NA placeholder row, so the result
    # ends with one NA row that callers drop with complete.cases().
    nPos <- nrow(OF)
    if(nPos < 1){
        # Nothing significant: hand back the untouched output frame so the
        # caller still receives a data frame.
        return(OMF)
    }

    peakFrameCoord <- 1
    for(x in seq_len(nPos)){
        # The current position always belongs to the peak being collected.
        TPH <- rbind(TPH,OF[x,, drop=FALSE])

        # Keep collecting while the next position is close enough.
        if(x < nPos && (OF$start[x+1] - OF$start[x]) <= aPD){
            next
        }

        # The peak is complete (next position is too far away or this is
        # the last one): report its best covered position and its extent.
        best <- which(TPH$coverage == max(TPH$coverage))[1]
        OMF$chrom[peakFrameCoord] <- OF$chrom[1]
        OMF$strand[peakFrameCoord] <- OF$strand[1]
        OMF$HighestPeak[peakFrameCoord] <- TPH$start[best]
        OMF$HighestPeakReadCoverage[peakFrameCoord] <- TPH$coverage[best]
        OMF$LowestPeakCoord[peakFrameCoord] <- min(TPH$start)
        OMF$HighestPeakCoord[peakFrameCoord] <- max(TPH$start)

        # Open a placeholder row for the next peak and empty the holder.
        peakFrameCoord <- peakFrameCoord + 1
        OMF[peakFrameCoord,] <- NA
        TPH <- OF[0,, drop=FALSE]
    }
    return(OMF)
}



#' TopStrand_InitialPoisson
#' Poisson Significant Peak Identification Test for the Top Strand Data
#' @importFrom dplyr arrange distinct
#' @importFrom stats aggregate ppois complete.cases p.adjust
#' @importFrom utils write.csv write.table read.table read.delim
#' @param MinusStrandReads Minus Strand Read DataFrame from the Bed_Split method. The minus strand reads inform the Top Strand termination signal
#' @param slidingWindowSize This parameter establishes the distance up and down stream of each position that a sliding window will be created around. The default value is 25, and this will result in a sliding window of total size 51 (25 upstream + position (1) + 25 downstream).
#' @param slidingWindowMovementDistance This parameter sets the distance that the sliding window will be moved. By default, it is set to move by half of the sliding window size in order to ensure that almost every position in the data is tested twice.
#' @param threshAdjust This parameter is used to establish a global cutoff threshold informed by the data. PIPETS sorts the genomic positions of each strand from highest to lowest, and starts with the highest read coverage position and subtracts that value from the total read coverage for that strand. By default, this continues until 75% of the total read coverage has been accounted for. Increasing the percentage (e.x. 0.9) will lower the strictness of the cutoff, thus increasing the total number of significant results.
#' @param user_pValue Choose the minimum pValue that the Poisson distribution test must pass in order to be considered significant
#' @param highOutlierTrim This parameter is used along with threshAdjust to trim off the influence exerted by high read coverage outliers. By default, it removes the top 1 percent (a fraction of 0.01) of the highest read coverage positions from the calculation of the global threshold (e.x. if there are 200 positions that make up 75% of the total reads, then this parameter will take the top 2 read coverage positions and remove them from the calculation of the global threshold). This parameter can be tuned to account for datasets with outliers that would otherwise severely skew the global threshold.
#' @return Returns a dataframe with all genomic positions that were identified as having significant read coverage.
#' @noRd
#'
TopStrand_InitialPoisson <- function(MinusStrandReads,slidingWindowSize = 25,
                                     slidingWindowMovementDistance = 25,threshAdjust = 0.75,user_pValue = 0.0005,
                                     highOutlierTrim= 0.01){
    # Slides a window over a per-position coverage track and records every
    # position that is at or above the global cutoff and whose coverage is
    # significantly high under a Poisson model with the window mean as
    # lambda. P-values are then BH adjusted and filtered.
    reads <- as.data.frame(MinusStrandReads)
    halfWidth <- slidingWindowSize
    stepSize <- slidingWindowMovementDistance

    # Result frame: the five read columns plus raw and adjusted p-values.
    hits <- as.data.frame(matrix(nrow = 0, ncol = 7))
    colnames(hits)[1:5] <- colnames(reads)
    colnames(hits)[6:7] <- c("pValue","adjpValue")

    # Coverage track with one row per position up to the stop coordinate
    # found in the last row of the reads.
    track <- as.data.frame(matrix(nrow = reads[nrow(reads),"stop"], ncol = 2))
    colnames(track) <- c("position","coverage")
    track$position <- seq_along(track[,1])

    cutoff <- thresCalc(rf = reads, threshAdjust = threshAdjust,
                        highOutlierTrim = highOutlierTrim)
    message(as.character(paste("Top Strand Cutoff", cutoff, sep = " ")))

    # Positions without reads get zero coverage.
    track$coverage <- reads$coverage[match(track$position, reads$stop)]
    track$coverage[is.na(track$coverage)] <- 0

    center <- 1 + halfWidth
    lastCenter <- nrow(track) - halfWidth
    nextHit <- 1
    finalPass <- FALSE
    repeat{
        window <- (center - halfWidth):(center + halfWidth)
        windowMean <- sum(track$coverage[window])/((2*halfWidth)+1)
        for(y in window){
            if(track$coverage[y] >= cutoff){
                tailProb <- (1- ppois(q = track$coverage[y], lambda = windowMean))
                if(tailProb <= user_pValue){
                    hits[nextHit,1:5] <- reads[reads$stop == track$position[y],]
                    hits$pValue[nextHit] <- tailProb
                    nextHit <- nextHit + 1
                }
            }
        }
        # The window that was pinned to the end of the track is the last one.
        if(finalPass){
            break
        }
        # Advance by one step, or pin the window to the end of the track
        # when another full step would reach or pass it.
        if((center + stepSize) >= lastCenter){
            center <- lastCenter
            finalPass <- TRUE
        } else {
            center <- center + stepSize
        }
    }

    hits$adjpValue <- p.adjust(hits$pValue, method = "BH", n = length(hits$pValue))
    hits <- hits[hits$adjpValue <= user_pValue,]
    # A position can be tested in more than one window; keep the rows with
    # the smallest adjusted p-value per stop coordinate.
    isBest <- ave(hits$adjpValue, hits$stop, FUN=min) == hits$adjpValue
    hits <- hits[isBest,]
    hits <- hits[!duplicated(hits),, drop=FALSE]
    hits <- hits[order(hits$stop),,drop=FALSE]
    rownames(hits) <- seq_along(hits[,1])
    return(hits)
}

#' TopStrand_InitialCondense
#' Takes the significant top strand positions from TopStrand_InitialPoisson and condenses all proximal positions into termination "peaks"
#' @importFrom dplyr arrange distinct
#' @importFrom stats aggregate ppois complete.cases
#' @importFrom utils write.csv write.table read.table read.delim
#' @param TopInititalPoisson Uses the output of TopStrand_InitialPoisson as the input
#' @param adjacentPeakDistance During the peak condensing step, this parameter is used to define “adjacent” for significant genomic positions. This is used to identify initial peak structures in the data. By default this value is set to 2 to ensure that single instances of loss of signal are not sufficient to prevent otherwise contiguous peak signatures from being combined.
#' @return Returns a dataframe that contains the list of termination "peaks" for the top strand
#' @noRd
#'
TopStrand_InitialCondense <- function(TopInititalPoisson,
    adjacentPeakDistance = 2){
    # Column layout of the peak table handed to (and expected back from)
    # topConsecutiveCheck
    peakColumns <- c("chrom","strand","HighestPeak",
        "HighestPeakReadCoverage","LowestPeakCoord",
        "HighestPeakCoord")
    # Zero-row holder that mirrors the columns of the significant positions
    emptyHolder <- as.data.frame(
        matrix(nrow = 0, ncol = ncol(TopInititalPoisson)))
    colnames(emptyHolder) <- colnames(TopInititalPoisson)
    # Single all-NA seed row; it is dropped again by complete.cases() below
    seedPeaks <- as.data.frame(matrix(nrow = 1, ncol = 6))
    colnames(seedPeaks) <- peakColumns
    condensedPeaks <- topConsecutiveCheck(OF = TopInititalPoisson,
        OMF = seedPeaks, aPD = adjacentPeakDistance, TPH = emptyHolder)
    # Keep only fully populated peaks and renumber the rows from 1
    condensedPeaks <- condensedPeaks[complete.cases(condensedPeaks),,
        drop=FALSE]
    rownames(condensedPeaks) <- seq_along(condensedPeaks[,1])
    # Coordinates and coverage are coerced to numeric for the next step
    for(columnName in c("LowestPeakCoord","HighestPeakCoord",
        "HighestPeakReadCoverage","HighestPeak")){
        condensedPeaks[[columnName]] <-
            as.numeric(condensedPeaks[[columnName]])
    }
    return(condensedPeaks)
}

#' TopStrand_SecondaryCondense
#' Takes the list of termination "peaks" from TopStrand_InitialCondense and condenses peaks that are proximal to each other
#' @importFrom dplyr arrange distinct
#' @importFrom stats aggregate ppois complete.cases
#' @importFrom utils write.csv write.table read.table read.delim
#' @param TopInititalCondense Uses the output of TopStrand_InitialCondense as the input
#' @param peakCondensingDistance Following the initial peak condensing step, this parameter is used to identify peak structures in the data that are close enough to be considered part of the same termination signal. In testing, we have not identified cases in which two distinct termination signals are so proximal that the default parameters incorrectly combine the signals together.
#' @param OutputFileName A string that will be used to identify printed results files. When run with PIPETS_FullRun, this string will be input by the user in the beginning
#' @return The method writes a csv file in the project directory that contains the results for the Top Strand termination peaks
#' @noRd
#'
TopStrand_SecondaryCondense <- function(TopInititalCondense,
    peakCondensingDistance = 20,
    OutputFileName){
    # Results are written next to the other outputs that share this prefix
    resultsFile <- paste(as.character(OutputFileName),
        "TopStrandResults.csv", sep = "_")
    # Zero-row holder with the same columns as the incoming peak table
    emptyHolder <- as.data.frame(
        matrix(nrow = 0, ncol = ncol(TopInititalCondense)))
    colnames(emptyHolder) <- colnames(TopInititalCondense)
    # Single all-NA seed row; removed again by complete.cases() below
    seedResults <- as.data.frame(matrix(nrow = 1, ncol = 6))
    colnames(seedResults) <- c("chrom","strand","HighestPeak",
        "HighestPeakReadCoverage","LowestPeakCoord","HighestPeakCoord")
    mergedPeaks <- consecutivePeakCheck(TopInititalCondense, seedResults,
        peakCondensingDistance, emptyHolder)
    # Keep fully populated rows and drop the second ("strand") column
    mergedPeaks <- mergedPeaks[complete.cases(mergedPeaks), -2, drop=FALSE]
    write.csv(mergedPeaks, file = resultsFile, row.names = FALSE)
}

#' CompStrand_InitialPoisson
#' Poisson Significant Peak Identification Test for the Complement Strand Data
#' @importFrom dplyr arrange distinct
#' @importFrom stats aggregate ppois complete.cases p.adjust
#' @importFrom utils write.csv write.table read.table read.delim
#' @param PlusStrandReads Plus Strand Read DataFrame from the Bed_Split method. The Plus strand reads inform the Complement Strand termination signal
#' @param slidingWindowSize This parameter establishes the distance up and down stream of each position that a sliding window will be created around. The default value is 25, and this will result in a sliding window of total size 51 (25 upstream + position (1) + 25 downstream).
#' @param slidingWindowMovementDistance This parameter sets the distance that the sliding window will be moved. By default, it is set to move by half of the sliding window size in order to ensure that almost every position in the data is tested twice.
#' @param threshAdjust This parameter is used to establish a global cutoff threshold informed by the data. PIPETS sorts the genomic positions of each strand from highest to lowest, and starts with the highest read coverage position and subtracts that value from the total read coverage for that strand. By default, this continues until 75% of the total read coverage has been accounted for. Increasing the percentage (e.x. 0.9) will lower the strictness of the cutoff, thus increasing the total number of significant results.
#' @param user_pValue Choose the minimum pValue that the Poisson distribution test must pass in order to be considered significant
#' @param highOutlierTrim This parameter is used along with threshAdjust to trim off the influence exerted by high read coverage outliers. By default, it removes the top 1 percent (a proportion of 0.01) of the highest read coverage positions from the calculation of the global threshold (e.g. if there are 200 positions that make up 75% of the total reads, then this parameter will take the top 2 read coverage positions and remove them from the calculation of the global threshold). This parameter can be tuned to account for datasets with outliers that would otherwise severely skew the global threshold.
#' @return Returns a dataframe with all genomic positions that were identified as having significant read coverage.
#' @noRd
#'
CompStrand_InitialPoisson <- function(PlusStrandReads,slidingWindowSize = 25,
                                      slidingWindowMovementDistance = 25,threshAdjust = 0.75,user_pValue = 0.0005,
                                      highOutlierTrim= 0.01){
    # Sliding-window Poisson test on the plus strand reads (which inform the
    # complement strand signal). Returns the input columns of every position
    # that passes the global coverage cutoff and the Poisson test, plus
    # "pValue" and "adjpValue" (Benjamini-Hochberg), sorted by start.
    PSR <- as.data.frame(PlusStrandReads)
    SWMD <- slidingWindowMovementDistance
    SWS <- slidingWindowSize
    # Every window spans SWS positions on either side of its centre
    windowWidth <- (2 * SWS) + 1
    # Fail early with a readable message: without these guards an empty or
    # too-short input stops later with a cryptic matrix/NA error instead.
    if(nrow(PSR) == 0){
        stop("CompStrand_InitialPoisson: no plus strand reads were supplied",
             call. = FALSE)
    }
    # The last row is taken as the highest covered position
    # NOTE(review): assumes PSR is sorted by ascending start - confirm
    lastPosition <- PSR[nrow(PSR),"start"]
    if(is.na(lastPosition) || lastPosition < windowWidth){
        stop("CompStrand_InitialPoisson: the last plus strand position (",
             lastPosition, ") is smaller than one full sliding window ",
             "(2 * slidingWindowSize + 1 = ", windowWidth,
             "); reduce slidingWindowSize", call. = FALSE)
    }
    nPositions <- as.integer(lastPosition)
    outputFrame <- as.data.frame(matrix(nrow = 0, ncol = 7))
    colnames(outputFrame)[1:5] <- colnames(PSR)
    colnames(outputFrame)[6] <- "pValue"
    colnames(outputFrame)[7] <- "adjpValue"
    compThreshold <- thresCalc(rf = PSR, threshAdjust = threshAdjust,
                               highOutlierTrim = highOutlierTrim)
    message(paste("Complement Strand Cutoff", compThreshold, sep = " "))
    # Dense per-position coverage vector; positions without reads count as 0
    coverageByPosition <- PSR$coverage[match(seq_len(nPositions), PSR$start)]
    coverageByPosition[is.na(coverageByPosition)] <- 0
    # Centre of the final window, which ends exactly on the last position
    lastCentre <- nPositions - SWS
    z <- 1 + SWS
    onFinalWindow <- FALSE
    pvalPos <- 1
    repeat{
        windowPositions <- (z - SWS):(z + SWS)
        # Mean coverage of the window is the Poisson rate for its positions
        AOW <- sum(coverageByPosition[windowPositions]) / windowWidth
        for(y in windowPositions){
            if(coverageByPosition[y] >= compThreshold){
                probabilityAtY <- (1 - ppois(q = coverageByPosition[y],
                                             lambda = AOW))
                if(probabilityAtY <= user_pValue){
                    outputFrame[pvalPos,1:5] <- PSR[PSR$start == y,]
                    outputFrame$pValue[pvalPos] <- probabilityAtY
                    pvalPos <- pvalPos + 1
                }
            }
        }
        if(onFinalWindow){
            break
        }
        # Step forward; once the next step would reach or pass the final
        # centre, jump to it so the end of the data is always tested
        if((z + SWMD) >= lastCentre){
            z <- lastCentre
            onFinalWindow <- TRUE
        } else {
            z <- z + SWMD
        }
    }
    # Overlapping windows test a position more than once, so correct the raw
    # p-values, keep the best adjusted value per position and de-duplicate
    outputFrame$adjpValue <- p.adjust(outputFrame$pValue, method = "BH",
                                      n = length(outputFrame$pValue))
    outputFrame <- outputFrame[outputFrame$adjpValue <= user_pValue,]
    bestPerPosition <- stats::ave(outputFrame$adjpValue, outputFrame$start,
                                  FUN = min)
    outputFrame <- outputFrame[bestPerPosition == outputFrame$adjpValue,]
    outputFrame <- outputFrame[!duplicated(outputFrame),, drop=FALSE]
    outputFrame <- outputFrame[order(outputFrame$start),,drop=FALSE]
    rownames(outputFrame) <- seq_along(outputFrame[,1])
    return(outputFrame)
}



#' CompStrand_InitialCondense
#' Takes the significant complement strand positions from CompStrand_InitialPoisson and condenses all proximal positions into termination "peaks"
#' @importFrom dplyr arrange distinct
#' @importFrom stats aggregate ppois complete.cases
#' @importFrom utils write.csv write.table read.table read.delim
#' @param CompInitialPoisson Uses the output of CompStrand_InitialPoisson as the input
#' @param adjacentPeakDistance During the peak condensing step, this parameter is used to define “adjacent” for significant genomic positions. This is used to identify initial peak structures in the data. By default this value is set to 2 to ensure that single instances of loss of signal are not sufficient to prevent otherwise contiguous peak signatures from being combined.
#' @return Returns a dataframe that contains the list of termination "peaks" for the complement strand
#' @noRd
#'
CompStrand_InitialCondense <- function(CompInitialPoisson,
    adjacentPeakDistance = 2){
    # Zero-row holder that mirrors the columns of the significant positions
    emptyHolder <- as.data.frame(
        matrix(nrow = 0, ncol = ncol(CompInitialPoisson)))
    colnames(emptyHolder) <- colnames(CompInitialPoisson)
    # Single all-NA seed row; it is dropped again by complete.cases() below
    seedPeaks <- as.data.frame(matrix(nrow = 1, ncol = 6))
    colnames(seedPeaks) <- c("chrom","strand","HighestPeak",
        "HighestPeakReadCoverage",
        "LowestPeakCoord","HighestPeakCoord")
    condensedPeaks <- compConsecutiveCheck(OF = CompInitialPoisson,
        OMF = seedPeaks, aPD = adjacentPeakDistance, TPH = emptyHolder)
    # Keep only fully populated peaks and renumber the rows from 1
    isComplete <- complete.cases(condensedPeaks)
    condensedPeaks <- condensedPeaks[isComplete,,drop=FALSE]
    rownames(condensedPeaks) <- seq_along(condensedPeaks[,1])
    # Coordinates and coverage are coerced to numeric for the next step
    numericColumns <- c("LowestPeakCoord","HighestPeakCoord",
        "HighestPeakReadCoverage","HighestPeak")
    for(columnName in numericColumns){
        condensedPeaks[[columnName]] <-
            as.numeric(condensedPeaks[[columnName]])
    }
    return(condensedPeaks)
}

#' CompStrand_SecondaryCondense
#' Takes the list of termination "peaks" from CompStrand_InitialCondense and condenses peaks that are proximal to each other
#' @importFrom dplyr arrange distinct
#' @importFrom stats aggregate ppois complete.cases
#' @importFrom utils write.csv write.table read.table read.delim
#' @param CompInitialCondense Uses the output of CompStrand_InitialCondense as the input
#' @param peakCondensingDistance Following the initial peak condensing step, this parameter is used to identify peak structures in the data that are close enough to be considered part of the same termination signal. In testing, we have not identified cases in which two distinct termination signals are so proximal that the default parameters incorrectly combine the signals together.
#' @param OutputFileName A string that will be used to identify printed results files. When run with PIPETS_FullRun, this string will be input by the user in the beginning
#' @return The method writes a csv file in the project directory that contains the results for the Complement Strand termination peaks
#' @noRd
#'
CompStrand_SecondaryCondense <- function(CompInitialCondense,
    peakCondensingDistance = 20,
    OutputFileName){
    # Results are written next to the other outputs that share this prefix
    resultsFile <- paste(as.character(OutputFileName),
        "CompStrandResults.csv", sep = "_")
    # Zero-row holder with the same columns as the incoming peak table
    emptyHolder <- as.data.frame(
        matrix(nrow = 0, ncol = ncol(CompInitialCondense)))
    colnames(emptyHolder) <- colnames(CompInitialCondense)
    # Single all-NA seed row; removed again by complete.cases() below
    seedResults <- as.data.frame(matrix(nrow = 1, ncol = 6))
    colnames(seedResults) <- c("chrom","strand","HighestPeak",
        "HighestPeakReadCoverage","LowestPeakCoord","HighestPeakCoord")
    mergedPeaks <- consecutivePeakCheck(CompInitialCondense, seedResults,
        peakCondensingDistance, emptyHolder)
    # Keep fully populated rows and drop the second ("strand") column
    mergedPeaks <- mergedPeaks[complete.cases(mergedPeaks), -2, drop=FALSE]
    write.csv(mergedPeaks, file = resultsFile, row.names = FALSE)
}


#' PIPETS_FullRun
#' @title Analyze 3'-seq Data with PIPETS
#' Poisson Identification of PEaks from Term-Seq data. This is the full run method that begins with input Bed file and returns the strand split results
#' @importFrom dplyr arrange distinct %>% group_by transmute
#' @importFrom stats aggregate ppois complete.cases p.adjust
#' @importFrom utils write.csv write.table read.table read.delim
#' @importFrom GenomicRanges ranges end start makeGRangesFromDataFrame
#' @param inputData Either input Bed file or GRanges object. Either must have at least chromosome, start, stop, and strand information
#' @param OutputFileID User defined header for the output files of PIPETS. Will be the prefix for output bed and csv files.
#' @param OutputFileDir User defined output file directory where all files generated by PIPETS will be placed
#' @param readScoreMinimum The user must input the minimum read score from the input bed files that is used to determine good quality reads. All values equal to and greater than the input are considered. In many modern sequencing runs, a score of 60 is used.
#' @param slidingWindowSize This parameter establishes the distance up and down stream of each position that a sliding window will be created around. The default value is 25, and this will result in a sliding window of total size 51 (25 upstream + position (1) + 25 downstream).
#' @param slidingWindowMovementDistance This parameter sets the distance that the sliding window will be moved. By default, it is set to move by half of the sliding window size in order to ensure that almost every position in the data is tested twice.
#' @param adjacentPeakDistance During the peak condensing step, this parameter is used to define “adjacent” for significant genomic positions. This is used to identify initial peak structures in the data. By default this value is set to 2 to ensure that single instances of loss of signal are not sufficient to prevent otherwise contiguous peak signatures from being combined.
#' @param peakCondensingDistance Following the initial peak condensing step, this parameter is used to identify peak structures in the data that are close enough to be considered part of the same termination signal. In testing, we have not identified cases in which two distinct termination signals are so proximal that the default parameters incorrectly combine the signals together.
#' @param threshAdjust This parameter is used to establish a global cutoff threshold informed by the data. PIPETS sorts the genomic positions of each strand from highest to lowest, and starts with the highest read coverage position and subtracts that value from the total read coverage for that strand. By default, this continues until 75% of the total read coverage has been accounted for. Increasing the percentage (e.x. 0.9) will lower the strictness of the cutoff, thus increasing the total number of significant results.
#' @param threshAdjust_TopStrand Top strand specific threshAdjust value. If the user would like to run strand specific analysis, they should set threshAdjust to NA.
#' @param threshAdjust_CompStrand Comp strand specific threshAdjust value. If the user would like to run strand specific analysis, they should set threshAdjust to NA.
#' @param user_pValue Choose the minimum pValue that the Poisson distribution test must pass in order to be considered significant
#' @param highOutlierTrim This parameter is used along with threshAdjust to trim off the influence exerted by high read coverage outliers. By default, it removes the top 1 percent (a proportion of 0.01) of the highest read coverage positions from the calculation of the global threshold (e.g. if there are 200 positions that make up 75% of the total reads, then this parameter will take the top 2 read coverage positions and remove them from the calculation of the global threshold). This parameter can be tuned to account for datasets with outliers that would otherwise severely skew the global threshold.
#' @param highOutlierTrim_TopStrand Top strand specific highOutlierTrim value. If the user would like to run strand specific analysis, they should set highOutlierTrim to NA.
#' @param highOutlierTrim_CompStrand Comp strand specific highOutlierTrim value. If the user would like to run strand specific analysis, they should set highOutlierTrim to NA.
#' @param inputDataFormat PIPETS currently supports "bedFile" (default) and "GRanges" as input formats
#' @examples
#' ## When run, the user will be prompted to provide a string for file names
#' ## During the run, PIPETS will output the minimum read coverage cutoff for each strand
#' ## After completion, the output files will be created in the R project directory
#'
#' ## For a run with default strictness of analysis
#' PIPETS_FullRun(inputData = "PIPETS_TestData.bed", readScoreMinimum = 42, 
#' OutputFileDir = "~/Desktop/", OutputFileID = "Antibiotic1")
#'
#' ## For a more strict run (can be run for files with high total read depth)
#' PIPETS_FullRun(inputData = "PIPETS_TestData.bed", readScoreMinimum = 42, threshAdjust = 0.6, 
#' OutputFileDir = "~/Desktop/", OutputFileID = "Antibiotic1_Strict")
#'
#' ## For a less strict run (for data with low total read depth)
#' PIPETS_FullRun(inputData = "PIPETS_TestData.bed", readScoreMinimum = 42, threshAdjust = 0.9, 
#' OutputFileDir = "~/Desktop/", OutputFileID = "Antibiotic1_Lax")
#'
#' @return PIPETS outputs strand specific results files as well as strand specific bed files to the directory that the R project is in.
#' @export

PIPETS_FullRun <- function(inputData,readScoreMinimum,OutputFileID,
    OutputFileDir,slidingWindowSize = 25,
    slidingWindowMovementDistance = 25,threshAdjust = 0.75, 
    threshAdjust_TopStrand = NA, threshAdjust_CompStrand = NA,
    user_pValue = 0.0005,highOutlierTrim= 0.01,
    highOutlierTrim_TopStrand = NA, highOutlierTrim_CompStrand = NA,
    adjacentPeakDistance = 2, peakCondensingDistance = 20,
    inputDataFormat = "bedFile"){
    # Validate the user inputs first; inputCheck signals a problem with 1
    kicker <- inputCheck(inputData,readScoreMinimum,OutputFileID,
                         OutputFileDir,slidingWindowSize, 
                         slidingWindowMovementDistance,threshAdjust,
                         threshAdjust_TopStrand,threshAdjust_CompStrand,
                         user_pValue,highOutlierTrim,highOutlierTrim_TopStrand,
                         highOutlierTrim_CompStrand,adjacentPeakDistance,
                         peakCondensingDistance,inputDataFormat = inputDataFormat)
    if(kicker ==1){
        return()
    }
    # Build the output prefix from directory and ID. A separator is inserted
    # only when the directory is non-empty and does not already end in one,
    # so "~/Desktop" and "~/Desktop/" give the same prefix
    needsSeparator <- length(OutputFileDir) == 1L &&
        !is.na(OutputFileDir) && nzchar(OutputFileDir) &&
        !grepl("[/\\\\]$", OutputFileDir)
    OutputFileID <- paste0(OutputFileDir, if(needsSeparator) "/" else "",
        OutputFileID)
    # Split the input into strand specific read tables
    if(inputDataFormat %in% "bedFile"){
        AllReads <- Bed_Split(inputData, readScoreMinimum, OutputFileID)
    } else if (inputDataFormat %in% "GRanges"){
        AllReads <- GRanges_Split(inputData,readScoreMinimum, OutputFileID)
    } else {
        stop("inputDataFormat must be \"bedFile\" or \"GRanges\"",
             call. = FALSE)
    }
    # A non-NA threshAdjust applies to both strands; NA selects the
    # strand specific values instead
    if(is.na(threshAdjust)){
        topInputTA <- threshAdjust_TopStrand
        compInputTA <- threshAdjust_CompStrand
        message("Running PIPETS with strand specific threshAdjust values")
    } else if(is.numeric(threshAdjust)){
        topInputTA <- threshAdjust
        compInputTA <- threshAdjust
    } else {
        stop("threshAdjust must be numeric or NA", call. = FALSE)
    }
    # Same rule for highOutlierTrim
    if(is.na(highOutlierTrim)){
        topInputHOT <- highOutlierTrim_TopStrand
        compInputHOT <- highOutlierTrim_CompStrand
        message("Running PIPETS with strand specific highOutlierTrim values")
    } else if(is.numeric(highOutlierTrim)){
        topInputHOT <- highOutlierTrim
        compInputHOT <- highOutlierTrim
    } else {
        stop("highOutlierTrim must be numeric or NA", call. = FALSE)
    }
    # AllReads[[1]] is used as the output file prefix, [[2]] as the plus
    # strand reads and [[3]] as the minus strand reads
    message("+-----------------------------------+")
    message("Performing Top Strand Analysis")
    TopInititalPoisson <- TopStrand_InitialPoisson(
        MinusStrandReads = AllReads[[3]],slidingWindowSize = slidingWindowSize,
        slidingWindowMovementDistance = slidingWindowMovementDistance,
        threshAdjust = topInputTA, user_pValue = user_pValue,
        highOutlierTrim= topInputHOT)
    TopInititalCondense <- TopStrand_InitialCondense(
        TopInititalPoisson = TopInititalPoisson,
        adjacentPeakDistance = adjacentPeakDistance)
    TopStrand_SecondaryCondense(TopInititalCondense = TopInititalCondense,
        peakCondensingDistance = peakCondensingDistance,
        OutputFileName = AllReads[[1]])
    message("+-----------------------------------+")
    message("Performing Complement Strand Analysis")
    CompInitialPoisson <- CompStrand_InitialPoisson(
        PlusStrandReads = AllReads[[2]],slidingWindowSize = slidingWindowSize,
        slidingWindowMovementDistance = slidingWindowMovementDistance,
        threshAdjust = compInputTA, user_pValue = user_pValue,
        highOutlierTrim= compInputHOT)
    CompInitialCondense <- CompStrand_InitialCondense(
        CompInitialPoisson = CompInitialPoisson,
        adjacentPeakDistance = adjacentPeakDistance)
    CompStrand_SecondaryCondense(CompInitialCondense = CompInitialCondense,
        peakCondensingDistance = peakCondensingDistance,
        OutputFileName = AllReads[[1]])
    # GRanges input additionally returns elements 4 and 5 of the split
    # NOTE(review): presumably the strand specific GRanges objects - confirm
    if(inputDataFormat %in% "GRanges"){
        return(list(AllReads[[4]], AllReads[[5]]))
    }
}


