#'
#' TF Binding Info
#'
#' Combined transcription factor ChIP-seq data from ChIP-Atlas and ENCODE
#'
#' This is a special data set that stores transcription factor binding sites for human and
#' mouse genomic builds, which can be used with the package epiregulon to compute regulons.
#'
#' @inheritParams prostateENZ
#' @param genome character string specifying the genomic build
#' @param source character string specifying the ChIP-seq data source
#' @param version numeric indicating data version number (see Details)
#' @param peak_number numeric indicating the minimum number of peaks a
#' transcription factor must have in the combined version of GenomicRanges
#' (from all samples and tissues) to be retained. Only applied when
#' \code{version = 2} and \code{source = "atlas"}.
#'
#' @details
#' In version 2, ChIP-Atlas sample and tissue GenomicRanges were built using experiments
#' with at least 2E7 unique reads and 1E3 peaks. For sample-specific ENCODE
#' ChIP-seq data, the only quality metric was the number of peaks.
#'
#' @return A list of TF binding sites as a \code{GRangesList} object.
#'
#' @format
#' \code{GRangesList} object containing binding site information
#' of transcription factor ChIP-seq.
#' Contains the following experiments:
#' \itemize{
#'   \item{\strong{hg38_atlas}: GRangesList object of length 1558}
#'   \item{\strong{hg19_atlas}: GRangesList object of length 1558}
#'   \item{\strong{mm10_atlas}: GRangesList object of length 768}
#'   \item{\strong{hg38_encode.sample}: List object of length 171}
#'   \item{\strong{hg19_encode.sample}: List object of length 171}
#'   \item{\strong{mm10_encode.sample}: List object of length 31}
#'   \item{\strong{hg38_atlas.sample}: List object of length 1112}
#'   \item{\strong{hg19_atlas.sample}: List object of length 1112}
#'   \item{\strong{mm10_atlas.sample}: List object of length 517}
#'   \item{\strong{hg38_atlas.tissue}: List object of length 22}
#'   \item{\strong{hg19_atlas.tissue}: List object of length 22}
#'   \item{\strong{mm10_atlas.tissue}: List object of length 23}
#' }
#'
#' @references
#' ChIP-Atlas 2021 update: a data-mining suite for exploring epigenomic landscapes by
#' fully integrating ChIP-seq, ATAC-seq and Bisulfite-seq data.
#' Zou Z, Ohta T, Miura F, Oki S.
#' \emph{Nucleic Acids Research. Oxford University Press (OUP);} 2022.
#' \href{http://dx.doi.org/10.1093/nar/gkac199}{doi:10.1093/nar/gkac199}
#'
#' ChIP‐Atlas: a data‐mining suite powered by full integration of public ChIP‐seq data.
#' Oki S, Ohta T, Shioi G, Hatanaka H, Ogasawara O, Okuda Y, Kawaji H, Nakaki R, Sese J, Meno C.
#' \emph{EMBO}; Vol. 19, EMBO reports. 2018.
#' \href{http://dx.doi.org/10.15252/embr.201846255}{doi:10.15252/embr.201846255}
#'
#' ENCODE: \url{https://www.encodeproject.org/}
#'
#' @section Data storage and access:
#' Each genomic build is a separate \code{GRangesList} object, stored in a separate RDS file.
#' All genomic builds can be accessed with the same function \code{tfBinding}.
#'
#' @section Data preparation:
#' ```{r child = system.file("scripts", "make-data-tfBinding.Rmd", package = "scMultiome")}
#' ```
#'
#' @examples
#' # check metadata of dataset
#' tfBinding("mm10", metadata = TRUE)
#' # download data
#' \dontrun{
#' tfBinding("mm10", "atlas")
#' }
#'
#' @export
#'
tfBinding <- function(genome = c("hg38", "hg19", "mm10"),
                      source = c("atlas", "encode.sample", "atlas.sample", "atlas.tissue"),
                      metadata = FALSE,
                      version = 1,
                      peak_number = 1000) {
    # Validate arguments up front so that failures are reported before any
    # ExperimentHub access is attempted.
    checkmate::assertFlag(metadata)
    genome <- match.arg(genome, several.ok = FALSE)
    source <- match.arg(source, several.ok = FALSE)
    checkmate::assert_choice(version, c(1, 2))
    checkmate::assertNumber(peak_number, lower = 0)

    # The combined ("atlas") data set uses the same file in both versions;
    # only the sample/tissue specific files carry a "_v2" suffix.
    is_combined <- identical(source, "atlas")

    if (version == 1) {
        message("Retrieving chip-seq data, version 1")
    } else {
        message("Version 2 of the chip-seq data is being retrieved. For reproducibility with the scMultiome version < 1.7.1 please set version = 1.")
    }

    # Build the name of the RDS file, e.g. "tfBinding_hg38_atlas.sample_v2.rds".
    version_suffix <- if (version == 2 && !is_combined) "_v2" else ""
    file_name <- paste0("tfBinding_", genome, "_", source, version_suffix, ".rds")

    eh <- AnnotationHub::query(ExperimentHub::ExperimentHub(),
                               pattern = c("scMultiome", "tfBinding", file_name))

    # Fail with an explicit message instead of an obscure subsetting error
    # when the query matches no record.
    if (length(eh$ah_id) == 0L) {
        stop("no ExperimentHub record found for file ", file_name, call. = FALSE)
    }

    # For the combined data set only the first matching id (in sorted order)
    # is used; for all other sources the matching id(s) are used as returned.
    eh_ID <- if (is_combined) sort(eh$ah_id)[1] else eh$ah_id

    # Metadata is returned untouched: the peak number filter below is only
    # meaningful for the GRangesList itself, not for the hub record.
    if (metadata) {
        return(eh[eh_ID])
    }

    ans <- readRDS(eh[[eh_ID]])

    # In version 2 the combined data set keeps only transcription factors
    # with at least `peak_number` ranges.
    if (version == 2 && is_combined) {
        ans <- ans[unlist(lapply(ans, length)) >= peak_number]
    }

    ans
}
