#' Run the MetaScope workflow on a directory of FASTQ files
#'
#' This wrapper function streamlines the MetaScope pipeline by automatically
#' downloading references, building a Bowtie2 index, aligning reads, and running
#' \code{metascope_id()} for taxonomic identification. It supports both
#' single-end and paired-end sequencing data and is currently implemented only
#' for 16S rRNA sequencing.
#'
#' @param fastq_dir Path to the directory containing FASTQ files. Only files
#'   ending in \code{.fastq}, \code{.fq}, \code{.fastq.gz} or \code{.fq.gz}
#'   (case-insensitive) are used; other files in the directory are ignored.
#'   For paired-end runs, files must follow the \code{_R1}/\code{_R2}
#'   naming convention.
#' @param sequencing_type Character string specifying the sequencing type.
#'   Currently only \code{"16S"} is supported.
#' @param ref_dir Path to the reference directory where the 16S RefSeq FASTA,
#'   Bowtie2 indices, and accession database are stored. Defaults to
#'   \code{"~/MetaScope/ref_dir"}.
#' @param paired_reads Logical. If \code{FALSE} (default), runs in single-end
#'   mode; if \code{TRUE}, assumes paired-end FASTQ files with matching
#'   \code{_R1} and \code{_R2} filenames. Each \code{_R1} file must have a
#'   mate whose name is identical except for \code{_R2} in place of
#'   \code{_R1}.
#' @param tmp_dir Path to a directory for temporary alignment files (SAM/BAM and
#'   intermediate results). Defaults to \code{"~/MetaScope/tmp"}.
#' @param out_dir Path to the directory for final MetaScope output files.
#'   Defaults to \code{"~/MetaScope/out"}.
#' @param threads Integer. Number of CPU threads passed to
#'   \code{align_target_bowtie()}. Default is \code{1}.
#' @param combine_results Logical. If \code{TRUE} (default), the per-sample
#'   \code{metascope_id()} outputs are merged into one feature table that is
#'   written to \code{metascope_results.csv} in \code{out_dir} and returned.
#'   If \code{FALSE}, the per-sample results are returned unmerged.
#' @param ... Additional arguments forwarded by name to
#'   \code{mk_bowtie_index()}, \code{align_target_bowtie()} and
#'   \code{metascope_id()}. Each function only receives the arguments that
#'   match its own formals.
#'
#' @details
#' The workflow proceeds through the following steps:
#' \enumerate{
#'   \item Validates \code{sequencing_type} and collects (and, for paired-end
#'         runs, pairs) the FASTQ files in \code{fastq_dir}. This happens
#'         before anything is downloaded or created, so bad input fails fast.
#'   \item Ensures the NCBI 16S RefSeq FASTA file is downloaded
#'         (via \code{download_refseq_16S()}).
#'   \item Builds a Bowtie2 index of the 16S reference if it does not exist
#'         (via \code{mk_bowtie_index()}).
#'   \item Ensures that the NCBI accession-to-taxonomy database is present
#'         (via \code{download_accessions()}).
#'   \item Aligns all FASTQ files in \code{fastq_dir} against the reference
#'         (via \code{align_target_bowtie()}).
#'   \item Runs \code{metascope_id()} on each alignment BAM file and writes
#'         annotated taxonomic identification results to \code{out_dir}.
#'   \item Optionally merges the \code{TaxonomyID}, \code{Genome} and
#'         \code{readsEM} columns of every per-sample result into a single
#'         table (one \code{readsEM} column per sample, missing values
#'         replaced by 0).
#' }
#'
#' @return If \code{combine_results = FALSE}, a named list with one element per
#'   sample holding the value returned by \code{metascope_id()} (used here as
#'   the path of the per-sample output file), with sample names as list element
#'   names. If \code{combine_results = TRUE}, a data frame with columns
#'   \code{TaxonomyID}, \code{Genome} and one column per sample; the same table
#'   is written to \code{metascope_results.csv} in \code{out_dir}.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Run MetaScope on single-end FASTQ files
#' run_metascope(fastq_dir = "data/fastq", threads = 4)
#'
#' # Run MetaScope on paired-end FASTQ files
#' run_metascope(fastq_dir = "data/fastq", paired_reads = TRUE, threads = 8)
#' }
run_metascope <- function(
    fastq_dir,
    sequencing_type = "16S",
    ref_dir = "~/MetaScope/ref_dir",
    paired_reads = FALSE,
    tmp_dir = "~/MetaScope/tmp",
    out_dir = "~/MetaScope/out",
    threads = 1,
    combine_results = TRUE,
    ...) {

  # Arguments in ... are forwarded by name to the downstream functions.
  extra_args <- list(...)
  pick_extra_args <- function(fun) {
    extra_args[names(extra_args) %in% names(formals(fun))]
  }

  # ---- Input validation (before any download or directory creation) ----
  # Currently only works for 16S
  if (length(sequencing_type) != 1L || is.na(sequencing_type) ||
      sequencing_type != "16S") {
    stop("Currently only 16S sequencing is supported.")
  }

  # Only pick up FASTQ files so stray files in fastq_dir (READMEs, checksums,
  # ...) are never handed to the aligner.
  fastq_pattern <- "\\.(fastq|fq)(\\.gz)?$"
  fq_files <- list.files(fastq_dir, pattern = fastq_pattern,
                         full.names = TRUE, ignore.case = TRUE)
  if (length(fq_files) == 0) stop("No FASTQ files found in: ", fastq_dir)

  if (paired_reads) {
    # Match the R1/R2 tag on the file name only; matching the full path would
    # select every file when a parent directory happens to contain "_R1".
    fq_names <- basename(fq_files)
    read1_files <- fq_files[grepl("_R1", fq_names, fixed = TRUE)]
    r2_candidates <- fq_files[grepl("_R2", fq_names, fixed = TRUE)]
    if (length(read1_files) == 0 && length(r2_candidates) == 0) {
      stop("No _R1/_R2 FASTQ files found in: ", fastq_dir)
    }
    # Pair each R1 with the file whose name differs only by the R2 tag instead
    # of relying on both listings happening to be in the same order.
    expected_r2 <- sub("_R1", "_R2", basename(read1_files), fixed = TRUE)
    r2_index <- match(expected_r2, basename(r2_candidates))
    if (length(read1_files) != length(r2_candidates) || anyNA(r2_index)) {
      stop("Mismatched R1/R2 files")
    }
    read2_files <- r2_candidates[r2_index]
    sample_ids <- sub("_R1.*", "", basename(read1_files))
  } else {
    read1_files <- fq_files
    read2_files <- NULL
    sample_ids <- sub(fastq_pattern, "", basename(fq_files),
                      ignore.case = TRUE)
  }

  # ---- Directories ----
  dir.create(ref_dir, showWarnings = FALSE, recursive = TRUE)
  dir.create(tmp_dir, showWarnings = FALSE, recursive = TRUE)
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

  # ---- 16S RefSeq FASTA (assumes the default combined file name) ----
  ref_fna <- file.path(ref_dir, "refseq_16S.fna")
  if (!file.exists(ref_fna)) {
    message("16S Refseq Not Found, Dowloading from NCBI FTP")
    download_refseq_16S(out_dir = ref_dir, combined_name = "refseq_16S.fna")
  } else {
    message("16S Refseq Found in: ", ref_fna)
  }

  # ---- Bowtie2 index (only Bowtie2 is supported at the moment) ----
  bt2_suffixes <- c(".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
                    ".rev.1.bt2", ".rev.2.bt2")
  bt2_files <- file.path(ref_dir, paste0("16S_ribosomal_RNA", bt2_suffixes))
  if (!all(file.exists(bt2_files))) {
    message("16S Ribosomal RNA bowtie Indices Not Found. Building Bowtie Indices:")
    do.call(mk_bowtie_index, c(list(ref_dir = ref_dir,
                                    lib_dir = ref_dir,
                                    lib_name = "16S_ribosomal_RNA"),
                               pick_extra_args(mk_bowtie_index)))
  } else {
    message("16S Ribosoaml Index Found in: ", ref_dir)
  }

  # ---- Taxonomy accessions database ----
  accession_db <- file.path(ref_dir, "accessionTaxa.sql")
  if (!file.exists(accession_db)) {
    message("Taxonomy Accessions Database Not Found. Downloading from NCBI FTP. This may take a while")
    download_accessions(ref_dir,
                        tmp_dir = file.path(ref_dir, "tmp"),
                        remove_tmp_dir = TRUE,
                        NCBI_accessions_database = TRUE,
                        NCBI_accessions_name = "accessionTaxa",
                        silva_taxonomy_database = FALSE,
                        silva_taxonomy_name = "all_silva_headers",
                        blast_16S_database = FALSE,
                        blast_16S_name = "16S_ribosomal_RNA")
  } else {
    message("Accessions Database found in: ", accession_db)
  }

  # ---- Alignment + metascope_id per sample ----
  # The forwarded arguments do not depend on the sample, so compute them once.
  align_extra_args <- pick_extra_args(align_target_bowtie)
  id_extra_args <- pick_extra_args(metascope_id)

  # Align one sample (read2 is NULL for single-end data) and run metascope_id
  # on the resulting BAM; returns whatever metascope_id() returns.
  process_sample <- function(read1, read2, sample_id) {
    sample_tmp <- file.path(tmp_dir, sample_id)
    dir.create(sample_tmp, showWarnings = FALSE)
    bam_out <- do.call(align_target_bowtie,
                       c(list(read1 = read1,
                              read2 = read2,
                              lib_dir = ref_dir,
                              libs = "16S_ribosomal_RNA",
                              align_dir = sample_tmp,
                              align_file = sample_id,
                              threads = threads),
                         align_extra_args))
    do.call(metascope_id,
            c(list(input_file = bam_out,
                   input_type = "bam",
                   aligner = "bowtie2",
                   db = "ncbi",
                   accession_path = accession_db,
                   tmp_dir = sample_tmp,
                   out_dir = out_dir),
              id_extra_args))
  }

  if (paired_reads) {
    message("Running paired-end alignment and MetaScopeID...")
  } else {
    message("Running single-end alignment and MetaScopeID...")
  }

  # Always build a list (never a simplified vector) so single-end and
  # paired-end runs return the same type.
  results <- lapply(seq_along(read1_files), function(i) {
    process_sample(
      read1 = read1_files[[i]],
      read2 = if (paired_reads) read2_files[[i]] else NULL,
      sample_id = sample_ids[[i]]
    )
  })
  names(results) <- sample_ids

  if (!combine_results) {
    return(results)
  }

  # ---- Merge per-sample results into one feature table ----
  result_paths <- unlist(results, use.names = FALSE)
  if (length(result_paths) == 0 || !all(file.exists(result_paths))) {
    stop("No MetaScope ID outputs found in: ", out_dir)
  }

  key_cols <- c("TaxonomyID", "Genome")
  all_res <- purrr::map(results, function(path) {
    # The sample's readsEM column is named after its output file.
    column_name <- sub("\\.metascope_id\\.csv$", "", basename(path))
    utils::read.csv(path) |>
      dplyr::select(dplyr::all_of(c(key_cols, "readsEM"))) |>
      dplyr::rename_with(function(nm) column_name,
                         .cols = dplyr::all_of("readsEM"))
  })

  # Taxa absent from a sample come out of the full join as NA; report them
  # as zero reads.
  merged_res <- purrr::reduce(all_res, dplyr::full_join, by = key_cols) |>
    dplyr::mutate(
      dplyr::across(
        -dplyr::all_of(key_cols),
        function(col) tidyr::replace_na(col, 0)
      )
    )

  utils::write.csv(merged_res,
                   file = file.path(out_dir, "metascope_results.csv"))
  merged_res
}
