% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metascope_id.R
\name{metascope_id}
\alias{metascope_id}
\title{Identify which genomes are represented in a processed sample}
\usage{
metascope_id(
  input_file,
  input_type = "csv.gz",
  aligner = "bowtie2",
  db = "ncbi",
  db_feature_table = NULL,
  accession_path = NULL,
  priors_df = NULL,
  tmp_dir = dirname(input_file),
  out_dir = dirname(input_file),
  convEM = 1/10000,
  maxitsEM = 25,
  update_bam = FALSE,
  num_species_plot = NULL,
  group_by_taxa = "species",
  quiet = TRUE
)
}
\arguments{
\item{input_file}{The .bam or .csv.gz file of sample reads to be identified.}

\item{input_type}{Extension of file input. Should be either "bam" or
"csv.gz". Default is "csv.gz".}

\item{aligner}{The aligner which was used to create the bam file. Default is
"bowtie2" but can also be set to "subread" or "other".}

\item{db}{Currently accepts one of \code{c("ncbi", "silva", "other")}. Default
is \code{"ncbi"}, appropriate for samples aligned against indices compiled
from NCBI whole genome databases. Alternatively, usage of an alternate
database (like Greengenes2) should be specified with \code{"other"}.}

\item{db_feature_table}{If \code{db = "other"}, a data.frame must be supplied
with two columns, "Feature ID" matching the names of the alignment indices,
and a second \code{character} column supplying the taxon identifying
information.}

\item{accession_path}{(character) Filepath to NCBI accessions SQL database.
See \code{taxonomizr::prepareDatabase()}.}

\item{priors_df}{data.frame containing priors data. The data.frame consists
of two columns, 'species' containing species name, and 'prior_weights'
containing the prior weights (as a percent; integer).}

\item{tmp_dir}{Path to a directory to which bam and updated bam files can be
saved. Defaults to \code{dirname(input_file)}.}

\item{out_dir}{The directory to which the .csv output file will be output.
Defaults to \code{dirname(input_file)}.}

\item{convEM}{The convergence parameter of the EM algorithm. Default set at
\code{1/10000}.}

\item{maxitsEM}{The maximum number of EM iterations, regardless of whether
the convEM is below the threshold. Default set at \code{25}. If set at
\code{0}, the algorithm skips the EM step and summarizes the .bam file 'as
is'.}

\item{update_bam}{Whether to update BAM file with new read assignments.
Default is \code{FALSE}. If \code{TRUE}, requires \code{input_type = "bam"}
such that a BAM file is the input to the function.}

\item{num_species_plot}{The number of genome coverage plots to be saved.
Default is \code{NULL}, which saves coverage plots for the ten most highly
abundant species.}

\item{group_by_taxa}{Character. Taxonomy level at which accessions should be
grouped. Defaults to \code{"species"}.}

\item{quiet}{Turns off most messages. Default is \code{TRUE}.}
}
\value{
This function exports a .csv file containing annotated read counts for
  genomes with mapped reads; the location of that file is returned by the
  function.
  Depending on the parameters specified, can also output an updated BAM
  file, and fasta files for additional analysis downstream.
}
\description{
This function will read in a .bam or .csv.gz file, annotate the taxonomy and
genome names, reduce the mapping ambiguity using a mixture model, and output
a .csv file with the results. Currently, it assumes that the genome
library/.bam files use NCBI accession names for reference names (rnames in
.bam file).
}
\examples{
#### Align reads to reference library and then apply metascope_id()
## Assuming filtered bam files already exist

## Create temporary directory
file_temp <- tempfile()
dir.create(file_temp)

## Get temporary accessions database
tmp_accession <- system.file("extdata", "example_accessions.sql", package = "MetaScope")

#### Subread aligned .csv.gz file

## Create object with path to filtered subread csv.gz file
filt_file <- "subread_target.filtered.csv.gz"
bamPath <- system.file("extdata", filt_file, package = "MetaScope")
file.copy(bamPath, file_temp)

## Run metascope id with the aligner option set to subread
metascope_id(input_file = file.path(file_temp, filt_file),
             aligner = "subread", num_species_plot = 0,
             input_type = "csv.gz", accession_path = tmp_accession)

#### Bowtie 2 aligned .csv.gz file

## Create object with path to filtered bowtie2 csv.gz file
bowtie_file <- "bowtie_target.filtered.csv.gz"
bamPath <- system.file("extdata", bowtie_file, package = "MetaScope")
file.copy(bamPath, file_temp)

## Run metascope id with the aligner option set to bowtie2
metascope_id(file.path(file_temp, bowtie_file), aligner = "bowtie2",
             num_species_plot = 0, input_type = "csv.gz",
             accession_path = tmp_accession)

## Remove temporary directory
unlink(file_temp, recursive = TRUE)

}
