% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Rbec.R
\name{Rbec}
\alias{Rbec}
\title{Reference-based error correction of amplicon sequencing data}
\usage{
Rbec(fastq, reference, outdir, threads=1, sampling_size=5000, ascii=33, min_cont_obs_abd=200, min_cont_abd=0.03, min_E=0.05, min_P=1e-40, ref_seeker=1, cn=NULL)
}
\arguments{
\item{fastq}{the path of the fastq file containing merged amplicon sequencing reads (Ns are not allowed in the reads)}

\item{reference}{the path of the unique reference sequences, each sequence must be in one line (Ns are not allowed in the sequences)}

\item{outdir}{the output directory, which should be created by the user}

\item{threads}{the number of threads used, default 1}

\item{sampling_size}{the sampling size for calculating the error matrix, default 5000}

\item{ascii}{ascii characters used to encode phred scores (33 or 64), default 33}

\item{min_cont_obs_abd}{the minimum observed abundance of unique tags for detecting contamination sequences, default 200}

\item{min_cont_abd}{the relative abundance of unique tags for detecting contamination sequences that can't be corrected by any of the references, default 0.03}

\item{min_E}{the minimum expectation of the Poisson distribution for the identification of paralogues, default 0.05}

\item{min_P}{the minimum p-value threshold of the Poisson distribution to correct a read, default 1e-40}

\item{ref_seeker}{the method for finding the candidate error-producing reference sequence for a tag showing identical lowest K-mer distance to multiple references. 1 for the abundance-based method; 2 for the transition probability-based method, default 1.}

\item{cn}{the copy number table documenting the copy number of the marker gene in each strain. Rbec will normalize the strain abundance if the copy number is available}
}
\value{
lambda_final.out the lambda value and p-value of the Poisson distribution for each read

error_matrix_final.out the error matrix in the final iteration

strain_table.txt the strain composition of the sample

strain_table_normalized.txt the copy-number-normalized strain composition of the sample if the copy number table is provided

contamination_seq.fna the potential sequences generated by contaminants

rbec.log percentage of corrected reads, which can be used to predict contaminated samples

paralogue_seq.fna paralogue sequences found in each strain except for the reference provided
}
\description{
This function corrects the amplicon sequencing data from synthetic communities where the reference sequences are known a priori
}
\details{
Ruben Garrido-Oter's group, Plant-Microbe interaction, Max Planck Institute for Plant Breeding Research
}
\examples{
fastq <- system.file("extdata", "test_raw_merged_reads.fastq.gz", package = "Rbec")

ref <- system.file("extdata", "test_ref.fasta", package = "Rbec")

Rbec(fastq=fastq, reference=ref, outdir=tempdir(), threads=1, sampling_size=500, ascii=33)

}
\author{
Pengfan Zhang
}
