% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wrappers.R
\name{rfastp}
\alias{rfastp}
\title{R wrap of fastp}
\usage{
rfastp(
  read1,
  read2 = "",
  outputFastq,
  unpaired = "",
  failedOut = "",
  merge = FALSE,
  mergeOut = "",
  phred64 = FALSE,
  interleaved = FALSE,
  fixMGIid = FALSE,
  adapterTrimming = TRUE,
  adapterSequenceRead1 = "auto",
  adapterSequenceRead2 = "auto",
  adapterFasta = "",
  trimFrontRead1 = 0,
  trimTailRead1 = 0,
  trimFrontRead2 = 0,
  trimTailRead2 = 0,
  maxLengthRead1 = 0,
  maxLengthRead2 = 0,
  forceTrimPolyG = FALSE,
  disableTrimPolyG = FALSE,
  minLengthPolyG = 10,
  trimPolyX = FALSE,
  minLengthPolyX = 10,
  cutWindowSize = 4,
  cutLowQualTail = FALSE,
  cutSlideWindowRight = FALSE,
  cutLowQualFront = FALSE,
  cutMeanQual = 20,
  cutFrontWindowSize = 4,
  cutFrontMeanQual = 20,
  cutTailWindowSize = 4,
  cutTailMeanQual = 20,
  cutSlideWindowSize = 4,
  cutSlideWindowQual = 20,
  qualityFiltering = TRUE,
  qualityFilterPhred = 15,
  qualityFilterPercent = 40,
  maxNfilter = 5,
  averageQualFilter = 0,
  lengthFiltering = TRUE,
  minReadLength = 15,
  maxReadLength = 0,
  lowComplexityFiltering = FALSE,
  minComplexity = 30,
  index1Filter = "",
  index2Filter = "",
  maxIndexMismatch = 0,
  correctionOverlap = FALSE,
  minOverlapLength = 30,
  maxOverlapMismatch = 5,
  maxOverlapMismatchPercentage = 20,
  umi = FALSE,
  umiLoc = "",
  umiLength = 0,
  umiPrefix = "",
  umiSkipBaseLength = 0,
  umiNoConnection = FALSE,
  umiIgnoreSeqNameSpace = FALSE,
  overrepresentationAnalysis = FALSE,
  overrepresentationSampling = 20,
  splitOutput = 0,
  splitByLines = 0,
  thread = 2,
  verbose = TRUE
)
}
\arguments{
\item{read1}{read1 input file name(s). [vector]}

\item{read2}{read2 input file name(s). [vector]}

\item{outputFastq}{string of /path/prefix for output fastq [string]}

\item{unpaired}{for PE input, output file name for reads whose mate
reads failed to pass the QC. Default is "", which discards them. [string]}

\item{failedOut}{file to store reads that cannot pass the filters. Default
is "", which discards them. [string]}

\item{merge}{for PE input, a logical(1) indicating whether to merge each pair
of reads into a single read if they overlap; unmerged reads will
be written to the `outputFastq` file. Default is FALSE. `mergeOut` must be
set if TRUE.}

\item{mergeOut}{under `merge` mode, file to store the merged reads. [string]}

\item{phred64}{A logical indicating whether the input is using phred64
scoring (it will be converted to phred33, so the output will still be
phred33).}

\item{interleaved}{A logical indicating whether <read1> is an interleaved
FASTQ which contains both read1 and read2. Default is FALSE.}

\item{fixMGIid}{the MGI FASTQ ID format is not compatible with many BAM
operation tools, enable this option to fix it. Default is FALSE}

\item{adapterTrimming}{A logical indicating whether run adapter trimming.
Default is `TRUE`}

\item{adapterSequenceRead1}{the adapter for read1. For SE data, if not
specified, the adapter will be auto-detected. For PE data, this is used
if R1/R2 are found not overlapped.}

\item{adapterSequenceRead2}{the adapter for read2 (PE data only). This is
used if R1/R2 are found not overlapped. If not specified, it will be the
same as <adapterSequenceRead1>}

\item{adapterFasta}{specify a FASTA file to trim both read1 and read2 (if
PE) by all the sequences in this FASTA file.}

\item{trimFrontRead1}{trimming how many bases in front for read1, default
is 0.}

\item{trimTailRead1}{trimming how many bases in tail for read1, default is 0.}

\item{trimFrontRead2}{trimming how many bases in front for read2. If it's not
specified, it will follow read1's settings}

\item{trimTailRead2}{trimming how many bases in tail for read2. If it's not
specified, it will follow read1's settings}

\item{maxLengthRead1}{if read1 is longer than maxLengthRead1, then trim read1
at its tail to make it as long as maxLengthRead1. Default 0 means no
limitation.}

\item{maxLengthRead2}{if read2 is longer than maxLengthRead2, then trim read2
at its tail to make it as long as maxLengthRead2. Default 0 means no
limitation. If it's not specified, it will follow read1's settings.}

\item{forceTrimPolyG}{A logical indicating whether to force polyG tail
trimming; trimming is only automatically enabled for Illumina
NextSeq/NovaSeq data.}

\item{disableTrimPolyG}{A logical indicating disable polyG tail trimming.}

\item{minLengthPolyG}{the minimum length to detect polyG in the read tail.
10 by default.}

\item{trimPolyX}{A logical indicating force polyX tail trimming.}

\item{minLengthPolyX}{the minimum length to detect polyX in the read tail.
10 by default.}

\item{cutWindowSize}{the window size option shared by cutLowQualFront,
cutLowQualTail, or cutSlideWindowRight. Range: 1~1000, default: 4}

\item{cutLowQualTail}{A logical indicating whether to move a sliding window
from tail (3') to front, drop the bases in the window if its mean quality
< threshold, stop otherwise. Default is `FALSE`}

\item{cutSlideWindowRight}{A logical indicating move a sliding window from
front to tail, if meet one window with mean quality < threshold, drop
the bases in the window and the right part, and then stop. Default is
`FALSE`}

\item{cutLowQualFront}{A logical indicating whether to move a sliding window
from front (5') to tail, drop the bases in the window if its mean quality
< threshold, stop otherwise. Default is `FALSE`}

\item{cutMeanQual}{the mean quality requirement option shared by
cutLowQualFront, cutLowQualTail or cutSlideWindowRight. Range: 1~36,
default: 20}

\item{cutFrontWindowSize}{the window size option of cutLowQualFront, default
to cutWindowSize if not specified. default: 4}

\item{cutFrontMeanQual}{the mean quality requirement option for
cutLowQualFront, default to cutMeanQual if not specified. default: 20}

\item{cutTailWindowSize}{the window size option of cutLowQualTail, default
to cutWindowSize if not specified. default: 4}

\item{cutTailMeanQual}{the mean quality requirement option for
cutLowQualTail, default to cutMeanQual if not specified. default: 20}

\item{cutSlideWindowSize}{the window size option of cutSlideWindowRight,
default to cutWindowSize if not specified. default: 4}

\item{cutSlideWindowQual}{the mean quality requirement option for
cutSlideWindowRight, default to cutMeanQual if not specified. default:
20}

\item{qualityFiltering}{A logical indicating run quality filtering.
Default is `TRUE`.}

\item{qualityFilterPhred}{the minimum quality value that a base is
qualified. Default 15 means phred quality >=Q15 is qualified.}

\item{qualityFilterPercent}{Maximum percentage of bases allowed to be
unqualified (0~100). Default 40 means 40\%}

\item{maxNfilter}{maximum number of N allowed in the sequence. read/pair is
discarded if failed to pass this filter. Default is 5}

\item{averageQualFilter}{if one read's average quality score <
`averageQualFilter`, then this read/pair is discarded. Default 0 means
 no requirement.}

\item{lengthFiltering}{A logical indicating whether to run length filtering.
Default: TRUE}

\item{minReadLength}{reads shorter than minReadLength will be discarded,
default is 15.}

\item{maxReadLength}{reads longer than maxReadLength will be discarded,
default 0 means no limitation.}

\item{lowComplexityFiltering}{A logical indicating whether to run the low
complexity filter. The complexity is defined as the percentage of bases
that are different from their next base (base[i] != base[i+1]). Default is
`FALSE`}

\item{minComplexity}{the threshold for low complexity filter (0~100).
Default is 30, which means 30\% complexity is required.}

\item{index1Filter}{specify a file containing a list of barcodes of index1
to be filtered out, one barcode per line.}

\item{index2Filter}{specify a file containing a list of barcodes of index2
to be filtered out, one barcode per line.}

\item{maxIndexMismatch}{the allowed difference of index barcode for
index filtering, default 0 means completely identical.}

\item{correctionOverlap}{A logical indicating run base correction in
overlapped regions (only for PE data), default is `FALSE`}

\item{minOverlapLength}{the minimum length to detect overlapped region of
PE reads. This will affect overlap analysis based PE merge, adapter
trimming and correction. 30 by default.}

\item{maxOverlapMismatch}{the maximum number of mismatched bases to detect
overlapped region of PE reads. This will affect overlap analysis
based PE merge, adapter trimming and correction. 5 by default.}

\item{maxOverlapMismatchPercentage}{the maximum percentage of mismatched
bases to detect overlapped region of PE reads. This will affect
overlap analysis based PE merge, adapter trimming and correction.
Default 20 means 20\%}

\item{umi}{A logical indicating whether to preprocess unique molecular
identifiers (UMI). Default: `FALSE`}

\item{umiLoc}{specify the location of UMI, can be
(index1/index2/read1/read2/per_index/per_read)}

\item{umiLength}{length of UMI if the UMI is in read1/read2.}

\item{umiPrefix}{a string indicating that the following string is a UMI
(e.g. prefix=UMI, UMI=AATTCG, final=UMIAATTCG). Only letters,
numbers, and '#' allowed. No prefix by default.}

\item{umiSkipBaseLength}{if the UMI is in read1/read2, skip
`umiSkipBaseLength` bases following UMI, default is 0.}

\item{umiNoConnection}{a logical indicating whether to remove the "_" between
the UMI prefix string and the UMI string. Default is FALSE.}

\item{umiIgnoreSeqNameSpace}{a logical indicating whether to ignore the space
in the sequence name. Default is FALSE, the umi tag will be
inserted into the sequence name before the first SPACE.}

\item{overrepresentationAnalysis}{A logical indicating overrepresentation
analysis. Default is `FALSE`}

\item{overrepresentationSampling}{one in `overrepresentationSampling`
reads will be computed for overrepresentation analysis (1~10000),
smaller is slower, default is 20.}

\item{splitOutput}{number of files to split the output into (2~999). A
sequential number prefix will be added to output name. Default is 0 (no split)}

\item{splitByLines}{split output by limiting lines of each file(>=1000), a
sequential number prefix will be added to output name ( 0001.out.fq,
0002.out.fq...), default is 0 (disabled).}

\item{thread}{worker thread number, default is 2}

\item{verbose}{output verbose log information}
}
\value{
returns a json object of the report.
}
\description{
Quality control (adapter cutting, low quality trimming, polyX trimming,
UMI handling, etc.) of fastq files.
}
\examples{

# prepare the input and output files.
# if the output file exists, it will be OVERWRITTEN.

se_read1 <- system.file("extdata","Fox3_Std_small.fq.gz",package="Rfastp")
pe_read1 <- system.file("extdata","reads1.fastq.gz",package="Rfastp")
pe_read2 <- system.file("extdata","reads2.fastq.gz",package="Rfastp")
outputPrefix <- tempfile(tmpdir = tempdir())


# a normal single-end file

se_json_report <- rfastp(read1 = se_read1,
    outputFastq=paste0(outputPrefix, "_se"), thread = 4)


# merge paired-end data by overlap:

pe_json_report <- rfastp(read1 = pe_read1, read2 = pe_read2, merge = TRUE,
    outputFastq = paste0(outputPrefix, '_unpaired'),
    mergeOut = paste0(outputPrefix, '_merged.fastq.gz'))


# a clipr example

clipr_json_report <- rfastp(read1 = se_read1,
   outputFastq = paste0(outputPrefix, '_clipr'),
   disableTrimPolyG = TRUE,
   cutLowQualFront = TRUE,
   cutFrontWindowSize = 29,
   cutFrontMeanQual = 20,
   cutLowQualTail = TRUE,
   cutTailWindowSize = 1,
   cutTailMeanQual = 5,
   minReadLength = 29,
   adapterSequenceRead1 = 'GTGTCAGTCACTTCCAGCGG'
)
}
\author{
Thomas Carroll, Wei Wang
}
