% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/map_rangetype.R
\name{map_rangetype}
\alias{map_rangetype}
\title{Interval classification.}
\usage{
map_rangetype(
  map,
  type = "percent",
  ss = NULL,
  min_loop_width = 4,
  intervals = list(start = 1:5, mid = 45:55, end = 95:100),
  N_include = FALSE
)
}
\arguments{
\item{map}{PAC_map (generated by \code{\link{PAC_mapper}}) or a Reanno object
(generated by \code{\link{map_reanno}}), containing mapping coordinates
against a reference fasta file.}

\item{type}{Character indicating what type of intervals is provided.

  If type="nucleotides", then the interval list is given as ranges of
  nucleotide positions. For example, if intervals=list(start=1:3, end=1:3) the
  function will classify sequences starting within the first three
  nucleotides of the reference as 'type_start_nuc' and sequences ending
  within the last three nucleotides of the reference as 'type_end_nuc'.

  If type="percent", then intervals need to be provided as percent ranges.
  For example, if intervals=list(start=1:5, mid=45:50, end=95:100) then the
  function will classify sequences starting within the first 5\% of the
  nucleotides in the references as 'type_start_per', and sequences ending
  within the last 5\% of the nucleotides in the references as 'type_end_per'.
  It will also classify sequences starting within 45-50\% of the references
  as 'type_mid_start_per' and sequences ending within 45-50\% of the
  references as 'type_mid_end_per'.

  If type="ss", then intervals are obtained from an ss file, which can be
  obtained for example from tRNAscan-SE
  (\url{http://lowelab.ucsc.edu/tRNAscan-SE/}) or from GtRNAdb
  (\url{http://gtrnadb.ucsc.edu/}).

  Importantly - the intervals list is name sensitive. If type="nucleotides",
  intervals can only contain two intervals named 'start' and 'end', while if
  type="percent" then intervals needs to contain three intervals named
  'start', 'mid' and 'end'.

  Hint, for classifying 5' and 3' half tsRNA you need to run the function
  twice. First, classify each sequence as 5'-start or 3'-end tsRNA using
  type="nucleotides", and then rerun the map object using type="percent"
  specifying the 'mid' region as the half interval.}

\item{ss}{File path to ss file (character), readLines vector of ss file
(character) or ss list. If character, the function will attempt to read a
file from the path given in the character string. If this fails, the
function assumes that the ss file has already been read using
\code{readLines}, and will attempt to split that character vector into a
list of unique sequences by splitting at the empty lines. An empty line
normally delimits each sequence entry in the ss file. Such a list can also
be passed directly to the function, making it easy to change, for example,
sequence names using \code{\link{lapply}} prior to running the function.}

\item{min_loop_width}{Integer setting the minimum number of nucleotides for a
loop. Only applicable when type="ss". Loops in ss-files are defined by ">"
followed by x number of "." ending with "<". For example:\cr
\code{ATCGGTGGTTCAGTGGTAGAATGCTCGCCTCGCGGGCGGCCCGGGTTCGATTCCCGGCCGATG}\cr
\code{>>>>>..>>>>.......<<<<.>>>>>...<<<<<....>>>>>.......<<<<<<<<<<<}\cr
Here are three possible loops: "AGTGGTA", "CTC", "TTCGATT". If
min_loop_width=3, the middle loop (">...<"="CTC") will be classified as a
loop. If min_loop_width=4 (default), the middle loop will not be classified
as a loop because it is too short.}

\item{intervals}{A named list with integer intervals.}

\item{N_include}{Logical whether or not N "wild card" nucleotides should be
counted in the terminals. This conveniently controls the N_up and N_down
arguments in the \code{\link{PAC_mapper}} function. If N_include=FALSE
(default), start and end of tRNA will be measured from the first and last
canonical nucleotides (A, T, C, G). Thus, if a fragment aligns to an
NNN-terminal, it will receive a negative value. If N_include=TRUE, N
wild-cards will be treated as any other nucleotide.}
}
\value{
Map list object containing reference sequence (Ref_seq) as
  Biostrings::DNAStringSet and the new classifications embedded with the
  alignments (Alignments) in a dataframe.
}
\description{
\code{map_rangetype} classifies sequences based on interval mapping against a
reference.
}
\details{
Given a PAC_map object (\code{\link{PAC_mapper}}) and an interval list, this
function will attempt to classify mapped sequences based on where these
sequences start and end in the reference. This function can, for example, be
used for 5' and 3' tRNA classification.
}
\examples{

###########################################################
### Example: classify mapped fragments with map_rangetype
# More complicated examples can be found in the vignette.
##----------------------------------------

# Load the example data shipped with seqpac, keep only the first
# annotation column of the pac object, and summarize cpm values as
# group means over the stage column.
load(system.file("extdata", "drosophila_sRNA_pac_filt_anno.Rdata", 
                 package = "seqpac", mustWork = TRUE))
anno(pac) <- anno(pac)[,1, drop=FALSE]
pac_trna <- PAC_summary(pac, norm = "cpm", type = "means", 
                        pheno_target=list("stage"), merge_pac = TRUE)

# Then re-annotate only tRNA using the PAC_mapper function.
# The reference is the tRNA fasta shipped with seqpac; N_up and N_down
# are set to NNN here (see the N_include argument of map_rangetype for
# how N terminals affect start and end positions).
ref <- system.file("extdata/trna", "tRNA.fa", 
                         package = "seqpac", mustWork = TRUE)
map_object <- PAC_mapper(pac_trna, input=ref, N_up = "NNN", N_down = "NNN", 
                         mismatches=0, threads=2, report_string=TRUE, 
                         override=TRUE)

## Coverage plot of tRNA using PAC_covplot

# Plot a single tRNA (map_target) using one summary table
# (summary_target).
PAC_covplot(pac_trna, map=map_object, 
                      summary_target= list("cpmMeans_stage"),
                      map_target="tRNA-Ala-AGC-1-1")
            
## Classify range types with map_rangetype (see vignette for examples
# on how to use ss-files for detailed tRNA loop structure).

# Classify fragments using percent intervals. The type argument is left
# at its default (percent), so start, mid and end must all be named.
map_object <- map_rangetype(map_object, 
                intervals = list(start = 1:5, mid = 45:55, end = 95:100))
       
# Inspect the result: element names and the first element of the map.
names(map_object)
map_object[[1]]

}
\seealso{
\url{https://github.com/OestLab/seqpac} for updates on the current
  package.

Other PAC analysis: 
\code{\link{PAC_covplot}()},
\code{\link{PAC_deseq}()},
\code{\link{PAC_filter}()},
\code{\link{PAC_filtsep}()},
\code{\link{PAC_gtf}()},
\code{\link{PAC_jitter}()},
\code{\link{PAC_mapper}()},
\code{\link{PAC_nbias}()},
\code{\link{PAC_norm}()},
\code{\link{PAC_pca}()},
\code{\link{PAC_pie}()},
\code{\link{PAC_saturation}()},
\code{\link{PAC_sizedist}()},
\code{\link{PAC_stackbar}()},
\code{\link{PAC_summary}()},
\code{\link{PAC_trna}()},
\code{\link{as.PAC}()},
\code{\link{filtsep_bin}()},
\code{\link{tRNA_class}()}
}
\concept{PAC analysis}
