% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Identify_VCF_file.R
\name{identify_vcf_file}
\alias{identify_vcf_file}
\title{identify_VCF_file}
\usage{
identify_vcf_file( 
    vcf_file,
    output_file,
    ref_gen,
    minimum_matching_mutations,
    mutational_weight_inclusion_threshold,
    write_xls,
    output_bed_file,
    top_hits_per_library,
    manual_identifier,
    verbose,
    p_value,
    confidence_score,
    n_threads,
    write_results
)
}
\arguments{
\item{vcf_file}{Input vcf file. Only one sample column allowed.}

\item{output_file}{Path of the output file. If blank, 
autogenerated as name of input file plus '_uniquorn_ident.tab' suffix.}

\item{ref_gen}{Reference genome version. All training sets are 
associated with a reference genome version. Default: GRCH37}

\item{minimum_matching_mutations}{The minimum number of mutations that 
have to match between query and training sample for a positive prediction}

\item{mutational_weight_inclusion_threshold}{Include only mutations 
with a weight of at least x. Range: 0.0 to 1.0. 1 = unique to CL, 
~0 = found in many CL samples.}

\item{write_xls}{Create identification results additionally 
as xls file for easier reading}

\item{output_bed_file}{Whether BED files for IGV visualization should be 
created for the cancer cell lines that pass the threshold}

\item{top_hits_per_library}{Limit the number of significant similarities
per library to the top n hits (default 3). This is used particularly in
contexts where heterogeneous query and reference CCLs are being compared.}

\item{manual_identifier}{Manually enter a vector of CL 
name(s) whose bed files should be created, independently from 
them passing the detection threshold}

\item{verbose}{Print additional information}

\item{p_value}{Required p-value for identification.
Note that if you set the confidence score, the confidence score
overrides the p-value}

\item{confidence_score}{Cutoff for positive prediction between 0 and 100.
Calculated by transforming the p-value by -1 * log(p-value).
Note that if you set the confidence score, the confidence score
overrides the p-value}

\item{n_threads}{Number of threads to be used}

\item{write_results}{Write identification results to file}
}
\value{
R table with a statistic of the identification result
}
\description{
Identifies cancer cell lines contained in a vcf file based 
on the pattern (start & length) of all contained mutations/variations.
}
\details{
\code{identify_vcf_file} parses the vcf file and predicts 
the identity of the sample
}
\examples{
HT29_vcf_file = system.file("extdata/HT29.vcf", package = "Uniquorn");

identification = identify_vcf_file(
    vcf_file = HT29_vcf_file, 
    verbose = FALSE,
    write_results = FALSE
)
}
