% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{sigs_pcawg}
\alias{sigs_pcawg}
\alias{PCAWG_SP_SBS_sigs_Artif_df}
\alias{PCAWG_SP_SBS_sigInd_Artif_df}
\alias{PCAWG_SP_SBS_sigs_Real_df}
\alias{PCAWG_SP_SBS_sigInd_Real_df}
\alias{PCAWG_SP_ID_sigs_df}
\alias{PCAWG_SP_ID_sigInd_df}
\title{Data for PCAWG SNV signatures (COSMIC v3), including artifacts
 
\code{PCAWG_SP_SBS_sigs_Artif_df}: Data frame of the signatures published 
by Alexandrov et al. (Biorxiv 2018) which were decomposed with the 
method SigProfiler. SNV signatures are labeled with SBS, single base 
signature. There are 67 signatures which constitute the columns, 47 of 
which were validated by a Bayesian NMF method, SignatureAnalyzer. Validated 
signatures are SBS1-SBS26, SBS28-SBS42 and SBS44. SBS7 is split up into 
SBS7a, SBS7b, SBS7c and SBS7d. SBS10 and SBS17 are each split up into a and b,
resulting in 47 validated signatures. Please note, unlike the paper by Alexandrov et al.
(Biorxiv 2018) the data sets do not contain SBS84 and SBS85 as not all 
were available to perform supervised signature analysis. In total there are 
96 different features and therefore 96 rows when dealing with a trinucleotide
context.}
\source{
\code{PCAWG_SNV}: \url{https://www.synapse.org/#!Synapse:syn11738319}

\code{PCAWG_INDEL}: \url{https://cancer.sanger.ac.uk/cosmic/signatures/ID}
}
\usage{
data(sigs_pcawg)
}
\description{
\code{PCAWG_SP_SBS_sigInd_Artif_df}: Meta-information for 
\code{PCAWG_SP_SBS_sigs_Artif_df}

\code{PCAWG_SP_SBS_sigs_Real_df}: Data frame of only the validated
signatures published by Alexandrov et al. (Biorxiv 2018), corresponding 
to the columns 1-26, 28-42 and 44 of the \code{PCAWG_SP_SBS_sigs_Artif_df}
data frame.

\code{PCAWG_SP_SBS_sigInd_Real_df}: Meta-information for 
\code{PCAWG_SP_SBS_sigs_Real_df}

\code{PCAWG_SP_ID_sigs_df}: Data frame with Indel signatures published by
Alexandrov et al. (Biorxiv 2018) which were decomposed with the method
SigProfiler. There are 17 signatures reported, but as supervised signatures
these are only valid for whole genome sequencing data analysis. In whole genome
sequencing data the Indel signature ID15 was not described and thus is not
part of this data set. In total 83 features are described. The categorization
considers the size of the insertion and deletion, the motif, and the
sequence context. Here, the number of repetitions or partial repetitions of the
motif is determined.

\code{PCAWG_SP_ID_sigInd_df}: Meta-information for 
\code{PCAWG_SP_ID_sigs_df}
}
\references{
Alexandrov et al. (Biorxiv 2018)
}
\author{
Lea Jopp-Saile \email{huebschmann.daniel@googlemail.com}
}
