% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_read_gmt.R
\name{read_gmt}
\alias{read_gmt}
\title{Read a \code{.gmt} file in as a \code{pathwayCollection} object}
\usage{
read_gmt(
  file,
  setType = c("pathways", "genes", "regions"),
  description = FALSE,
  nChars = 1e+07,
  delim = "\\t"
)
}
\arguments{
\item{file}{A path to a file or a connection. This file must be a \code{.gmt}
file, otherwise the parsed output will likely be nonsense. See the "Details"
section for more information.}

\item{setType}{What is the type of the set: pathways (sets of genes), gene
sites in RNA or DNA, or regions of CpGs. Defaults to \code{"pathways"}.}

\item{description}{Should the "description" field (the second field in the
\code{.gmt} file on each line) be included in the output? Defaults to
\code{FALSE}.}

\item{nChars}{The number of characters to read from a connection. The largest
\code{.gmt} file we have encountered is the full C5 pathway collection
from MSigDB (5917 pathways), which has roughly 5 million characters in
UTF-8 encoding. Therefore, we default this argument to be twice the size
of the largest pathway collection we have seen so far, 10,000,000.}

\item{delim}{The \code{.gmt} delimiter. As proper \code{.gmt} files are tab
delimited, this defaults to \code{"\\t"}.}
}
\value{
A \code{pathwayCollection} list of sets. This list has up to three
   elements (the last one is optional):
\describe{
  \item{'setType' : }{A named list of character vectors. Each vector
     contains the names of the individual genes, sites, or CpGs within that
     set as a vector of character strings. The name of this list entry is
     equal to the value specified in \code{setType}.}
  \item{\code{TERMS} : }{A character vector the same length as the 'setType'
     list with the proper names of the sets.}
  \item{\code{description} : }{ (OPTIONAL) A character vector the same length
     as the 'setType' list with a note on that set (for the \code{.gmt} file
     included with this package, this field contains hyperlinks to the
     MSigDB description card for that pathway). This field is included when
     \code{description = TRUE}.}
}
}
\description{
Read a set list file in Gene Matrix Transposed (\code{.gmt})
   format, with special performance consideration for large files. Present
   this object as a \code{pathwayCollection} object.
}
\details{
This function uses \code{R}'s \code{\link{readChar}} function to
   improve character input performance over \code{\link{readLines}} (and
   to greatly improve input performance over \code{\link{scan}}).

   See the Broad Institute's "Data Formats" page for a description of the
   Gene Matrix Transposed file format:
   \url{https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats#GMT:_Gene_Matrix_Transposed_file_format_.28.2A.gmt.29}
}
\examples{
  # If you have installed the package:
  data_path <- system.file(
    "extdata", "c2.cp.v6.0.symbols.gmt",
    package = "pathwayPCA", mustWork = TRUE
  )
  geneset_ls <- read_gmt(data_path, description = TRUE)

  # # If you are using the development version from GitHub:
  # geneset_ls <- read_gmt(
  #   "inst/extdata/c2.cp.v6.0.symbols.gmt",
  #   description = TRUE
  # )

}
\seealso{
\code{\link{print.pathwayCollection}}; \code{\link{write_gmt}}
}
