% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/geneSet2sparseMatrix.R
\name{geneSet2sparseMatrix}
\alias{geneSet2sparseMatrix}
\title{geneSet2sparseMatrix}
\usage{
geneSet2sparseMatrix(term, geneset, value = NULL, sep = ",")
}
\arguments{
\item{term}{a vector of character values indicating the names of gene sets,
e.g., pathway names and miRNA names.}

\item{geneset}{a vector of character values, where each value is a gene list 
separated by 'sep'.}

\item{value}{a vector of numeric values indicating the connectivity
between terms and genes. It can take either discrete values (0 and 1) or
continuous values.}

\item{sep}{a character which separates the genes in the geneset.}
}
\value{
a sparse matrix where each column corresponds to a term and each row
corresponds to a gene.
}
\description{
geneSet2sparseMatrix transforms gene sets to a sparse matrix, which 
represents the connectivity between terms and genes.
}
\examples{

# download the gmt file
gmt <- readLines( paste0('http://amp.pharm.mssm.edu/CREEDS/download/',
'single_drug_perturbations-v1.0.gmt') )

# obtain the index of up-regulated and down-regulated gene sets
index_up <- grep('-up',gmt)
index_down <- grep('-dn',gmt)

# transform the gmt file into gene sets. Each gene set is a data frame
# comprising three vectors:
# term (here the drug), geneset (a gene symbol list separated by commas),
# and value (1 or -1, separated by commas)
gff_up <- gmt2geneSet( gmt[index_up], termCol=c(1,2), singleValue = 1 )
gff_down <- gmt2geneSet( gmt[index_down], termCol=c(1,2), singleValue = -1 )

# extract the drug names by stripping the '-up' / '-dn' direction suffix,
# then check that the up- and down-regulated gene sets line up term by term
# (the element-wise pairing below relies on this)
term_up<-vapply(gff_up$term, function(x) gsub('-up','',x), character(1))
term_down<-vapply(gff_down$term, function(x) gsub('-dn','',x), character(1))
all(term_up==term_down)

# combine the up-regulated and down-regulated gene names for each
# drug perturbation; seq_len() is used instead of 1:nrow() so that an
# empty gene set table yields an empty result rather than indexing rows 1, 0
geneset <- vapply(seq_len(nrow(gff_up)), function(i) paste(gff_up$geneset[i],
gff_down$geneset[i],sep=','), character(1) )

# use 1 and -1 to indicate the direction of up and down-regulated genes
value <- vapply( seq_len(nrow(gff_up)) , function(i) paste(gff_up$value[i],
gff_down$value[i],sep=',') , character(1) )


# transform the gene set into a matrix, where the row represents the gene,
# the column represents the drug perturbation, and each entry takes values
# of 1 and -1
net1 <- geneSet2Net( term=term_up , geneset=geneset , value=value )
# transform the gene set into a sparse matrix, where the row represents the
# gene, the column represents the drug perturbation, and each entry takes
# values of 1 and -1
net2 <- geneSet2sparseMatrix( term=term_up , geneset=geneset , value=value )
tail(net1[,1:30])
tail(net2[,1:30])
# the sparse matrix is much smaller in memory than the dense matrix
format( object.size(net1), units = "auto")
format( object.size(net2), units = "auto")

}
\seealso{
\code{\link{gmt2geneSet}}; \code{\link{geneSet2Net}}
}
\author{
Shijia Zhu, \email{shijia.zhu@mssm.edu}
}
