% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculateP2G.R
\name{calculateP2G}
\alias{calculateP2G}
\title{Establish peak to gene links based on correlations between ATAC-seq peaks and RNA-seq genes}
\usage{
calculateP2G(
  peakMatrix = NULL,
  expMatrix = NULL,
  reducedDim = NULL,
  cutoff_stat = c("p_val", "FDR", "Correlation"),
  cutoff_sig = 0.05,
  cor_cutoff = 0.5,
  cellNum = 100,
  maxDist = 250000,
  exp_assay = "logcounts",
  peak_assay = "counts",
  gene_symbol = "name",
  clusters = NULL,
  cor_method = c("pearson", "spearman", "kendall"),
  assignment_method = c("correlation", "nearest"),
  frac_RNA = 0,
  frac_ATAC = 0,
  nRandConns = 1e+05,
  batch_size = 20000,
  BPPARAM = BiocParallel::SerialParam(progressbar = TRUE),
  verbose = TRUE
)
}
\arguments{
\item{peakMatrix}{A SingleCellExperiment object containing counts of chromatin accessibility at each peak region or genomic bin from scATAC-seq.
\code{rowRanges} should contain genomic positions of the peaks in the form of \code{GRanges}.}

\item{expMatrix}{A SingleCellExperiment object containing gene expression counts from scRNA-seq. \code{rowRanges} should contain genomic positions of
the genes in the form of \code{GRanges}. \code{rowData} should contain a column of gene symbols with column name matching the \code{gene_symbol} argument.}

\item{reducedDim}{A matrix of dimension reduced values}

\item{cutoff_stat}{The name of the statistic used to determine significant peak to gene links.
Should be \code{Correlation}, \code{p_val} or \code{FDR}.}

\item{cutoff_sig}{A numeric scalar to specify the p-value or FDR cutoff for the links between ATAC-seq peaks and RNA-seq genes.
Default is set to 0.05.}

\item{cor_cutoff}{A numeric scalar to specify the correlation cutoff between ATAC-seq peaks and RNA-seq genes to assign peak to gene links.
Default correlation cutoff is 0.5. Takes effect only if \code{cutoff_stat} is set to \code{Correlation}.}

\item{cellNum}{A numeric to specify the average number of cells per k-means cluster. Alternatively, an object of the class \code{CellNumSol}
returned by \code{optimizeMetacellNumber} function. If set to \code{NULL}, its value is determined automatically, based on the number of cells.}

\item{maxDist}{An integer to specify the base pair extension from transcription start site for overlap with peak regions}

\item{exp_assay}{String indicating the name of the assay in expMatrix for gene expression}

\item{peak_assay}{String indicating the name of the assay in peakMatrix for chromatin accessibility}

\item{gene_symbol}{String indicating the column name in the rowData of expMatrix that corresponds to gene symbol}

\item{clusters}{A vector corresponding to the cluster labels for calculation of correlations within each cluster. If left NULL, correlation is calculated across
all clusters. See details for the use of clusters}

\item{cor_method}{String indicating which correlation coefficient is to be computed. One of 'pearson' (default), 'kendall', or 'spearman'.}

\item{assignment_method}{String indicating the method used to assign target genes to regulatory elements. \code{correlation} is based on correlation between ATAC and RNA
above a correlation threshold set by \code{cor_cutoff}. \code{nearest} assigns the closest expressed gene to regulatory element meeting a correlation threshold set by \code{cor_cutoff}.
Set \code{cor_cutoff} to 0 if wishing to assign the closest expressed gene without any correlation cutoff.}

\item{frac_RNA}{A numeric value indicating the fraction of cells expressing a gene. It is used to filter the gene expression matrix for expressed genes}

\item{frac_ATAC}{A numeric value indicating the fraction of cells showing chromatin accessibility. It is used to filter the peak matrix for open regions}

\item{nRandConns}{An integer specifying the number of false connections between regulatory elements and target genes which
will be used to calculate empirical p-values of correlation coefficients}

\item{batch_size}{An integer specifying how many peak-gene pairs are
processed per batch during parallel correlation calculations.}

\item{BPPARAM}{A BiocParallelParam object specifying whether summation should be parallelized. Use BiocParallel::SerialParam() for
serial evaluation and use BiocParallel::MulticoreParam() for parallel evaluation}

\item{verbose}{A boolean indicating whether messages should be emitted during computation}
}
\value{
A DataFrame of peak to gene correlations
}
\description{
Establish peak to gene links based on correlations between ATAC-seq peaks and RNA-seq genes
}
\examples{
# Create a mock SingleCellExperiment object for the gene expression matrix.
# The seed is fixed first so that the random draws below are reproducible.
set.seed(1000)
gene_sce <- scuttle::mockSCE()
# Log-normalize the mock counts; calculateP2G reads the logcounts assay by default
# (see the exp_assay argument).
gene_sce <- scuttle::logNormCounts(gene_sce)
# Mock genomic coordinates for the genes: the labels chr1 to chr4 are each given a
# run length of nrow(gene_sce)/4, and ranges are 100 bp wide with start positions
# spaced 1000 bp apart, beginning at position 1.
gene_gr <- GenomicRanges::GRanges(seqnames = Rle(c('chr1', 'chr2', 'chr3','chr4'), nrow(gene_sce)/4),
                   ranges = IRanges(start = seq(from = 1, length.out=nrow(gene_sce), by = 1000),
                   width = 100))
# NOTE(review): this assigns the row names to themselves and looks like a no-op;
# confirm whether it can be dropped.
rownames(gene_sce) <- rownames(gene_sce)
# Store the gene identifiers in a column called name, which is the default value
# of the gene_symbol argument.
gene_gr$name <- rownames(gene_sce)
rowRanges(gene_sce) <- gene_gr

# Create a mock SingleCellExperiment object for the peak matrix.
# Peaks are all placed on chr1, 100 bp wide, with starts spaced 1000 bp apart.
peak_gr <- GenomicRanges::GRanges(seqnames = 'chr1',
                   ranges = IRanges(start = seq(from = 1, to = 10000, by = 1000), width = 100))
# Random accessibility counts drawn from 0 to 4: one row per peak and one column
# per cell of gene_sce.
peak_counts <- matrix(sample(x = 0:4, size = ncol(gene_sce)*length(peak_gr), replace = TRUE),
                      nrow = length(peak_gr), ncol=ncol(gene_sce))
# Reuse the column data of gene_sce so that both objects describe the same cells.
peak_sce <- SingleCellExperiment(list(counts = peak_counts), colData = colData(gene_sce))
rowRanges(peak_sce) <- peak_gr
rownames(peak_sce) <- paste0('peak',1:10)
# Create a mock reducedDim matrix: one row per cell and 50 columns of uniform
# random values between 0 and 1.
reducedDim_mat <- matrix(runif(ncol(gene_sce)*50, min = 0, max = 1), nrow = ncol(gene_sce), 50)
# Compute peak to gene links, overriding the default cellNum of 100 with 20.
p2g <- calculateP2G(peakMatrix = peak_sce, expMatrix = gene_sce, reducedDim = reducedDim_mat,
                    cellNum = 20)
}
\author{
Xiaosai Yao, Shang-yang Chen
}
