% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cluster_analysis.R
\name{cluster_analysis}
\alias{cluster_analysis}
\title{Function to perform Kmeans or Hierarchical clustering analysis of the
selected gene probe expression data}
\usage{
cluster_analysis(sel.exp, cluster_type = "HClust", seed = NULL,
  distance = "euclidean", linkage_type = "ward.D2",
  gene_distance = "correlation", num_clusters, data_name,
  probe_rank = "SD_Rank", probe_num_selection = "Fixed_Probe_Num",
  cluster_num_selection = "Fixed_Clust_Num")
}
\arguments{
\item{sel.exp}{Object containing the numeric selected gene expression
matrix. This object is an output of the probe_ranking function.}

\item{cluster_type}{String indicating the type of clustering method to use.
"Kmeans" or "HClust" are the two options. The default is set to "HClust".}

\item{seed}{A positive integer (>= 1) indicating a random starting position
for the centers of the clusters to be used for the k-means clustering
algorithm. The default value is set to NULL and is not used when
Hierarchical clustering is chosen.}

\item{distance}{String describing the distance metric to use for
the dist function during hierarchical clustering. dist uses a default
distance metric of Euclidean distance. Options include one of "euclidean",
"maximum", "manhattan", "canberra", "binary", or "minkowski". Kmeans
clustering does not use a distance metric. The default value is set
to "euclidean".}

\item{linkage_type}{String describing the linkage metric to be
used for HClust. The default is set to "ward.D2"; however, other options
include "average", "complete", "median", "centroid",
"single", and "mcquitty".}

\item{gene_distance}{String describing the distance measure to be used for
the Dist function when performing hierarchical clustering of genes.
Options include one of "euclidean", "maximum", "manhattan", "canberra",
"binary", "pearson", "abspearson", "correlation", "abscorrelation",
"spearman" or "kendall". The default value is set to "correlation".
The argument can be set to NULL when Kmeans clustering is used.}

\item{num_clusters}{Positive integer to specify the number of clusters
samples will be divided into. This number is determined by the
number_clusters function.}

\item{data_name}{String indicating the cancer type and name of the
dataset being analyzed. This name will be used to label the sample
dendrograms and heatmap files.}

\item{probe_rank}{String indicating the feature selection method used
in the probe_ranking function. Options include "CV_Rank", "CV_Guided",
"SD_Rank", and "Poly".}

\item{probe_num_selection}{String indicating the way in which probes
were selected in the number_probes function. Options include
"Fixed_Probe_Num", "Percent_Probe_Num", and "Adaptive_Probe_Num".}

\item{cluster_num_selection}{String indicating how the number of clusters
was determined in the number_clusters function. Options include
"Fixed_Clust_Num" and "Gap_Statistic".}
}
\value{
Returns a vector containing the sample information and respective
cluster number. In addition, this function outputs sample cluster
dendrograms, average expression for each probe in each cluster, and
heatmap images and Java TreeView files for HClust dendrograms.
}
\description{
Function to perform Kmeans or Hierarchical clustering analysis of the
selected gene probe expression data.
}
\examples{

# Example 1: HClust Analysis
# Workflow shown here: load data -> choose probe count -> rank/select probes
# -> choose cluster count -> cluster the selected expression matrix.
# Load in a data file
# (system.file looks up the example expression file in the "extdata"
# directory of the multiClust package)
data_file <- system.file("extdata", "GSE2034.normalized.expression.txt",
    package="multiClust")
data <- input_file(input=data_file)
# Choose a fixed number of 300 probes to select
# NOTE: gene_num is not referenced again in this example; the call to
# probe_ranking below passes the literal value 300 as probe_number.
gene_num <- number_probes(input=data_file, data.exp=data, Fixed=300,
    Percent=NULL, Adaptive=NULL)
# Choose the "CV_Rank" Method for gene ranking
# sel.data is the object later passed as sel.exp to cluster_analysis
sel.data <- probe_ranking(input=data_file, probe_number=300,
    probe_num_selection="Fixed_Probe_Num", data.exp=data, method="CV_Rank")
# Choose a fixed cluster number of 3
# NOTE: clust_num is not referenced again in this example; both
# cluster_analysis calls below pass the literal value 3 as num_clusters.
clust_num <- number_clusters(data.exp=data, Fixed=3, gap_statistic=NULL)

# Call function using HClust parameters
# probe_rank, probe_num_selection and cluster_num_selection are given the
# same method names that were used in the steps above.
hclust_analysis <- cluster_analysis(sel.exp=sel.data, cluster_type="HClust",
    seed = NULL, distance="euclidean", linkage_type="ward.D2",
    gene_distance="correlation", num_clusters=3,
    data_name="GSE2034 Breast", probe_rank="CV_Rank",
    probe_num_selection="Fixed_Probe_Num",
    cluster_num_selection="Fixed_Clust_Num")

# Example 2: Kmeans Analysis
# Reuses sel.data from Example 1.
# Call function for Kmeans parameters
# A seed is supplied here, while distance, linkage_type and gene_distance
# are passed as NULL (the argument descriptions state that Kmeans does not
# use a distance metric and that gene_distance may be NULL for Kmeans).
kmeans_analysis <- cluster_analysis(sel.exp=sel.data, cluster_type="Kmeans",
    seed = 1, distance=NULL, linkage_type=NULL, gene_distance=NULL,
    num_clusters=3, data_name="GSE2034 Breast",
    probe_rank="CV_Rank", probe_num_selection="Fixed_Probe_Num",
    cluster_num_selection="Fixed_Clust_Num")
}
\author{
Nathan Lawlor, Alec Fabbri
}
\seealso{
\code{\link{probe_ranking}}, \code{\link{number_clusters}},
\code{\link{number_probes}}, \code{\link[stats]{hclust}},
\code{\link[stats]{kmeans}},
\code{\link[stats]{dist}}
}

