## ----setup, include=FALSE-----------------------------------------------------
# Global knitr chunk options for this document: collapse source and output
# into one block and prefix every output line with "#>"
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")


## ----seqArchR-install, echo=TRUE, eval=FALSE----------------------------------
# 
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("seqArchR")

## ----setup-two, echo=TRUE-----------------------------------------------------
# Attach seqArchR first, then Biostrings (the latter with `quietly = TRUE`)
library("seqArchR")
library("Biostrings", quietly = TRUE)


# Fix the random number generator seed so that repeated runs are reproducible
set.seed(seed = 1234)


## ----load-example-data, echo=TRUE---------------------------------------------
# Locate the example FASTA file shipped with seqArchR; `mustWork = TRUE`
# turns a missing file into an error instead of an empty path
inputFname <- system.file("extdata", "example_data.fa.gz",
                          package = "seqArchR",
                          mustWork = TRUE)

# Creation of one-hot encoded data matrix from FASTA file;
# specifying `dinuc` generates dinucleotide features
inputSeqsMat <- seqArchR::prepare_data_from_FASTA(fasta_fname = inputFname,
                                                  sinuc_or_dinuc = "dinuc")

# Read the same file again with `raw_seq = TRUE` to keep the raw sequences
inputSeqsRaw <- seqArchR::prepare_data_from_FASTA(fasta_fname = inputFname,
                                                  raw_seq = TRUE)

# Number of sequences read
nSeqs <- length(inputSeqsRaw)

# Position labels 1..width, taken from the width of the first sequence.
# seq_len() is used instead of seq(1, n) so that a zero width yields an
# empty vector rather than c(1, 0)
positions <- seq_len(Biostrings::width(inputSeqsRaw[1]))



## ----load-example-data-2, echo=TRUE, eval=TRUE--------------------------------
# Build the one-hot encoded data matrix directly from the DNAStringSet object
# rather than from the FASTA file
inputSeqs_direct <- seqArchR::get_one_hot_encoded_seqs(
  seqs = inputSeqsRaw,
  sinuc_or_dinuc = "dinuc"
)

# Compare the matrix built here with the one built from the FASTA file
identical(x = inputSeqs_direct, y = inputSeqsMat)

## ----plot-seqs, echo=TRUE, fig.dim=c(4,6)-------------------------------------
# Visualize the sequences in an image matrix where the DNA bases are
# assigned fixed colors

# Sequences are passed as plain character strings; no output file name is
# supplied (`save_fname = NULL`)
seqArchR::viz_seqs_acgt_mat(
  as.character(inputSeqsRaw),
  pos_lab = positions,
  save_fname = NULL
)


## ----setup-seqArchR-config-call, echo=TRUE------------------------------------
# Build the seqArchR configuration object; it is the value intended for the
# `config` argument of seqArchR::seqArchR().
# See ?seqArchR::set_config for what each option controls.
seqArchRconfig <- seqArchR::set_config(
  parallelize = TRUE,
  n_cores = 2,
  n_runs = 100,
  k_min = 1,
  k_max = 20,
  mod_sel_type = "stability",
  bound = 10^-6,
  chunk_size = 100,
  result_aggl = "ward.D",
  result_dist = "euclid",
  flags = list(
    debug = FALSE,
    time = TRUE,
    verbose = TRUE,
    plot = FALSE
  )
)

## ----call-seqArchR, echo=TRUE, eval=FALSE-------------------------------------
# # Call/Run seqArchR
# seqArchRresult <- seqArchR::seqArchR(config = seqArchRconfig,
#                             seqs_ohe_mat = inputSeqsMat,
#                             seqs_raw = inputSeqsRaw,
#                             seqs_pos = positions,
#                             total_itr = 2,
#                             set_ocollation = c(TRUE, FALSE))
# 

## ----read-stored-result, echo=FALSE-------------------------------------------

# Load the stored result object shipped in the package's extdata directory;
# `mustWork = TRUE` makes a missing file an error
seqArchRresult <- readRDS(
  file = system.file("extdata", "example_seqArchRresult.rds",
                     package = "seqArchR", mustWork = TRUE)
)


## ----seqArchR-result-clust-factors--------------------------------------------

# Basis vectors at iteration 2, queried via get_clBasVec_k()
seqArchR::get_clBasVec_k(seqArchRresult, iter = 2)

# Same iteration via get_clBasVec_m(); show its dimensions and first rows
i2_bv <- seqArchR::get_clBasVec_m(seqArchRresult, iter = 2)
dim(i2_bv)
head(i2_bv)

## ----viz-BV-1, echo=TRUE, fig.height=5, fig.width=25--------------------------
# Plot the iteration-1 basis vectors as both a heatmap and a sequence logo
seqArchR::viz_bas_vec(
  feat_mat = seqArchR::get_clBasVec_m(seqArchRresult, iter = 1),
  ptype = c("heatmap", "seqlogo"),
  method = "bits",
  sinuc_or_dinuc = "dinuc"
)


## ----viz-BV-2, fig.height=5, fig.width=25, echo=TRUE, warning=FALSE-----------
# Plot the iteration-2 basis vectors as both a heatmap and a sequence logo
seqArchR::viz_bas_vec(
  feat_mat = seqArchR::get_clBasVec_m(seqArchRresult, iter = 2),
  ptype = c("heatmap", "seqlogo"),
  method = "bits",
  sinuc_or_dinuc = "dinuc"
)



## ----clust-itr1, fig.dim=c(4,6), fig.cap="Clusters at iteration 1"------------

# Sequences for iteration 1, requested with `ord = TRUE`, drawn as an
# image matrix
seqArchR::viz_seqs_acgt_mat(
  seqArchR::seqs_str(seqArchRresult, iter = 1, ord = TRUE),
  pos_lab = positions
)


## ----clust-itr2, fig.dim=c(4,6), fig.cap="Clusters at iteration 2"------------

# Sequences for iteration 2, requested with `ord = TRUE`, drawn as an
# image matrix
seqArchR::viz_seqs_acgt_mat(
  seqArchR::seqs_str(seqArchRresult, iter = 2, ord = TRUE),
  pos_lab = positions
)


## ----session_info, include=TRUE, echo=TRUE, results='markup'------------------
# Report the R version and the attached/loaded packages used for this run
sessionInfo()

