#' Compute mutation densities at ECA and MRCA
#'
#' This function takes the normalized clonal mutation counts obtained with
#' \code{\link{normalizeCounts}}to estimate mutation densities at MRCA and an
#' earlier common ancestor, ECA.
#' @param normObj normalized clonal SNV counts stratified by copy number as
#' generated by \code{\link{normalizeCounts}}.
#' @param min.seg.size the minimal segment length to be included in the
#' quantification
#' @param fp.mean optional, the average false positive rate of clonal mutations
#' (e.g., due to incomplete tissue sampling). Defaults to 0.
#' @param fp.sd optional, the standard deviation of the false positive rate of
#' clonal mutations (e.g., due to incomplete tissue sampling). Defaults to 0.
#' @param excl.chr a vector of chromosomes that should be excluded from the
#' quantification. e.g., due to reporter constructs in animal models.
#' @return a data table reporting the assignment of individual segments to ECA
#' or MRCA. Mutation densities at ECA and MRCA, and the bootstrapped 95% CIs
#' are stored as attributes. The columns in the data table report the following
#' information:
#' \item{`chrom`}{Chromsoome}
#' \item{`TCN`}{Total copy number}
#' \item{`A`}{Number of A alleles}
#' \item{`B`}{Number of B alleles}
#' \item{`Seglength`}{Number of bps with the given copy number configuration on
#' this chromosome}
#' \item{`n_mut_A`}{Normalized number of mutations present on all A alleles}
#' \item{`n_mut_B`}{Normalized number of mutations present on all B alleles}
#' \item{`n_mut_total_clonal`}{Normalized number of mutations per single copy
#' of the segment}
#' \item{`density_A_mean`}{Normalized mean density of mutations present on all
#' A alleles (1/Mb)}
#' \item{`density_B_mean`}{Normalized mean density of mutations present on all
#' B alleles (1/Mb)}
#' \item{`density_total_mean`}{Normalized mean density of mutations per single
#' copy of the segment (1/Mb)}
#' \item{`density_total_lower`}{Lower bound (95% CI) of normalized density of
#' mutations per single copy of the segment (1/Mb)}
#' \item{`density_total_upper`}{Upper bound (95% CI) of normalized density of
#' mutations per single copy of the segment (1/Mb)}
#' \item{`density_A_lower`}{Lower bound (95% CI) of normalized density of
#' mutations on all A alleles of the segment (1/Mb)}
#' \item{`density_A_upper`}{Upper bound (95% CI) of normalized density of
#' mutations on all A alleles of the segment (1/Mb)}
#' \item{`density_B_lower`}{Lower bound (95% CI) of normalized density of
#' mutations on all B alleles of the segment (1/Mb)}
#' \item{`density_B_upper`}{Upper bound (95% CI) of normalized density of
#' mutations on all B alleles of the segment (1/Mb)}
#' \item{`p_total_to_mrca`}{Probability that the density of mutations per
#' single copy of the segment agrees with the mutation density at MRCA.}
#' \item{`p_A_to_to_mrca`}{Probability that the density of mutations on all
#' A alleles of the segment agrees with the mutation density at MRCA.}
#' \item{`p_B_to_to_mrca`}{Probability that the density of mutations on all
#' B alleles of the segment agrees with the mutation density at MRCA.}
#' \item{`p_adj_total_to_mrca`}{Probability that the density of mutations on
#' all alleles of the segment agrees with the mutation density at MRCA,
#' adjusted for multiple sampling (Holm correction).}
#' \item{`p_adj_A_to_mrca`}{Probability that the density of mutations on all
#' A alleles of the segment agrees with the mutation density at MRCA, adjusted
#' for multiple sampling (Holm correction).}
#' \item{`p_adj_B_to_mrca`}{Probability that the density of mutations on all
#' B alleles of the segment agrees with the mutation density at MRCA, adjusted
#' for multiple sampling (Holm correction).}
#' \item{`MRCA_qual`}{Quality control. `PASS`, if the density of mutations on
#' single copies agrees with the density at the MRCA.}
#' \item{`p_total_to_eca`}{Probability that the density of mutations per single
#' copy of the segment agrees with the mutation density at ECA.}
#' \item{`p_A_to_to_eca`}{Probability that the density of mutations on all
#' A alleles of the segment agrees with the mutation density at ECA.}
#' \item{`p_B_to_to_eca`}{Probability that the density of mutations on all
#' B alleles of the segment agrees with the mutation density at ECA.}
#' \item{`p_adj_total_to_eca`}{Probability that the density of mutations on
#' all alleles of the segment agrees with the mutation density at ECA, adjusted
#' for multiple sampling (Holm correction).}
#' \item{`p_adj_A_to_eca`}{Probability that the density of mutations on all
#' A alleles of the segment agrees with the mutation density at ECA, adjusted
#' for multiple sampling (Holm correction).}
#' \item{`p_adj_B_to_eca`}{Probability that the density of mutations on all
#' B alleles of the segment agrees with the mutation density at ECA, adjusted
#' for multiple sampling (Holm correction).}
#' \item{`A_time`}{Time of A allele gain (can be "ECA", "MRCA", "ECA/MRCA" if
#' assignment is unclear, or "not mapped to ECA or MRCA" if density does not
#' agree with either ECA or MRCA).}
#' \item{`B_time`}{Time of B allele gain (can be "ECA", "MRCA", "ECA/MRCA" if
#' assignment is unclear, or "not mapped to ECA or MRCA" if density does not
#' agree with either ECA or MRCA).}
#' @examples
#' snvs <- system.file("extdata", "NBE15",
#'     "snvs_NBE15_somatic_snvs_conf_8_to_10.vcf",
#'     package = "LACHESIS"
#' )
#' s_data <- readVCF(vcf = snvs, vcf.source = "dkfz")
#' aceseq_cn <- system.file("extdata", "NBE15",
#'     "NBE15_comb_pro_extra2.51_1.txt",
#'     package = "LACHESIS"
#' )
#' c_data <- readCNV(aceseq_cn)
#' nb <- nbImport(cnv = c_data, snv = s_data, purity = 1, ploidy = 2.51)
#' cl_muts <- clonalMutationCounter(nb)
#' norm_muts <- normalizeCounts(cl_muts)
#' mrca <- MRCA(norm_muts)
#' @import data.table
#' @importFrom stats p.adjust pnbinom qchisq quantile rnorm
#' @export

MRCA <- function(normObj = NULL, min.seg.size = 10^7, fp.mean = 0, fp.sd = 0,
                 excl.chr = NULL) {
    chrom <- TCN <- A <- B <- Seglength <- n_mut_A <- n_mut_B <-
        n_mut_total_clonal <- density_total_mean <- density_A_mean <-
        density_B_mean <- density_total_lower <- density_total_upper <-
        density_A_lower <- density_A_upper <- density_B_lower <-
        density_B_upper <- p_total_to_mrca <- p_A_to_mrca <- p_B_to_mrca <-
        p_adj_total_to_mrca <- p_adj_A_to_mrca <- p_adj_B_to_mrca <- MRCA_qual <-
        p_A_to_eca <- p_B_to_eca <- p_adj_A_to_eca <- p_adj_B_to_eca <- A_time <-
        B_time <- NULL

    if (is.null(normObj)) {
        stop("Please provide a normObj, as generated by normalizeCounts")
    }

    tmp1 <- sum(normObj$Seglength <= min.seg.size)
    tmp2 <- sum(normObj$chrom %in% excl.chr)
    message(
        "Filtering ", tmp1, " segments below the minimal segment size and ",
        tmp2, " segments on excluded chromosomes."
    )
    rm(tmp1, tmp2)
    workObj <- normObj[Seglength > min.seg.size & !chrom %in% excl.chr, ]

    message("Computing mutation density at MRCA...")
    # mean mutation density (SNVs/Mb)
    tot.muts.mrca <- workObj[, sum(n_mut_total_clonal)]
    tot.seglength.mrca <- workObj[, sum(Seglength)]

    mutation.time.mrca <- tot.muts.mrca / tot.seglength.mrca * (1 - fp.mean) *
        10^6

    # bootstrap upper and lower limits of the mutation time, while randomly
    # subtracting false positive mutations
    bootstrapped.mrca.time <- vapply(seq_len(1000), function(x) {
        res <- sample(
            x = seq_len(nrow(workObj)), size = nrow(workObj),
            prob = workObj[, Seglength], replace = TRUE
        )
        res <- workObj[res, sum(n_mut_total_clonal) / sum(Seglength)] * 10^6
        res <- res - res * rnorm(n = 1, mean = fp.mean, sd = fp.sd)
    }, numeric(1))
    mutation.time.mrca.lower <- quantile(bootstrapped.mrca.time, 0.025)
    mutation.time.mrca.upper <- quantile(bootstrapped.mrca.time, 0.975)

    # test with a negative binomial distribution whether the individual
    # fragments agree with a joint time point (hence accounting for
    # overdispersion of the local mutation rate):
    workObj$p_total_to_mrca <- apply(workObj[, c(
        "n_mut_total_clonal",
        "Seglength"
    )], 1, function(x) {
        lower.tail <- ifelse(x[1] < .true_round(tot.muts.mrca * x[2] /
            tot.seglength.mrca), TRUE, FALSE)
        test <- pnbinom(
            q = x[1], size = .true_round(tot.muts.mrca),
            prob = .true_round(tot.muts.mrca) /
                (.true_round(tot.muts.mrca) * (1 + x[2] /
                    tot.seglength.mrca)),
            lower.tail = lower.tail
        )
    })

    # test whether mutation densities on A allele agree with the density at MRCA
    workObj$p_A_to_mrca <- apply(
        workObj[, c("A", "n_mut_A", "Seglength")],
        1, function(x) {
            if (x[1] <= 1) {
                return(NA)
            } # no gain
            lower.tail <- ifelse(x[2] < .true_round(tot.muts.mrca *
                (1 - fp.mean) * x[3] /
                tot.seglength.mrca),
            TRUE, FALSE
            )
            test <- pnbinom(
                q = x[2], size = .true_round(tot.muts.mrca * (1 - fp.mean)),
                prob = .true_round(tot.muts.mrca * (1 - fp.mean)) /
                    (.true_round(tot.muts.mrca * (1 - fp.mean)) *
                        (1 + x[3] / tot.seglength.mrca)),
                lower.tail = lower.tail
            )
        }
    )

    # test whether mutation densities on B allele agree with the density at MRCA
    workObj$p_B_to_mrca <- apply(
        workObj[, c("A", "B", "n_mut_B", "Seglength")],
        1, function(x) {
            if (x[2] <= 1 | x[1] == x[2]) {
                return(NA)
            } # no gain or A = B
            lower.tail <- ifelse(x[3] < .true_round(tot.muts.mrca *
                (1 - fp.mean) * x[4] /
                tot.seglength.mrca),
            TRUE, FALSE
            )
            test <- pnbinom(
                q = x[3], size = .true_round(tot.muts.mrca * (1 - fp.mean)),
                prob = .true_round(tot.muts.mrca * (1 - fp.mean)) /
                    (.true_round(tot.muts.mrca * (1 - fp.mean)) *
                        (1 + x[4] / tot.seglength.mrca)),
                lower.tail = lower.tail
            )
        }
    )

    # adjust p values and cut off at 0.01. Gains with p < 0.01 likely occurred
    # at a different time point
    adj.p <- p.adjust(unlist(workObj[, c(
        "p_total_to_mrca", "p_A_to_mrca",
        "p_B_to_mrca"
    )]))
    workObj[, "p_adj_total_to_mrca"] <- adj.p[seq_len(nrow(workObj))]
    adj.p <- adj.p[-(seq_len(nrow(workObj)))]
    workObj[, "p_adj_A_to_mrca"] <- adj.p[seq_len(nrow(workObj))]
    adj.p <- adj.p[-(seq_len(nrow(workObj)))]
    workObj[, "p_adj_B_to_mrca"] <- adj.p[seq_len(nrow(workObj))]
    rm(adj.p)

    # fragments where total mutation count does not agree with mutation density
    # at MRCA (quality control):
    if (mutation.time.mrca == 0) {
        workObj$MRCA_qual <- "FAIL"
    } else {
        workObj$MRCA_qual <- workObj[, lapply(.SD, function(x) {
            ifelse(x < 0.01, "FAIL", "PASS")
        }),
        .SDcols = "p_adj_total_to_mrca"
        ]
    }

    if (any(workObj$MRCA_qual == "FAIL")) {
        fs <- sum(workObj$MRCA_qual == "FAIL")
        warning(fs, " segments did not conform to the mutation density at MRCA.")
        rm(fs)
    }

    message("Computing mutation densities at ECA...")
    # quantify mutation densities at an earlier common ancestor (ECA)

    # mean mutation density (SNVs/Mb); exclude fragments with counts higher
    # than at MRCA, as they are likely outliers (gains must have occurred latest
    # in the MRCA)
    mut.counts.eca <- c(
        workObj[A > 1 & density_A_mean <= mutation.time.mrca &
            p_adj_A_to_mrca < 0.01, n_mut_A],
        workObj[B > 1 & A != B & density_B_mean <= mutation.time.mrca &
            p_adj_B_to_mrca < 0.01, n_mut_B]
    )
    seg.length.eca <- c(
        workObj[A > 1 & density_A_mean <= mutation.time.mrca &
            p_adj_A_to_mrca < 0.01, Seglength],
        workObj[B > 1 & A != B & density_B_mean <= mutation.time.mrca &
            p_adj_B_to_mrca < 0.01, Seglength]
    )

    mutation.time.eca <- sum(mut.counts.eca) / sum(seg.length.eca) * 10^6

    if (!is.na(mutation.time.eca)) {
        # bootstrap upper and lower limits of the mutation time
        bootstrapped.eca.time <- vapply(seq_len(1000), function(x) {
            res <- sample(
                x = seq_len(length(mut.counts.eca)),
                size = length(mut.counts.eca),
                prob = seg.length.eca, replace = TRUE
            )
            res <- sum(mut.counts.eca[res]) / sum(seg.length.eca[res]) * 10^6
        }, numeric(1))
        mutation.time.eca.lower <- quantile(bootstrapped.eca.time, 0.025)
        mutation.time.eca.upper <- quantile(bootstrapped.eca.time, 0.975)
    } else {
        mutation.time.eca.lower <- NA
        mutation.time.eca.upper <- NA
    }


    # test whether mutation densities on A allele agree with the density at ECA
    workObj$p_A_to_eca <- apply(workObj[, c(
        "A", "n_mut_A", "Seglength",
        "p_adj_A_to_mrca"
    )], 1, function(x) {
        if (x[1] <= 1 | is.na(x[4]) | x[4] >= 0.01) {
            return(NA)
        } # no gain or already mapped to MRCA
        if (x[2] == 0 & sum(mut.counts.eca) == 0 & length(mut.counts.eca) > 0) {
            return(1)
        }
        if (length(mut.counts.eca) == 1) {
            x[2] <- round(x[2])
        }
        lower.tail <- ifelse(x[2] < round(sum(mut.counts.eca) * x[3] /
            sum(seg.length.eca)), TRUE, FALSE)
        test <- pnbinom(
            q = x[2], size = round(sum(mut.counts.eca)),
            prob = round(sum(mut.counts.eca)) /
                (round(sum(mut.counts.eca)) * (1 + x[3] /
                    sum(seg.length.eca))),
            lower.tail = lower.tail
        )
        return(test)
    })

    # test whether mutation densities on B allele agree with the density at ECA
    workObj$p_B_to_eca <- apply(
        workObj[, c(
            "A", "B", "n_mut_B", "Seglength",
            "p_adj_B_to_mrca"
        )], 1,
        function(x) {
            if (x[2] <= 1 | x[1] == x[2] | is.na(x[4]) | x[4] >= 0.01) {
                return(NA)
            } # no gain or A = B or already mapped to MRCA
            if (x[2] == 0 & sum(mut.counts.eca) == 0 & length(mut.counts.eca) > 0) {
                return(1)
            }
            if (length(mut.counts.eca) == 1) {
                x[2] <- round(x[2])
            }
            lower.tail <- ifelse(x[2] < round(sum(mut.counts.eca) * x[3] /
                sum(seg.length.eca)), TRUE, FALSE)
            test <- pnbinom(
                q = x[2], size = round(sum(mut.counts.eca)),
                prob = round(sum(mut.counts.eca)) /
                    (round(sum(mut.counts.eca)) *
                        (1 + x[3] / sum(seg.length.eca))),
                lower.tail = lower.tail
            )
        }
    )

    # adjust p values and cut off at 0.01. Gains with p < 0.01 likely occurred
    # at a different time point
    adj.p <- p.adjust(unlist(workObj[, c("p_A_to_eca", "p_B_to_eca")]))
    workObj[, "p_adj_A_to_eca"] <- adj.p[seq_len(nrow(workObj))]
    adj.p <- adj.p[-(seq_len(nrow(workObj)))]
    workObj[, "p_adj_B_to_eca"] <- adj.p[seq_len(nrow(workObj))]
    rm(adj.p)


    # do any of the gains that agreed with the mutation density at MRCA also
    # agree with the density at ECA?
    # test whether mutation densities on A allele agree with the density at ECA
    workObj$p_A_to_eca <- apply(
        workObj[, c(
            "A", "n_mut_A", "Seglength",
            "p_adj_A_to_mrca", "p_A_to_eca"
        )], 1,
        function(x) {
            if (x[1] <= 1 | is.na(x[4]) | x[4] < 0.01) {
                return(x[5])
            } # no gain or already mapped to MRCA
            if (x[2] == 0 & sum(mut.counts.eca) == 0 &
                length(mut.counts.eca) > 0) {
                return(1)
            }
            if (length(mut.counts.eca) == 1) {
                x[2] <- round(x[2])
            }
            lower.tail <- ifelse(x[2] < round(sum(mut.counts.eca) * x[3] /
                sum(seg.length.eca)), TRUE, FALSE)
            test <- pnbinom(
                q = x[2], size = round(sum(mut.counts.eca)),
                prob = round(sum(mut.counts.eca)) /
                    (round(sum(mut.counts.eca)) * (1 + x[3] /
                        sum(seg.length.eca))),
                lower.tail = lower.tail
            )
        }
    )

    # test whether mutation densities on B allele agree with the density at ECA
    workObj$p_B_to_eca <- apply(
        workObj[, c(
            "A", "B", "n_mut_B", "Seglength",
            "p_adj_B_to_mrca", "p_B_to_eca"
        )],
        1, function(x) {
            if (x[2] <= 1 | x[1] == x[2] | is.na(x[4]) | x[4] < 0.01) {
                return(x[5])
            } # no gain or A = B or already mapped to MRCA
            if (x[2] == 0 & sum(mut.counts.eca) == 0 &
                length(mut.counts.eca) > 0) {
                return(1)
            }
            if (length(mut.counts.eca) == 1) {
                x[2] <- round(x[2])
            }
            lower.tail <- ifelse(x[2] < round(sum(mut.counts.eca) * x[3] /
                sum(seg.length.eca)), TRUE, FALSE)
            test <- pnbinom(
                q = x[2], size = round(sum(mut.counts.eca)),
                prob = round(sum(mut.counts.eca)) /
                    (round(sum(mut.counts.eca)) * (1 + x[3] /
                        sum(seg.length.eca))),
                lower.tail = lower.tail
            )
        }
    )

    # adjust p values and cut off at 0.01. Gains with p < 0.01 likely occurred
    # at a different time point
    if (nrow(workObj[p_adj_A_to_mrca >= 0.01 |
        p_adj_B_to_mrca >= 0.01, ]) > 0) {
        adj.p <- p.adjust(unlist(c(
            workObj[p_adj_A_to_mrca >= 0.01, p_A_to_eca],
            workObj[p_adj_B_to_mrca >= 0.01, p_B_to_eca]
        )))
        if (any(workObj$p_adj_A_to_mrca >= 0.01, na.rm = TRUE)) {
            workObj[p_adj_A_to_mrca >= 0.01, "p_adj_A_to_eca"] <-
                adj.p[seq_len(sum(workObj$p_adj_A_to_mrca >= 0.01,
                    na.rm = TRUE
                ))]
            adj.p <- adj.p[-(seq_len(sum(workObj$p_adj_A_to_mrca >= 0.01,
                na.rm = TRUE
            )))]
        }
        if (any(workObj$p_adj_B_to_mrca >= 0.01, na.rm = TRUE)) {
            workObj[p_adj_B_to_mrca >= 0.01, "p_adj_B_to_eca"] <- adj.p
        }
        rm(adj.p)
    }

    # classify gains as mapping to MRCA, ECA, both or none

    workObj$A_time <- apply(
        workObj[, c("A", "p_adj_A_to_mrca", "p_adj_A_to_eca")],
        1, function(x) {
            if (x[1] <= 1) {
                return(NA)
            }
            if (is.na(x[2]) & is.na(x[3])) {
                return("not mapped to ECA or MRCA")
            } else if (x[2] >= 0.01 & !is.na(x[3]) & x[3] >= 0.01) {
                return("ECA/MRCA")
            } else if (x[2] >= 0.01) {
                return("MRCA")
            } else if (!is.na(x[3]) & x[3] >= 0.01) {
                return("ECA")
            } else {
                return("not mapped to ECA or MRCA")
            }
        }
    )

    workObj$B_time <- apply(workObj[, c(
        "A", "A_time", "B", "p_adj_B_to_mrca",
        "p_adj_B_to_eca"
    )], 1, function(x) {
        if (x[3] <= 1) {
            return(NA)
        }
        if (x[1] == x[3]) {
            return(x[2])
        } else if (is.na(x[4]) & is.na(x[5])) {
            return("not mapped to ECA or MRCA")
        } else if (x[4] >= 0.01 & !is.na(x[5]) & x[5] >= 0.01) {
            return("ECA/MRCA")
        } else if (x[4] >= 0.01) {
            return("MRCA")
        } else if (!is.na(x[5]) & x[5] >= 0.01) {
            return("ECA")
        } else {
            return("not mapped to ECA or MRCA")
        }
    })

    attr(workObj, "MRCA_time_mean") <- mutation.time.mrca
    attr(workObj, "MRCA_time_lower") <- mutation.time.mrca.lower
    attr(workObj, "MRCA_time_upper") <- mutation.time.mrca.upper
    attr(workObj, "ECA_time_mean") <- mutation.time.eca
    attr(workObj, "ECA_time_lower") <- mutation.time.eca.lower
    attr(workObj, "ECA_time_upper") <- mutation.time.eca.upper

    return(workObj)
}


.true_round <- function(number, digits = 0) {
    posneg <- sign(number)
    number <- abs(number) * 10^digits
    number <- number + 0.5 + sqrt(.Machine$double.eps)
    number <- trunc(number)
    number <- number / 10^digits
    number * posneg
}
