#' @title Overlay Kaplan-Meier Plot Function
#'
#' @description
#' This function generates an overlayed Kaplan-Meier (KM) survival plot to visualize the survival curves
#' of training data stratified by predicted risk groups (e.g., High Risk vs Low Risk), along with the
#' survival curve of a selected test sample. It helps to visually assess how the test sample's predicted
#' survival aligns with the survival profiles of the training groups.
#'
#' @name km_overlay_plot_f
#'
#' @param Train_results A data frame containing the training samples' survival prediction results (obtained from RF risk prediction model).
#'   The following columns are required:
#'   - `Sample_ID`: Unique identifier for the sample.
#'   - `Actual`: Actual TCGA ID (often the same as `Sample_ID`).
#'   - `Predicted_Risk_Group`: Predicted class label (e.g., `High_Risk` / `Low_Risk`).
#'   - `High_Risk_Prob`: Probability of being classified as High Risk.
#'   - `Low_Risk_Prob`: Probability of being classified as Low Risk.
#'   - `Prediction_Prob`: Maximum class probability (confidence in the prediction).
#'   - `OS_month`: Overall survival time in months.
#'   - `OS_event`: Event indicator (`1` = death, `0` = censored).
#'   The data frame should have no duplicate `Sample_ID`s, and the `OS_month` column should be numeric while `OS_event` should be an integer.
#'
#' @param Test_results A data frame containing the test samples' survival prediction results (obtained from RF risk prediction model).
#'   The structure is similar to `Train_results`, with the following columns:
#'   - `Sample_ID`: Unique identifier for the sample.
#'   - `Actual`: Actual TCGA ID (often the same as `Sample_ID`).
#'   - `Predicted_Risk_Group`: Predicted class label (e.g., `High_Risk` / `Low_Risk`).
#'   - `High_Risk_Prob`: Probability of being classified as High Risk.
#'   - `Low_Risk_Prob`: Probability of being classified as Low Risk.
#'   - `Prediction_Prob`: Maximum class probability (confidence in the prediction).
#'   - `OS_month`: Overall survival time in months.
#'   - `OS_event`: Event indicator (`1` = death, `0` = censored).
#'   The data frame should have no duplicate `Sample_ID`s, and the `OS_month` column should be numeric while `OS_event` should be an integer.
#'   `selected_sample` is looked up in the row names of this data frame, so the row names
#'   must be the sample IDs.
#'
#' @param survcurve_te_data A data frame containing survival probability curves for test samples. It should include:
#'   - `time_point`: Time points for survival estimates.
#'   - `<Sample_ID columns>`: Survival probabilities for each sample at each time point.
#'
#' @param selected_sample A single sample ID identifying the test sample whose predicted survival
#'   curve is overlaid on the Kaplan-Meier plot. It must match one of the row names of
#'   `Test_results` and one of the sample columns of `survcurve_te_data`.
#'
#' @param font_size Numeric. Base font size for the plot text (axis labels, legend, and title).
#'   Default is 12.
#'
#' @param train_palette Character vector of colors for training group curves (e.g., c("red", "blue")).
#'   Default is c("red", "blue").
#'
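#' @param test_curve_col Character. Color of the overlaid test sample curve.
#'   Default is "darkgreen".
#'
#' @param test_curve_size Numeric. Line width of the overlaid test sample curve.
#'   Default is 0.8.
#'
#' @param test_curve_lty Character. Line type of the overlaid test sample curve (e.g., "dashed", "solid").
#'   Default is "dashed".
#'
#' @param annotation_col Character. Color of the annotation text showing the test sample's
#'   predicted risk group and prediction probability. Default is "darkgreen".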
#'
#' @return A `ggplot` object (the `plot` component of the `ggsurvplot` output) showing:
#'   - Kaplan-Meier survival curves for the training dataset, stratified by predicted risk group.
#'   - An overlaid survival curve for the selected test sample (a dashed dark-green line by default).
#'   - An annotation displaying the test sample's ID, predicted risk group and prediction probability.
#'   If any input fails validation, a message is printed and `NULL` is returned instead.
#'
#' @importFrom survival Surv survfit
#' @importFrom survminer ggsurvplot
#' @importFrom ggplot2 geom_step annotate
#' @importFrom reshape2 melt
#'
#' @examples
#' # Example usage of the km_overlay_plot_f function
#'
#' # Example Train_results data frame
#' # Load example data included in the package
#' data(Train_results, package = "CPSM")
#' data(Test_results, package = "CPSM")
#' data(survCurves_data, package = "CPSM")
#'
#' # Generate Kaplan-Meier plot with overlay
#' KM_plot_results <- km_overlay_plot_f(
#'   Train_results = Train_results,
#'   Test_results = Test_results,
#'   survcurve_te_data = survCurves_data,
#'   selected_sample = "TCGA-TQ-A7RQ-01",
#'   font_size = 14,
#'   train_palette = c("purple", "orange"),
#'   test_curve_col = "black",
#'   test_curve_lty = "solid"
#' )
#'
#' @export


# Register `time` and `surv` as known global names: they are used unquoted as
# column names inside `aes()` in km_overlay_plot_f(), which R CMD check would
# otherwise report as undefined global variables.
utils::globalVariables(c("time", "surv"))

# Overlay one test sample's predicted survival curve on the Kaplan-Meier plot
# of the training data, stratified by predicted risk group.
#
# Arguments:
#   Train_results     data frame with columns OS_month, OS_event and
#                     Predicted_Risk_Group (used to fit the KM curves).
#   Test_results      data frame whose row names are sample IDs, with columns
#                     Predicted_Risk_Group and Prediction_Prob.
#   survcurve_te_data data frame with a `time_point` column plus one column of
#                     survival probabilities per test sample.
#   selected_sample   single sample ID; must be a row name of Test_results and
#                     a column of survcurve_te_data.
#   font_size         base font size passed to theme_minimal().
#   train_palette     colors for the training risk-group curves.
#   test_curve_col, test_curve_size, test_curve_lty
#                     color, line width and line type of the test curve.
#   annotation_col    color of the annotation text.
#
# Returns the ggplot object of the KM plot with the test curve and annotation
# added. On invalid input a message is emitted and NULL is returned (no error
# is raised), matching the validation style used throughout this function.
km_overlay_plot_f <- function(Train_results, Test_results, survcurve_te_data, selected_sample,
                              font_size = 12,
                              train_palette = c("red", "blue"),
                              test_curve_col = "darkgreen",
                              test_curve_size = 0.8,
                              test_curve_lty = "dashed",
                              annotation_col = "darkgreen") {
  # ---- Input validation (everything is checked before any model fitting) ----

  if (missing(Train_results) || is.null(Train_results) || !is.data.frame(Train_results)) {
    message("Error: 'Train_results' must be a non-null data frame.")
    return(NULL)
  }

  if (missing(Test_results) || is.null(Test_results) || !is.data.frame(Test_results)) {
    message("Error: 'Test_results' must be a non-null data frame.")
    return(NULL)
  }

  if (missing(survcurve_te_data) || is.null(survcurve_te_data) || !is.data.frame(survcurve_te_data)) {
    message("Error: 'survcurve_te_data' must be a non-null data frame.")
    return(NULL)
  }

  if (missing(selected_sample) || is.null(selected_sample)) {
    message("Error: 'selected_sample' is missing, NULL, or an empty string.")
    return(NULL)
  }
  # A vector or NA here would make the scalar `if` conditions below fail with
  # an R error instead of the intended message + NULL.
  if (length(selected_sample) != 1L || is.na(selected_sample)) {
    message("Error: 'selected_sample' must be a single, non-NA sample ID.")
    return(NULL)
  }
  # Work with a character ID so that `[[` below always selects by name,
  # never by position.
  sample_id <- as.character(selected_sample)
  if (!nzchar(sample_id)) {
    message("Error: 'selected_sample' is missing, NULL, or an empty string.")
    return(NULL)
  }

  # Report absent columns up front rather than failing inside Surv()/round().
  absent_cols <- function(df, needed) setdiff(needed, colnames(df))

  absent <- absent_cols(Train_results, c("OS_month", "OS_event", "Predicted_Risk_Group"))
  if (length(absent) > 0) {
    message("Error: 'Train_results' is missing required column(s): ",
            paste(absent, collapse = ", "))
    return(NULL)
  }

  absent <- absent_cols(Test_results, c("Predicted_Risk_Group", "Prediction_Prob"))
  if (length(absent) > 0) {
    message("Error: 'Test_results' is missing required column(s): ",
            paste(absent, collapse = ", "))
    return(NULL)
  }

  absent <- absent_cols(survcurve_te_data, "time_point")
  if (length(absent) > 0) {
    message("Error: 'survcurve_te_data' is missing required column(s): ",
            paste(absent, collapse = ", "))
    return(NULL)
  }

  # The sample is identified by the row names of Test_results.
  test_row <- match(sample_id, rownames(Test_results))
  if (is.na(test_row)) {
    message("Error: selected_sample '", selected_sample, "' is NOT present in the Test_results data.")
    return(NULL)
  } else {
    message("selected_sample '", selected_sample, "' is present in the Test_results data.")
  }

  # Without this check a missing curve would silently yield a plot with no
  # test-sample line.
  if (!(sample_id %in% colnames(survcurve_te_data))) {
    message("Error: selected_sample '", selected_sample,
            "' has no survival curve column in 'survcurve_te_data'.")
    return(NULL)
  }

  # ---- Kaplan-Meier fit and base plot for the training data ----

  Train_results$surv_Tr <- survival::Surv(
    time = Train_results$OS_month,
    event = Train_results$OS_event
  )
  km_Train_results_fit <- survival::survfit(
    surv_Tr ~ Predicted_Risk_Group,
    data = Train_results
  )

  km_Train_results_plot <- survminer::ggsurvplot(
    km_Train_results_fit,
    data = Train_results,
    risk.table = TRUE,
    break.time.by = 6,
    pval = FALSE,
    censor = TRUE,
    palette = train_palette,
    ggtheme = ggplot2::theme_minimal(base_size = font_size),
    surv.median.line = "hv",
    title = "Kaplan-Meier Plot: Prediction of Test Sample in Comparison to Risk Groups of Training Data"
  )

  # ---- Test-sample curve and annotation ----

  # Only one sample is needed, so read its column directly instead of
  # reshaping the whole table to long format.
  test_curve <- data.frame(
    time = survcurve_te_data[["time_point"]],
    surv = survcurve_te_data[[sample_id]]
  )

  selected_test_pred_risk <- Test_results$Predicted_Risk_Group[test_row]
  selected_test_pred_Prob <- Test_results$Prediction_Prob[test_row]

  # ggplot2 >= 3.4.0 deprecates `size` for line width in favour of
  # `linewidth`; pick whichever the installed version expects.
  width_arg <- if (utils::packageVersion("ggplot2") >= "3.4.0") "linewidth" else "size"
  step_args <- list(
    data = test_curve,
    mapping = ggplot2::aes(x = time, y = surv),
    color = test_curve_col,
    linetype = test_curve_lty
  )
  step_args[[width_arg]] <- test_curve_size
  test_layer <- do.call(ggplot2::geom_step, step_args)

  annotation_layer <- ggplot2::annotate(
    "text",
    # na.rm: a missing survival time must not turn the x position into NA.
    x = max(Train_results$OS_month, na.rm = TRUE) * 0.7,
    y = 0.85,
    label = paste("Sample:", selected_sample,
                  "\nPredicted Risk:", selected_test_pred_risk,
                  "\nPrediction Probability:", round(selected_test_pred_Prob, 3)),
    color = annotation_col,
    fontface = "bold",
    hjust = 0
  )

  km_Train_results_plot$plot + test_layer + annotation_layer
}

