## ----include = FALSE----------------------------------------------------------
# Default options for every code chunk in the rendered vignette.
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>",   # output layout and prefix
  fig.width = 8, fig.height = 6,     # figure dimensions
  warning = FALSE, message = FALSE   # hide warnings and messages
)

## ----install, eval = FALSE----------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
#      install.packages("BiocManager")
# BiocManager::install("BiocBuildReporter", dependencies=TRUE)

## ----library, results='hide', warning=FALSE, message=FALSE--------------------
library(BiocBuildReporter)

## ----setup--------------------------------------------------------------------
library(BiocBuildReporter)
library(dplyr)
library(ggplot2)
library(tidyr)

## ----get_all_tables-----------------------------------------------------------
# Download all available tables in a single call.
# This will cache the tables for quick subsequent access, so the
# per-table requests further down do not need to download them again.
get_all_bbs_tables()

## ----get_individual_table-----------------------------------------------------
# Individual tables are requested by name with get_bbs_table().

# Get the build summary table
build_summary <- get_bbs_table("build_summary")

# Get the info table
info <- get_bbs_table("info")

# Get the propagation status table
propagation_status <- get_bbs_table("propagation_status")

## ----read_remote--------------------------------------------------------------
# Request the info table with useLocal switched off.
# NOTE(review): going by the chunk name this reads the remote copy instead
# of the local cache -- confirm against ?get_bbs_table.
info <- get_bbs_table("info", useLocal = FALSE)

## ----updateTables-------------------------------------------------------------
# Request the info table with both useLocal and updateLocal switched on.
# NOTE(review): presumably this refreshes the locally cached copy before
# returning it -- confirm against ?get_bbs_table.
info <- get_bbs_table("info", useLocal = TRUE, updateLocal = TRUE)

## ----package_release_info-----------------------------------------------------
# Release information for the BiocFileCache package; keep the result and
# print it.
bfc_releases <- get_package_release_info("BiocFileCache")
bfc_releases

## ----get_package_build_results------------------------------------------------
# Build results for BiocFileCache, restricted to the RELEASE_3_22 branch
get_package_build_results("BiocFileCache", branch = "RELEASE_3_22")

## ----package_error_count------------------------------------------------------
# Error counts for BiocFileCache with no branch or builder filter
bfc_errors <- package_error_count("BiocFileCache")
bfc_errors

# Same query narrowed to one branch
bfc_errors_release <- package_error_count(
  "BiocFileCache",
  branch = "RELEASE_3_22"
)
bfc_errors_release

# Same query narrowed to one builder on that branch
bfc_errors_builder <- package_error_count(
  "BiocFileCache",
  builder = "nebbiolo2",
  branch = "RELEASE_3_22"
)
bfc_errors_builder

## ----filter_devel_errors------------------------------------------------------
# Get devel errors
dev_errors <- package_error_count("BiocFileCache", branch = "devel")

# Filter to current devel version.
# The versions are compared as version numbers, not as plain strings:
# string comparison ranks "1.9.0" above "1.10.0", so max() on the raw
# column can pick an outdated version once a component reaches two digits.
# strict = FALSE and na.rm = TRUE stop a missing or malformed entry from
# aborting the chunk; such rows simply never match the maximum.
# NOTE(review): assumes the `version` column holds version strings such as
# "3.1.10" -- confirm against the table returned by package_error_count().
dev_errors |>
  filter(
    numeric_version(version, strict = FALSE) ==
      max(numeric_version(version, strict = FALSE), na.rm = TRUE)
  )

## ----package_failure_over_time------------------------------------------------
# Get failure events for BiocFileCache on the nebbiolo1 builder and
# group events in a 24 hour period (the third argument, 24)
package_failures_over_time("BiocFileCache", "nebbiolo1", 24)

## ----package_growth-----------------------------------------------------------
# Load the package info table
info <- get_bbs_table("info")

# Number of distinct packages recorded for each branch, largest first
package_counts <- info |>
  group_by(git_branch) |>
  summarise(n_packages = length(unique(Package)), .groups = "drop") |>
  arrange(desc(n_packages))

# Print the per-branch counts
package_counts

# Horizontal bar chart of the counts; reorder() sorts the branches by
# their package count so the bars appear in size order.
ggplot(
  package_counts,
  aes(x = reorder(git_branch, n_packages), y = n_packages)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Number of Packages by Bioconductor Branch",
    x = "Branch", y = "Number of Packages"
  ) +
  theme_minimal()

## ----build_status-------------------------------------------------------------
# Load the build summary table
build_summary <- get_bbs_table("build_summary")

# Tally rows per build status, most frequent status first
status_counts <- build_summary |>
  count(status, sort = TRUE)

status_counts

# Horizontal bar chart of the tallies, one colour per status.
# Statuses without an entry in `values` are drawn in the scale's NA colour.
# The legend is dropped because the axis already labels every bar.
ggplot(status_counts, aes(x = reorder(status, n), y = n)) +
  geom_col(aes(fill = status)) +
  scale_fill_manual(
    values = c(
      "OK" = "green3",
      "WARNING" = "orange",
      "ERROR" = "red",
      "TIMEOUT" = "darkred"
    )
  ) +
  coord_flip() +
  labs(title = "Distribution of Build Statuses", x = "Status", y = "Count") +
  theme_minimal() +
  theme(legend.position = "none")

## ----platform_analysis--------------------------------------------------------
# Per-node breakdown of build statuses: the row count for each
# (node, status) pair, the node's overall row count, and the share of the
# node's rows that carry that status.
platform_status <- build_summary |>
  group_by(node, status) |>
  summarise(count = n(), .groups = "drop") |>
  group_by(node) |>
  mutate(total = sum(count), percentage = count / total * 100) |>
  ungroup()

# Failure rate per node, counting ERROR and TIMEOUT together.
# `total` is constant within a node, so first() just carries it through.
# Nodes with no ERROR/TIMEOUT rows are absent from the result.
error_rates <- platform_status |>
  filter(status %in% c("ERROR", "TIMEOUT")) |>
  group_by(node) |>
  summarise(
    error_count = sum(count),
    total = first(total),
    error_rate = sum(percentage),
    .groups = "drop"
  ) |>
  arrange(desc(error_rate))

# Ten nodes with the highest failure rate
error_rates |> head(n = 10)

## ----stage_analysis-----------------------------------------------------------
# Failed builds (ERROR or TIMEOUT) tallied per build stage and status,
# largest tally first
stage_failures <- build_summary |>
  filter(status %in% c("ERROR", "TIMEOUT")) |>
  count(stage, status, sort = TRUE)

stage_failures

# Stacked bars: one bar per stage, split by failure status
ggplot(stage_failures, aes(x = stage, y = n, fill = status)) +
  geom_col() +
  scale_fill_manual(values = c("ERROR" = "red", "TIMEOUT" = "darkred")) +
  labs(
    title = "Build Failures by Stage",
    x = "Build Stage", y = "Number of Failures",
    fill = "Status"
  ) +
  theme_minimal()

## ----problematic_packages-----------------------------------------------------
# Total number of failed builds (ERROR or TIMEOUT) per package, with the
# most failure-prone packages first. Both failure statuses are pooled,
# so the rows are counted per package directly.
package_errors <- build_summary |>
  filter(status %in% c("ERROR", "TIMEOUT")) |>
  group_by(package) |>
  summarise(total_errors = n(), .groups = "drop") |>
  arrange(desc(total_errors))

# Ten packages with the most failures
package_errors |> head(n = 10)

## ----maintainer_analysis------------------------------------------------------
# For every maintainer in the info table: how many distinct packages they
# maintain and a comma-separated list of those packages, ordered from the
# maintainer with the most packages downwards.
maintainer_packages <- info |>
  group_by(Maintainer) |>
  summarise(
    n_packages = n_distinct(Package),
    packages = toString(unique(Package)),
    .groups = "drop"
  ) |>
  arrange(desc(n_packages))

# Ten maintainers with the most packages
maintainer_packages |> head(n = 10)

# Histogram of packages per maintainer, one bin per package count
ggplot(maintainer_packages, aes(x = n_packages)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of Packages per Maintainer",
    x = "Number of Packages", y = "Number of Maintainers"
  ) +
  theme_minimal()

## ----temporal_analysis--------------------------------------------------------
# Add calendar columns derived from the build start time:
# `date` (day) and `month` (a "YYYY-MM" label used for grouping below).
build_summary <- build_summary |>
  mutate(
    date = as.Date(startedat),
    month = format(startedat, "%Y-%m")
  )

# Number of build records per month. `month_date` turns the "YYYY-MM"
# label into the first day of that month so it can sit on a date axis.
monthly_builds <- build_summary |>
  count(month) |>
  mutate(month_date = as.Date(paste(month, "01", sep = "-")))

# Line chart of monthly build activity
ggplot(monthly_builds, aes(x = month_date, y = n)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "steelblue") +
  labs(title = "Build Activity Over Time", x = "Month", y = "Number of Builds") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Monthly failure rate: share of build records per month whose status is
# ERROR or TIMEOUT, as a percentage. Relies on the `month` column added to
# build_summary earlier in this chunk.
monthly_errors <- build_summary |>
  group_by(month) |>
  summarise(
    total = n(),
    errors = sum(status %in% c("ERROR", "TIMEOUT")),
    error_rate = errors / total * 100,
    .groups = "drop"
  ) |>
  mutate(month_date = as.Date(paste(month, "01", sep = "-")))

# Line chart of the monthly failure rate
ggplot(monthly_errors, aes(x = month_date, y = error_rate)) +
  geom_line(color = "red", linewidth = 1) +
  geom_point(color = "red") +
  labs(
    title = "Build Error Rate Over Time",
    x = "Month", y = "Error Rate (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## ----get_build_report---------------------------------------------------------
# Retrieves the build report for all packages on December 29, 2025,
# restricted to the RELEASE_3_22 branch and the Linux "nebbiolo2"
# build machine
get_build_report(
  "2025-12-29",
  branch = "RELEASE_3_22",
  builder = "nebbiolo2"
)

## ----get_failing_packages-----------------------------------------------------
# Returns all failing packages for the RELEASE_3_22 branch
# on build machine nebbiolo2
get_failing_packages("RELEASE_3_22", "nebbiolo2")

## ----sessionInfo--------------------------------------------------------------

# Print the R session details for the run that produced this output
sessionInfo()


