From c8b4bc15aaefd0a1225341068fc85079df595e26 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Thu, 12 Mar 2026 13:46:00 +0000
Subject: [PATCH 01/17] Add initial fns from pressure testing report

---
 R/helpers.R          |  25 ++
 R/pressure_testing.R | 580 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 605 insertions(+)
 create mode 100644 R/pressure_testing.R

diff --git a/R/helpers.R b/R/helpers.R
index 7557d02..4070345 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -35,3 +35,28 @@ make_novax_scenario <- function(disease) {
     names_from = "variable"
   )
 }
+
+adaptive_round <- function(
+  x,
+  large_threshold = 1,
+  small_sigfig = 2,
+  large_digits = 1
+) {
+  ifelse(
+    abs(x) >= large_threshold,
+    round(x, large_digits),
+    signif(x, small_sigfig)
+  )
+}
+
+round_numeric <- function(df) {
+  df %>%
+    mutate(across(
+      where(is.numeric) & !matches("year", ignore.case = TRUE),
+      ~ adaptive_round(.x)
+    ))
+}
+
+str_as_ts_year <- function(x) {
+  as.numeric(substr(x, 1, 6))
+}
diff --git a/R/pressure_testing.R b/R/pressure_testing.R
new file mode 100644
index 0000000..d4a15c6
--- /dev/null
+++ b/R/pressure_testing.R
@@ -0,0 +1,580 @@
+### All functions for pressure testing
+
+# Flexible rounding
+
+# Fix for scenario_type variable being included from 202310 onwards
+filter_recent_ts <- function(df, threshold = 202310) {
+  touchstone_year <- unique(df$touchstone)
+
+  # TODO: check that touchstone year is 6 digit - can there be more digits?
+  ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R
+
+  if (ts_number >= threshold) {
+    df <- dplyr::filter(
+      df,
+      scenario_type == "default"
+    )
+  }
+
+  df
+}
+
+# Helper for removing excluded diseases post-202110
+filter_excluded_diseases_ts <- function(df, threshold = 202110) {
+  exclude_dis <- c("Hib", "PCV", "Rota", "JE")
+
+  touchstone_year <- unique(df$touchstone)
+  ts_number <- as.numeric(substr(touchstone_year, 1, 6))
+
+  if (ts_number <= threshold) {
+    df %>% filter(!disease %in% exclude_dis)
+  } else {
+    df
+  }
+}
+
+# Identify duplicates
+flag_duplicates <- function(df, key_cols) {
+  df %>%
+    add_count(across(all_of(key_cols)), name = "n_key") %>%
+    filter(n_key > 1)
+}
+
+# Identify rows where deaths_averted went from non-NA to NA
+comparison_prev <- function(df, prev_dat, outcome) {
+  prev_df <- prev_dat %>%
+    select(all_of(key_cols), all_of(outcome)) %>%
+    rename(outcome_prev = !!sym(outcome))
+
+  current_df <- df %>%
+    select(all_of(key_cols), all_of(outcome)) %>%
+    rename(outcome_cur = !!sym(outcome))
+
+  result <- prev_df %>%
+    inner_join(current_df, by = key_cols) %>%
+    filter(!is.na(outcome_prev) & is.na(outcome_cur))
+
+  return(result)
+}
+
+# Explore significant changes in key outcomes (i.e. deaths/dalys)
+generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) {
+  #fix for erroneous duplicated YF data in 201910 dataset
+  prev_df <- prev_df %>%
+    {
+      if (identical(pars$touchstone_old, "201910")) {
+        filter(., !(disease == "YF" & support_type == "other" & coverage == 0))
+      } else {
+        .
+      }
+    }
+
+  # Fix for multiple campaigns per year (i.e. not true duplicates) - only applicable for 2019 true non-duplicates.
+  add_campaign_id <- function(df, key_cols) {
+    df %>%
+      group_by(across(all_of(key_cols))) %>%
+      mutate(campaign_id = row_number()) %>%
+      ungroup()
+  }
+
+  prev_df <- add_campaign_id(prev_df, key_cols)
+  curr_df <- add_campaign_id(curr_df, key_cols)
+
+  diff_keys <- c(key_cols, "campaign_id")
+  cols_needed <- unique(c(diff_keys, interest_cols))
+
+  diff <- diffdf::diffdf(
+    prev_df[, cols_needed],
+    curr_df[, cols_needed],
+    keys = diff_keys
+  )
+
+  changes <- setNames(
+    lapply(interest_cols, function(v) {
+      nm <- paste0("VarDiff_", v)
+      if (nm %in% names(diff)) diff[[nm]] else NULL
+    }),
+    interest_cols
+  )
+
+  return(changes)
+}
+
+# Generate IQR for key outcomes - for threshold of "significant"
+gen_national_iqr <- function(
+  df,
+  group_cols,
+  value_cols,
+  prefix = "national_iqr_"
+) {
+  df %>%
+    group_by(across(all_of(group_cols))) %>%
+    summarise(
+      across(
+        all_of(value_cols),
+        \(x) IQR(x, na.rm = TRUE),
+        .names = "{prefix}{.col}"
+      ),
+      .groups = "drop"
+    )
+}
+
+## Flag significant changes
+flag_large_diffs <- function(
+  changes_list,
+  iqr_df,
+  variable,
+  group_cols,
+  threshold
+) {
+  iqr_col <- paste0("national_iqr_", variable)
+
+  changes_list[[variable]] %>%
+    mutate(diff = COMPARE - BASE) %>%
+    left_join(
+      iqr_df %>% select(all_of(group_cols), all_of(iqr_col)),
+      by = group_cols
+    ) %>%
+    mutate(
+      flag = abs(diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0
+    ) %>%
+    filter(flag) %>%
+    select(
+      country,
+      country_name,
+      year,
+      vaccine,
+      modelling_group,
+      activity_type,
+      BASE,
+      COMPARE,
+      diff
+    ) %>%
+    rename(!!as.character(old) := BASE, !!as.character(new) := COMPARE) %>%
+    arrange(desc(diff))
+}
+
+## Plot significant changes
+significant_diff_plot <- function(df, outcome) {
+  df$label <- paste(
+    df$country_name,
+    df$vaccine,
+    df$activity_type,
+    df$year,
+    sep = " | "
+  )
+
+  ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) +
+    geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) +
+    geom_point(size = 2) +
+    labs(
+      x = "Difference",
+      y = "",
+      title = glue(
+        "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year"
+      )
+    ) +
+    theme_minimal()
+}
+
+### Generate combined df
+gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) {
+  prev_df <- prev_dat[, interest_cols]
+  cur_df <- df2[, interest_cols]
+
+  combined <- full_join(
+    prev_df,
+    cur_df,
+    by = key_cols,
+    suffix = c("_old", "_new")
+  )
+
+  combined <- combined %>%
+    select(
+      country,
+      country_name,
+      disease,
+      vaccine,
+      activity_type,
+      year,
+      modelling_group,
+      deaths_averted_old,
+      deaths_averted_new,
+      dalys_averted_old,
+      dalys_averted_new
+    )
+  return(combined)
+}
+
+plot_diff <- function(
+  combined,
+  variable,
+  group_vars = c("activity_type", "vaccine")
+) {
+  x_var <- paste0(variable, "_new")
+  y_var <- paste0(variable, "_old")
+  x_sym <- rlang::sym(x_var)
+  y_sym <- rlang::sym(y_var)
+
+  combined <- combined %>%
+    filter(!is.na(!!x_sym) & !is.na(!!y_sym))
+
+  n_facets <- combined %>%
+    distinct(activity_type, vaccine) %>%
+    nrow()
+
+  ncol_dynamic <- case_when(
+    n_facets <= 4 ~ 2,
+    n_facets <= 9 ~ 3,
+    n_facets <= 16 ~ 4,
+    n_facets <= 25 ~ 6,
+    TRUE ~ 8
+  )
+
+  p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) +
+    geom_point(alpha = 0.5, colour = "#008080") +
+    geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
+    facet_wrap(
+      ~ activity_type + vaccine,
+      scales = "free",
+      ncol = ncol_dynamic
+    ) +
+    scale_x_log10() +
+    scale_y_log10() +
+    theme_bw() +
+    theme(
+      strip.text = element_text(size = 7),
+      panel.spacing = unit(0.05, "lines"),
+      axis.text = element_text(size = 6.5)
+    ) +
+    labs(
+      title = glue("{variable}: Current vs Previous Report"),
+      x = glue("{new} - {variable}"),
+      y = glue("{old} - {variable}")
+    )
+
+  return(p)
+}
+
+### Subregional v national estimate comparison
+compare_national_to_subregional <- function(
+  df,
+  outcome,
+  activity_filter,
+  threshold
+) {
+  df <- df %>%
+    filter(activity_type == activity_filter) %>%
+    select(all_of(key_cols), subregion, !!outcome)
+
+  results <- purrr::map_dfr(outcome, function(outcome) {
+    subregional_summary <- df %>%
+      group_by(subregion, disease, activity_type) %>%
+      summarise(
+        subregional_mean = mean(.data[[outcome]], na.rm = TRUE),
+        subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE),
+        .groups = "drop"
+      )
+
+    national_summary <- df %>%
+      select(all_of(key_cols), subregion, !!outcome) %>%
+      rename(national_value = !!outcome)
+
+    comparison <- national_summary %>%
+      left_join(subregional_summary, by = c("subregion", "disease")) %>%
+      mutate(
+        outcome = outcome,
+        difference = national_value - subregional_mean,
+        iqr_score = abs(difference) / subregional_iqr
+      )
+
+    dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE)
+
+    comparison <- comparison %>%
+      mutate(
+        flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0
+      ) %>%
+      filter(flag_iqr) %>%
+      select(
+        country_name,
+        vaccine,
+        year,
+        modelling_group,
+        national_value,
+        subregional_mean,
+        subregional_iqr,
+        difference,
+        iqr_score
+      ) %>%
+      arrange(desc(iqr_score))
+
+    comparison
+  })
+
+  return(results)
+}
+
+### Modelling group variations
+plot_modelling_group_variation <- function(df2, df3, outc = "deaths") {
+  offset <- 1e-6
+
+  df2 %>%
+    left_join(df3, by = join_by(modelling_group, vaccine)) %>%
+    mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>%
+    group_by(vaccine) %>%
+    mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>%
+    ggplot() +
+    aes(
+      fill = as.character(mod_num),
+      x = adj_outc,
+      y = reorder(vaccine, mean_outc)
+    ) +
+    geom_density_ridges(
+      alpha = 0.5,
+      stat = "binline",
+      bins = 200,
+      draw_baseline = FALSE
+    ) +
+    facet_grid(. ~ activity_type, scales = "fixed") +
+    theme_bw() +
+    theme(
+      legend.position = "none",
+      axis.text.x = element_text(angle = 90, hjust = 1)
+    ) +
+    scale_x_log10(
+      breaks = scales::trans_breaks("log10", function(x) 10^x),
+      labels = scales::trans_format("log10", math_format(10^.x))
+    ) +
+    scale_fill_viridis_d() +
+    labs(
+      x = paste0(
+        "Burden averted (",
+        ifelse(outc == "dalys", "DALYs", outc),
+        ")"
+      ),
+      y = "Vaccine"
+    )
+}
+
+
+# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended)
+plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") {
+  df_cur <- df %>%
+    select(all_of(key_cols), !!outcome) %>%
+    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
+    group_by(disease, year) %>%
+    summarise(
+      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
+      .groups = "drop"
+    ) %>%
+    mutate(dataset = as.character(new))
+
+  df_prev <- prev_dat %>%
+    select(all_of(key_cols), !!outcome) %>%
+    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
+    group_by(disease, year) %>%
+    summarise(
+      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
+      .groups = "drop"
+    ) %>%
+    mutate(dataset = as.character(old))
+
+  df_combined <- bind_rows(df_cur, df_prev)
+
+  df_diff <- df_cur %>%
+    left_join(
+      df_prev,
+      by = c("disease", "year"),
+      suffix = c("_curr", "_prev")
+    ) %>%
+    mutate(
+      yearly_outcome = yearly_outcome_curr - yearly_outcome_prev,
+      dataset = "Difference"
+    ) %>%
+    select(disease, year, yearly_outcome, dataset)
+
+  df_combined <- bind_rows(df_combined, df_diff)
+
+  df_combined$dataset <- factor(
+    df_combined$dataset,
+    levels = c(as.character(old), "Difference", as.character(new))
+  )
+
+  ggplot(
+    df_combined,
+    aes(
+      x = reorder(disease, yearly_outcome),
+      y = yearly_outcome,
+      fill = factor(year)
+    )
+  ) +
+    geom_col(position = "dodge") +
+    scale_fill_manual(
+      values = c(
+        "2021" = "#008080",
+        "2022" = "#E68424",
+        "2023" = "#9573B5",
+        "2024" = "#A1D15C"
+      )
+    ) +
+    facet_wrap(~dataset, scales = "free_y") +
+    scale_y_continuous(labels = scales::scientific) +
+    theme_bw() +
+    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+    labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year")
+}
+
+### Gavi Cumulative Plot (modelling group + average)
+plot_cumul <- function(df, outcome, disease_filter) {
+  outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))]
+
+  outcome_sym <- sym(outcome)
+  cum_col <- paste0("cum_", outcome)
+  avg_col <- paste0("avg_", outcome)
+
+  col_old <- paste0(outcome, "_old")
+  col_new <- paste0(outcome, "_new")
+
+  combined2 <- df %>%
+    select(
+      country,
+      country_name,
+      disease,
+      vaccine,
+      activity_type,
+      year,
+      modelling_group,
+      all_of(outcome_cols)
+    ) %>%
+    pivot_longer(
+      cols = all_of(outcome_cols),
+      names_to = "touchstone",
+      values_to = "value"
+    ) %>%
+    mutate(
+      touchstone = str_remove(touchstone, paste0("^", outcome, "_")),
+      touchstone = recode(
+        touchstone,
+        "old" = as.character(old),
+        "new" = as.character(new),
+        .default = touchstone
+      ),
+      touchstone = factor(
+        touchstone,
+        levels = c(as.character(old), as.character(new))
+      )
+    )
+  # Cumulative values by modelling group
+  df_cum <- combined2 %>%
+    filter(disease == disease_filter) %>%
+    group_by(modelling_group, touchstone) %>%
+    complete(year = full_seq(year, 1)) %>%
+    arrange(year) %>%
+    mutate(
+      first_valid = min(year[!is.na(value)]),
+      !!cum_col := ifelse(
+        year < first_valid,
+        NA,
+        cumsum(replace_na(value, 0))
+      )
+    ) %>%
+    select(-first_valid) %>%
+    ungroup() %>%
+    mutate(modelling_group = paste(modelling_group, touchstone, sep = "-"))
+
+  # Model average
+  df_avg <- df_cum %>%
+    group_by(year, touchstone) %>%
+    summarise(
+      !!avg_col := mean(!!sym(cum_col), na.rm = TRUE),
+      n_models = sum(!is.na(!!sym(cum_col))),
+      .groups = "drop"
+    ) %>%
+    filter(n_models >= 1) %>%
+    mutate(modelling_group = paste("Model Average", touchstone, sep = "-"))
+
+  # Combine for plot
+  df_plot <- bind_rows(
+    df_cum %>%
+      select(year, modelling_group, touchstone, value = !!sym(cum_col)),
+    df_avg %>%
+      select(year, modelling_group, touchstone, value = !!sym(avg_col))
+  )
+
+  df_plot <- df_plot %>%
+    group_by(modelling_group) %>%
+    filter(sum(value, na.rm = TRUE) > 0) %>%
+    ungroup() %>%
+    mutate(
+      line_type = ifelse(
+        grepl("Model Average", modelling_group),
+        "dashed",
+        "solid"
+      )
+    )
+
+  if (nrow(df_plot) == 0 || all(df_plot$value == 0)) {
+    message("No non-zero data to plot for ", disease_filter, ". Skipping plot.")
+    return(NULL)
+  }
+
+  p <- ggplot(
+    df_plot,
+    aes(
+      x = year,
+      y = value,
+      color = modelling_group,
+      linetype = line_type
+    )
+  ) +
+    geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) +
+    scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) +
+    guides(linetype = "none") +
+    scale_y_continuous(labels = scales::scientific) +
+    theme_minimal() +
+    labs(
+      x = "Year",
+      y = paste("Cumulative", outcome),
+      color = "Modelling Group",
+      title = paste("Cumulative", outcome, "Over Time –", disease_filter)
+    ) +
+    theme(legend.position = "bottom")
+
+  return(p)
+}
+
+save_outputs <- function() {
+  saveRDS(
+    round_numeric(
+      missing_in_current %>%
+        select(all_of(c(
+          "country_name",
+          "vaccine",
+          "activity_type",
+          "year",
+          "modelling_group"
+        )))
+    ),
+    "outputs/missing_in_current.rds"
+  )
+  saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds")
+  saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds")
+  saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds")
+  saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds")
+  saveRDS(
+    round_numeric(subregional_flags_deaths_camp),
+    "outputs/subregional_flags_deaths_camp.rds"
+  )
+  saveRDS(
+    round_numeric(subregional_flags_deaths_rout),
+    "outputs/subregional_flags_deaths_rout.rds"
+  )
+  saveRDS(
+    round_numeric(subregional_flags_dalys_camp),
+    "outputs/subregional_flags_dalys_camp.rds"
+  )
+  saveRDS(
+    round_numeric(subregional_flags_dalys_rout),
+    "outputs/subregional_flags_dalys_rout.rds"
+  )
+}

From db03465b35fa129a858df32d58994f6c5e62c4fa Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Thu, 12 Mar 2026 14:56:46 +0000
Subject: [PATCH 02/17] Move pressure testing plotting

---
 R/pressure_testing.R | 483 +++++++++----------------------------------
 1 file changed, 97 insertions(+), 386 deletions(-)

diff --git a/R/pressure_testing.R b/R/pressure_testing.R
index d4a15c6..d73c08f 100644
--- a/R/pressure_testing.R
+++ b/R/pressure_testing.R
@@ -10,24 +10,22 @@ filter_recent_ts <- function(df, threshold = 202310) {
   ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R
 
   if (ts_number >= threshold) {
-    df <- dplyr::filter(
+    dplyr::filter(
       df,
       scenario_type == "default"
     )
+  } else {
+    df
   }
-
-  df
 }
 
 # Helper for removing excluded diseases post-202110
 filter_excluded_diseases_ts <- function(df, threshold = 202110) {
-  exclude_dis <- c("Hib", "PCV", "Rota", "JE")
-
   touchstone_year <- unique(df$touchstone)
-  ts_number <- as.numeric(substr(touchstone_year, 1, 6))
+  ts_number <- str_as_ts_year(touchstone_year)
 
   if (ts_number <= threshold) {
-    df %>% filter(!disease %in% exclude_dis)
+    filter(df, !disease %in% exclude_dis)
   } else {
     df
   }
@@ -35,48 +33,41 @@ filter_excluded_diseases_ts <- function(df, threshold = 202110) {
 
 # Identify duplicates
 flag_duplicates <- function(df, key_cols) {
-  df %>%
-    add_count(across(all_of(key_cols)), name = "n_key") %>%
-    filter(n_key > 1)
+  df <- dplyr::add_count(
+    df,
+    dplyr::across(dplyr::all_of(key_cols)),
+    name = "n_key"
+  )
+
+  filter(df, n_key > 1)
 }
 
 # Identify rows where deaths_averted went from non-NA to NA
 comparison_prev <- function(df, prev_dat, outcome) {
-  prev_df <- prev_dat %>%
-    select(all_of(key_cols), all_of(outcome)) %>%
-    rename(outcome_prev = !!sym(outcome))
+  prev_df <- select(prev_data, all_of(key_cols), all_of(outcome))
+  prev_df <- rename(prev_df, outcome_prev = !!sym(outcome))
 
-  current_df <- df %>%
-    select(all_of(key_cols), all_of(outcome)) %>%
-    rename(outcome_cur = !!sym(outcome))
+  current_df <- select(current_df, all_of(key_cols), all_of(outcome))
+  current_df <- rename(current_df, outcome_cur = !!sym(outcome))
 
-  result <- prev_df %>%
-    inner_join(current_df, by = key_cols) %>%
-    filter(!is.na(outcome_prev) & is.na(outcome_cur))
+  result <- inner_join(prev_df, current_df, by = key_cols)
+  result <- filter(result, !is.na(outcome_prev) & is.na(outcome_cur))
 
-  return(result)
+  result
 }
 
 # Explore significant changes in key outcomes (i.e. deaths/dalys)
 generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) {
   #fix for erroneous duplicated YF data in 201910 dataset
-  prev_df <- prev_df %>%
-    {
-      if (identical(pars$touchstone_old, "201910")) {
-        filter(., !(disease == "YF" & support_type == "other" & coverage == 0))
-      } else {
-        .
-      }
-    }
-
-  # Fix for multiple campaigns per year (i.e. not true duplicates) - only applicable for 2019 true non-duplicates.
-  add_campaign_id <- function(df, key_cols) {
-    df %>%
-      group_by(across(all_of(key_cols))) %>%
-      mutate(campaign_id = row_number()) %>%
-      ungroup()
+  if (identical(pars$touchstone_old, TOUCHSTONE_OLD)) {
+    prev_df <- filter(
+      prev_df,
+      !(disease == "YF" & support_type == "other" & coverage == 0)
+    )
   }
 
+  # Fix for multiple campaigns per year (i.e. not true duplicates)
+  # only applicable for 2019 true non-duplicates.
   prev_df <- add_campaign_id(prev_df, key_cols)
   curr_df <- add_campaign_id(curr_df, key_cols)
 
@@ -97,7 +88,7 @@ generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) {
     interest_cols
   )
 
-  return(changes)
+  changes
 }
 
 # Generate IQR for key outcomes - for threshold of "significant"
@@ -107,18 +98,20 @@ gen_national_iqr <- function(
   value_cols,
   prefix = "national_iqr_"
 ) {
-  df %>%
-    group_by(across(all_of(group_cols))) %>%
-    summarise(
-      across(
-        all_of(value_cols),
-        \(x) IQR(x, na.rm = TRUE),
-        .names = "{prefix}{.col}"
-      ),
-      .groups = "drop"
-    )
+  df <- group_by(df, across(all_of(group_cols)))
+  df <- summarise(
+    df,
+    across(
+      all_of(value_cols),
+      \(x) IQR(x, na.rm = TRUE),
+      .names = "{prefix}{.col}"
+    ),
+    .groups = "drop"
+  )
 }
 
+# TODO: I don't like how this looks - this should probably be a simpler
+# functional that maps over a list in a separate function
 ## Flag significant changes
 flag_large_diffs <- function(
   changes_list,
@@ -129,8 +122,22 @@ flag_large_diffs <- function(
 ) {
   iqr_col <- paste0("national_iqr_", variable)
 
-  changes_list[[variable]] %>%
-    mutate(diff = COMPARE - BASE) %>%
+  # returns a list so that the function can accept multiple variables
+  lapply(
+    changes_list[[variable]],
+    temp_fn,
+    iqr_df,
+    variable,
+    group_cols,
+    threshold
+  )
+}
+
+temp_fn <- function(df, iqr_df, variable, group_cols, threshold) {
+  mutate(
+    df,
+    diff = COMPARE - BASE
+  ) %>%
     left_join(
       iqr_df %>% select(all_of(group_cols), all_of(iqr_col)),
       by = group_cols
@@ -154,29 +161,6 @@ flag_large_diffs <- function(
     arrange(desc(diff))
 }
 
-## Plot significant changes
-significant_diff_plot <- function(df, outcome) {
-  df$label <- paste(
-    df$country_name,
-    df$vaccine,
-    df$activity_type,
-    df$year,
-    sep = " | "
-  )
-
-  ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) +
-    geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) +
-    geom_point(size = 2) +
-    labs(
-      x = "Difference",
-      y = "",
-      title = glue(
-        "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year"
-      )
-    ) +
-    theme_minimal()
-}
-
 ### Generate combined df
 gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) {
   prev_df <- prev_dat[, interest_cols]
@@ -206,56 +190,6 @@ gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) {
   return(combined)
 }
 
-plot_diff <- function(
-  combined,
-  variable,
-  group_vars = c("activity_type", "vaccine")
-) {
-  x_var <- paste0(variable, "_new")
-  y_var <- paste0(variable, "_old")
-  x_sym <- rlang::sym(x_var)
-  y_sym <- rlang::sym(y_var)
-
-  combined <- combined %>%
-    filter(!is.na(!!x_sym) & !is.na(!!y_sym))
-
-  n_facets <- combined %>%
-    distinct(activity_type, vaccine) %>%
-    nrow()
-
-  ncol_dynamic <- case_when(
-    n_facets <= 4 ~ 2,
-    n_facets <= 9 ~ 3,
-    n_facets <= 16 ~ 4,
-    n_facets <= 25 ~ 6,
-    TRUE ~ 8
-  )
-
-  p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) +
-    geom_point(alpha = 0.5, colour = "#008080") +
-    geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
-    facet_wrap(
-      ~ activity_type + vaccine,
-      scales = "free",
-      ncol = ncol_dynamic
-    ) +
-    scale_x_log10() +
-    scale_y_log10() +
-    theme_bw() +
-    theme(
-      strip.text = element_text(size = 7),
-      panel.spacing = unit(0.05, "lines"),
-      axis.text = element_text(size = 6.5)
-    ) +
-    labs(
-      title = glue("{variable}: Current vs Previous Report"),
-      x = glue("{new} - {variable}"),
-      y = glue("{old} - {variable}")
-    )
-
-  return(p)
-}
-
 ### Subregional v national estimate comparison
 compare_national_to_subregional <- function(
   df,
@@ -263,286 +197,63 @@ compare_national_to_subregional <- function(
   activity_filter,
   threshold
 ) {
-  df <- df %>%
-    filter(activity_type == activity_filter) %>%
-    select(all_of(key_cols), subregion, !!outcome)
-
-  results <- purrr::map_dfr(outcome, function(outcome) {
-    subregional_summary <- df %>%
-      group_by(subregion, disease, activity_type) %>%
-      summarise(
-        subregional_mean = mean(.data[[outcome]], na.rm = TRUE),
-        subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE),
-        .groups = "drop"
-      )
-
-    national_summary <- df %>%
-      select(all_of(key_cols), subregion, !!outcome) %>%
-      rename(national_value = !!outcome)
-
-    comparison <- national_summary %>%
-      left_join(subregional_summary, by = c("subregion", "disease")) %>%
-      mutate(
-        outcome = outcome,
-        difference = national_value - subregional_mean,
-        iqr_score = abs(difference) / subregional_iqr
-      )
-
-    dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE)
-
-    comparison <- comparison %>%
-      mutate(
-        flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0
-      ) %>%
-      filter(flag_iqr) %>%
-      select(
-        country_name,
-        vaccine,
-        year,
-        modelling_group,
-        national_value,
-        subregional_mean,
-        subregional_iqr,
-        difference,
-        iqr_score
-      ) %>%
-      arrange(desc(iqr_score))
-
-    comparison
-  })
-
-  return(results)
-}
-
-### Modelling group variations
-plot_modelling_group_variation <- function(df2, df3, outc = "deaths") {
-  offset <- 1e-6
-
-  df2 %>%
-    left_join(df3, by = join_by(modelling_group, vaccine)) %>%
-    mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>%
-    group_by(vaccine) %>%
-    mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>%
-    ggplot() +
-    aes(
-      fill = as.character(mod_num),
-      x = adj_outc,
-      y = reorder(vaccine, mean_outc)
-    ) +
-    geom_density_ridges(
-      alpha = 0.5,
-      stat = "binline",
-      bins = 200,
-      draw_baseline = FALSE
-    ) +
-    facet_grid(. ~ activity_type, scales = "fixed") +
-    theme_bw() +
-    theme(
-      legend.position = "none",
-      axis.text.x = element_text(angle = 90, hjust = 1)
-    ) +
-    scale_x_log10(
-      breaks = scales::trans_breaks("log10", function(x) 10^x),
-      labels = scales::trans_format("log10", math_format(10^.x))
-    ) +
-    scale_fill_viridis_d() +
-    labs(
-      x = paste0(
-        "Burden averted (",
-        ifelse(outc == "dalys", "DALYs", outc),
-        ")"
-      ),
-      y = "Vaccine"
-    )
-}
-
-
-# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended)
-plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") {
-  df_cur <- df %>%
-    select(all_of(key_cols), !!outcome) %>%
-    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
-    group_by(disease, year) %>%
-    summarise(
-      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
+  df <- filter(df, activity_type == activity_filter)
+  df <- select(df, all_of(key_cols), subregion, !!outcome)
+
+  results <- purrr::map_dfr(outcome, function(otc) {
+    subregional_summary <-
+      group_by(df, subregion, disease, activity_type)
+    subregional_summary <- summarise(
+      subregional_summary,
+      subregional_mean = mean(.data[[otc]], na.rm = TRUE),
+      subregional_iqr = IQR(.data[[otc]], na.rm = TRUE),
       .groups = "drop"
-    ) %>%
-    mutate(dataset = as.character(new))
-
-  df_prev <- prev_dat %>%
-    select(all_of(key_cols), !!outcome) %>%
-    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
-    group_by(disease, year) %>%
-    summarise(
-      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
-      .groups = "drop"
-    ) %>%
-    mutate(dataset = as.character(old))
-
-  df_combined <- bind_rows(df_cur, df_prev)
-
-  df_diff <- df_cur %>%
-    left_join(
-      df_prev,
-      by = c("disease", "year"),
-      suffix = c("_curr", "_prev")
-    ) %>%
-    mutate(
-      yearly_outcome = yearly_outcome_curr - yearly_outcome_prev,
-      dataset = "Difference"
-    ) %>%
-    select(disease, year, yearly_outcome, dataset)
-
-  df_combined <- bind_rows(df_combined, df_diff)
-
-  df_combined$dataset <- factor(
-    df_combined$dataset,
-    levels = c(as.character(old), "Difference", as.character(new))
-  )
-
-  ggplot(
-    df_combined,
-    aes(
-      x = reorder(disease, yearly_outcome),
-      y = yearly_outcome,
-      fill = factor(year)
     )
-  ) +
-    geom_col(position = "dodge") +
-    scale_fill_manual(
-      values = c(
-        "2021" = "#008080",
-        "2022" = "#E68424",
-        "2023" = "#9573B5",
-        "2024" = "#A1D15C"
-      )
-    ) +
-    facet_wrap(~dataset, scales = "free_y") +
-    scale_y_continuous(labels = scales::scientific) +
-    theme_bw() +
-    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
-    labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year")
-}
 
-### Gavi Cumulative Plot (modelling group + average)
-plot_cumul <- function(df, outcome, disease_filter) {
-  outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))]
+    national_summary <-
+      select(df, all_of(key_cols), subregion, !!outcome)
+    national_summary <- rename(national_summary, national_value = !!outcome)
 
-  outcome_sym <- sym(outcome)
-  cum_col <- paste0("cum_", outcome)
-  avg_col <- paste0("avg_", outcome)
+    comparison <- left_join(
+      national_summary,
+      subregional_summary,
+      by = c("subregion", "disease")
+    )
+    comparison <- mutate(
+      comparison,
+      outcome = outcome,
+      difference = national_value - subregional_mean,
+      iqr_score = abs(difference) / subregional_iqr
+    )
 
-  col_old <- paste0(outcome, "_old")
-  col_new <- paste0(outcome, "_new")
+    dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE)
 
-  combined2 <- df %>%
-    select(
-      country,
+    comparison <- mutate(
+      comparison,
+      flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0
+    )
+    comparison <- filter(comparison, flag_iqr)
+    comparison <- select(
+      comparison,
       country_name,
-      disease,
       vaccine,
-      activity_type,
       year,
       modelling_group,
-      all_of(outcome_cols)
-    ) %>%
-    pivot_longer(
-      cols = all_of(outcome_cols),
-      names_to = "touchstone",
-      values_to = "value"
-    ) %>%
-    mutate(
-      touchstone = str_remove(touchstone, paste0("^", outcome, "_")),
-      touchstone = recode(
-        touchstone,
-        "old" = as.character(old),
-        "new" = as.character(new),
-        .default = touchstone
-      ),
-      touchstone = factor(
-        touchstone,
-        levels = c(as.character(old), as.character(new))
-      )
-    )
-  # Cumulative values by modelling group
-  df_cum <- combined2 %>%
-    filter(disease == disease_filter) %>%
-    group_by(modelling_group, touchstone) %>%
-    complete(year = full_seq(year, 1)) %>%
-    arrange(year) %>%
-    mutate(
-      first_valid = min(year[!is.na(value)]),
-      !!cum_col := ifelse(
-        year < first_valid,
-        NA,
-        cumsum(replace_na(value, 0))
-      )
-    ) %>%
-    select(-first_valid) %>%
-    ungroup() %>%
-    mutate(modelling_group = paste(modelling_group, touchstone, sep = "-"))
-
-  # Model average
-  df_avg <- df_cum %>%
-    group_by(year, touchstone) %>%
-    summarise(
-      !!avg_col := mean(!!sym(cum_col), na.rm = TRUE),
-      n_models = sum(!is.na(!!sym(cum_col))),
-      .groups = "drop"
-    ) %>%
-    filter(n_models >= 1) %>%
-    mutate(modelling_group = paste("Model Average", touchstone, sep = "-"))
-
-  # Combine for plot
-  df_plot <- bind_rows(
-    df_cum %>%
-      select(year, modelling_group, touchstone, value = !!sym(cum_col)),
-    df_avg %>%
-      select(year, modelling_group, touchstone, value = !!sym(avg_col))
-  )
-
-  df_plot <- df_plot %>%
-    group_by(modelling_group) %>%
-    filter(sum(value, na.rm = TRUE) > 0) %>%
-    ungroup() %>%
-    mutate(
-      line_type = ifelse(
-        grepl("Model Average", modelling_group),
-        "dashed",
-        "solid"
-      )
+      national_value,
+      subregional_mean,
+      subregional_iqr,
+      difference,
+      iqr_score
     )
+    comparison <- arrange(comparison, desc(iqr_score))
 
-  if (nrow(df_plot) == 0 || all(df_plot$value == 0)) {
-    message("No non-zero data to plot for ", disease_filter, ". Skipping plot.")
-    return(NULL)
-  }
+    comparison
+  })
 
-  p <- ggplot(
-    df_plot,
-    aes(
-      x = year,
-      y = value,
-      color = modelling_group,
-      linetype = line_type
-    )
-  ) +
-    geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) +
-    scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) +
-    guides(linetype = "none") +
-    scale_y_continuous(labels = scales::scientific) +
-    theme_minimal() +
-    labs(
-      x = "Year",
-      y = paste("Cumulative", outcome),
-      color = "Modelling Group",
-      title = paste("Cumulative", outcome, "Over Time –", disease_filter)
-    ) +
-    theme(legend.position = "bottom")
-
-  return(p)
+  results
 }
 
+### Modelling group variations
 save_outputs <- function() {
   saveRDS(
     round_numeric(

From 50f6b0d4e35a51382c13cd57a1dae7f85d7d482c Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Thu, 12 Mar 2026 14:57:14 +0000
Subject: [PATCH 03/17] Clean up funs and add constants

---
 R/constants.R                 |   6 +
 R/helpers.R                   |   7 +
 R/plotting_pressure_testing.R | 300 ++++++++++++++++++++++++++++++++++
 man/constants.Rd              |  10 ++
 4 files changed, 323 insertions(+)
 create mode 100644 R/plotting_pressure_testing.R

diff --git a/R/constants.R b/R/constants.R
index 03cd537..e214aa2 100644
--- a/R/constants.R
+++ b/R/constants.R
@@ -46,3 +46,9 @@ colnames_plot_demog_compare <- c(
   "value",
   "value_millions"
 )
+
+#' @name constants
+EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE")
+
+#' @name constants
+TOUCHSTONE_OLD <- "201910"
diff --git a/R/helpers.R b/R/helpers.R
index 4070345..d065a9e 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -60,3 +60,10 @@ round_numeric <- function(df) {
 str_as_ts_year <- function(x) {
   as.numeric(substr(x, 1, 6))
 }
+
+add_campaign_id <- function(df, key_cols) {
+  df <- group_by(df, across(all_of(key_cols)))
+  df <- mutate(df, campaign_id = row_number())
+
+  ungroup(df)
+}
diff --git a/R/plotting_pressure_testing.R b/R/plotting_pressure_testing.R
new file mode 100644
index 0000000..08a4329
--- /dev/null
+++ b/R/plotting_pressure_testing.R
@@ -0,0 +1,300 @@
+## Plot significant changes
+significant_diff_plot <- function(df, outcome) {
+  df$label <- paste(
+    df$country_name,
+    df$vaccine,
+    df$activity_type,
+    df$year,
+    sep = " | "
+  )
+
+  ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) +
+    geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) +
+    geom_point(size = 2) +
+    labs(
+      x = "Difference",
+      y = "",
+      title = glue(
+        "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year"
+      )
+    ) +
+    theme_minimal()
+}
+
+plot_diff <- function(
+  combined,
+  variable,
+  group_vars = c("activity_type", "vaccine")
+) {
+  x_var <- paste0(variable, "_new")
+  y_var <- paste0(variable, "_old")
+  x_sym <- rlang::sym(x_var)
+  y_sym <- rlang::sym(y_var)
+
+  combined <- combined %>%
+    filter(!is.na(!!x_sym) & !is.na(!!y_sym))
+
+  n_facets <- combined %>%
+    distinct(activity_type, vaccine) %>%
+    nrow()
+
+  ncol_dynamic <- case_when(
+    n_facets <= 4 ~ 2,
+    n_facets <= 9 ~ 3,
+    n_facets <= 16 ~ 4,
+    n_facets <= 25 ~ 6,
+    TRUE ~ 8
+  )
+
+  p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) +
+    geom_point(alpha = 0.5, colour = "#008080") +
+    geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
+    facet_wrap(
+      ~ activity_type + vaccine,
+      scales = "free",
+      ncol = ncol_dynamic
+    ) +
+    scale_x_log10() +
+    scale_y_log10() +
+    theme_bw() +
+    theme(
+      strip.text = element_text(size = 7),
+      panel.spacing = unit(0.05, "lines"),
+      axis.text = element_text(size = 6.5)
+    ) +
+    labs(
+      title = glue("{variable}: Current vs Previous Report"),
+      x = glue("{new} - {variable}"),
+      y = glue("{old} - {variable}")
+    )
+
+  return(p)
+}
+
+plot_modelling_group_variation <- function(df2, df3, outc = "deaths") {
+  offset <- 1e-6
+
+  df2 %>%
+    left_join(df3, by = join_by(modelling_group, vaccine)) %>%
+    mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>%
+    group_by(vaccine) %>%
+    mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>%
+    ggplot() +
+    aes(
+      fill = as.character(mod_num),
+      x = adj_outc,
+      y = reorder(vaccine, mean_outc)
+    ) +
+    geom_density_ridges(
+      alpha = 0.5,
+      stat = "binline",
+      bins = 200,
+      draw_baseline = FALSE
+    ) +
+    facet_grid(. ~ activity_type, scales = "fixed") +
+    theme_bw() +
+    theme(
+      legend.position = "none",
+      axis.text.x = element_text(angle = 90, hjust = 1)
+    ) +
+    scale_x_log10(
+      breaks = scales::trans_breaks("log10", function(x) 10^x),
+      labels = scales::trans_format("log10", math_format(10^.x))
+    ) +
+    scale_fill_viridis_d() +
+    labs(
+      x = paste0(
+        "Burden averted (",
+        ifelse(outc == "dalys", "DALYs", outc),
+        ")"
+      ),
+      y = "Vaccine"
+    )
+}
+
+
+# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended)
+plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") {
+  df_cur <- df %>%
+    select(all_of(key_cols), !!outcome) %>%
+    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
+    group_by(disease, year) %>%
+    summarise(
+      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
+      .groups = "drop"
+    ) %>%
+    mutate(dataset = as.character(new))
+
+  df_prev <- prev_dat %>%
+    select(all_of(key_cols), !!outcome) %>%
+    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
+    group_by(disease, year) %>%
+    summarise(
+      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
+      .groups = "drop"
+    ) %>%
+    mutate(dataset = as.character(old))
+
+  df_combined <- bind_rows(df_cur, df_prev)
+
+  df_diff <- df_cur %>%
+    left_join(
+      df_prev,
+      by = c("disease", "year"),
+      suffix = c("_curr", "_prev")
+    ) %>%
+    mutate(
+      yearly_outcome = yearly_outcome_curr - yearly_outcome_prev,
+      dataset = "Difference"
+    ) %>%
+    select(disease, year, yearly_outcome, dataset)
+
+  df_combined <- bind_rows(df_combined, df_diff)
+
+  df_combined$dataset <- factor(
+    df_combined$dataset,
+    levels = c(as.character(old), "Difference", as.character(new))
+  )
+
+  ggplot(
+    df_combined,
+    aes(
+      x = reorder(disease, yearly_outcome),
+      y = yearly_outcome,
+      fill = factor(year)
+    )
+  ) +
+    geom_col(position = "dodge") +
+    scale_fill_manual(
+      values = c(
+        "2021" = "#008080",
+        "2022" = "#E68424",
+        "2023" = "#9573B5",
+        "2024" = "#A1D15C"
+      )
+    ) +
+    facet_wrap(~dataset, scales = "free_y") +
+    scale_y_continuous(labels = scales::scientific) +
+    theme_bw() +
+    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+    labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year")
+}
+
+### Gavi Cumulative Plot (modelling group + average)
+plot_cumul <- function(df, outcome, disease_filter) {
+  outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))]
+
+  outcome_sym <- sym(outcome)
+  cum_col <- paste0("cum_", outcome)
+  avg_col <- paste0("avg_", outcome)
+
+  col_old <- paste0(outcome, "_old")
+  col_new <- paste0(outcome, "_new")
+
+  combined2 <- df %>%
+    select(
+      country,
+      country_name,
+      disease,
+      vaccine,
+      activity_type,
+      year,
+      modelling_group,
+      all_of(outcome_cols)
+    ) %>%
+    pivot_longer(
+      cols = all_of(outcome_cols),
+      names_to = "touchstone",
+      values_to = "value"
+    ) %>%
+    mutate(
+      touchstone = str_remove(touchstone, paste0("^", outcome, "_")),
+      touchstone = recode(
+        touchstone,
+        "old" = as.character(old),
+        "new" = as.character(new),
+        .default = touchstone
+      ),
+      touchstone = factor(
+        touchstone,
+        levels = c(as.character(old), as.character(new))
+      )
+    )
+  # Cumulative values by modelling group
+  df_cum <- combined2 %>%
+    filter(disease == disease_filter) %>%
+    group_by(modelling_group, touchstone) %>%
+    complete(year = full_seq(year, 1)) %>%
+    arrange(year) %>%
+    mutate(
+      first_valid = min(year[!is.na(value)]),
+      !!cum_col := ifelse(
+        year < first_valid,
+        NA,
+        cumsum(replace_na(value, 0))
+      )
+    ) %>%
+    select(-first_valid) %>%
+    ungroup() %>%
+    mutate(modelling_group = paste(modelling_group, touchstone, sep = "-"))
+
+  # Model average
+  df_avg <- df_cum %>%
+    group_by(year, touchstone) %>%
+    summarise(
+      !!avg_col := mean(!!sym(cum_col), na.rm = TRUE),
+      n_models = sum(!is.na(!!sym(cum_col))),
+      .groups = "drop"
+    ) %>%
+    filter(n_models >= 1) %>%
+    mutate(modelling_group = paste("Model Average", touchstone, sep = "-"))
+
+  # Combine for plot
+  df_plot <- bind_rows(
+    df_cum %>%
+      select(year, modelling_group, touchstone, value = !!sym(cum_col)),
+    df_avg %>%
+      select(year, modelling_group, touchstone, value = !!sym(avg_col))
+  )
+
+  df_plot <- df_plot %>%
+    group_by(modelling_group) %>%
+    filter(sum(value, na.rm = TRUE) > 0) %>%
+    ungroup() %>%
+    mutate(
+      line_type = ifelse(
+        grepl("Model Average", modelling_group),
+        "dashed",
+        "solid"
+      )
+    )
+
+  if (nrow(df_plot) == 0 || all(df_plot$value == 0)) {
+    message("No non-zero data to plot for ", disease_filter, ". Skipping plot.")
+    return(NULL)
+  }
+
+  p <- ggplot(
+    df_plot,
+    aes(
+      x = year,
+      y = value,
+      color = modelling_group,
+      linetype = line_type
+    )
+  ) +
+    geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) +
+    scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) +
+    guides(linetype = "none") +
+    scale_y_continuous(labels = scales::scientific) +
+    theme_minimal() +
+    labs(
+      x = "Year",
+      y = paste("Cumulative", outcome),
+      color = "Modelling Group",
+      title = paste("Cumulative", outcome, "Over Time –", disease_filter)
+    ) +
+    theme(legend.position = "bottom")
+
+  return(p)
+}
diff --git a/man/constants.Rd b/man/constants.Rd
index 70b14da..7608951 100644
--- a/man/constants.Rd
+++ b/man/constants.Rd
@@ -7,6 +7,8 @@
 \alias{scenario_data_colnames}
 \alias{burden_outcome_names}
 \alias{colnames_plot_demog_compare}
+\alias{EXCLUDED_DISEASES}
+\alias{TOUCHSTONE_OLD}
 \title{Package constants}
 \format{
 An object of class \code{character} of length 5.
@@ -16,6 +18,10 @@ An object of class \code{character} of length 4.
 An object of class \code{character} of length 10.
 
 An object of class \code{character} of length 7.
+
+An object of class \code{character} of length 4.
+
+An object of class \code{character} of length 1.
 }
 \usage{
 file_dict_colnames
@@ -25,6 +31,10 @@ scenario_data_colnames
 burden_outcome_names
 
 colnames_plot_demog_compare
+
+EXCLUDED_DISEASES
+
+TOUCHSTONE_OLD
 }
 \description{
 Package constants

From 13d624435632dedd107d1939cbafb389cffd2656 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Fri, 13 Mar 2026 12:14:18 +0000
Subject: [PATCH 04/17] Clean up fns and add constants

---
 R/constants.R                                 |   6 +
 ...sting.R => fn_plotting_pressure_testing.R} |  26 +-
 R/fn_pressure_testing.R                       | 361 ++++++++++++++++++
 3 files changed, 382 insertions(+), 11 deletions(-)
 rename R/{plotting_pressure_testing.R => fn_plotting_pressure_testing.R} (94%)
 create mode 100644 R/fn_pressure_testing.R

diff --git a/R/constants.R b/R/constants.R
index e214aa2..20ae152 100644
--- a/R/constants.R
+++ b/R/constants.R
@@ -52,3 +52,9 @@ EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE")
 
 #' @name constants
 TOUCHSTONE_OLD <- "201910"
+
+#' @name constants
+TOUCHSTONE_NEW <- "202310"
+
+#' @name constants
+TOUCHSTONE_OLD_OLD <- "202110"
diff --git a/R/plotting_pressure_testing.R b/R/fn_plotting_pressure_testing.R
similarity index 94%
rename from R/plotting_pressure_testing.R
rename to R/fn_plotting_pressure_testing.R
index 08a4329..8aaad49 100644
--- a/R/plotting_pressure_testing.R
+++ b/R/fn_plotting_pressure_testing.R
@@ -1,11 +1,14 @@
-## Plot significant changes
+#' Plot significant changes
+#'
+#' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid
+#' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars
+#' labeller label_wrap_gen
+#'
+#' @export
 significant_diff_plot <- function(df, outcome) {
-  df$label <- paste(
-    df$country_name,
-    df$vaccine,
-    df$activity_type,
-    df$year,
-    sep = " | "
+  # retained here as this is a small df and a small operation
+  df$label <- glue::glue(
+    "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}"
   )
 
   ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) +
@@ -13,12 +16,13 @@ significant_diff_plot <- function(df, outcome) {
     geom_point(size = 2) +
     labs(
       x = "Difference",
-      y = "",
-      title = glue(
-        "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year"
+      y = NULL,
+      title = glue::glue(
+        "Significant Differences in {outcome} by Country, Vaccine, \\
+        Activity Type and Year"
       )
     ) +
-    theme_minimal()
+    theme_vimc(x_text_angle = 0)
 }
 
 plot_diff <- function(
diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R
new file mode 100644
index 0000000..140cc79
--- /dev/null
+++ b/R/fn_pressure_testing.R
@@ -0,0 +1,361 @@
+#' Fix for scenario_type variable being included from 202310 onwards
+#'
+#' @keywords pressure_testing
+#'
+#' @export
+filter_recent_ts <- function(df, threshold = 202310) {
+  touchstone_year <- unique(df$touchstone)
+
+  # TODO: check that touchstone year is 6 digit - can there be more digits?
+  ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R
+
+  if (ts_number >= threshold) {
+    dplyr::filter(
+      df,
+      .data$scenario_type == "default"
+    )
+  } else {
+    df
+  }
+}
+
+#' Helper for removing excluded diseases post-202110
+#'
+#' @export
+filter_excluded_diseases_ts <- function(df, threshold = 202110) {
+  touchstone_year <- unique(df$touchstone)
+  ts_number <- str_as_ts_year(touchstone_year)
+
+  if (ts_number <= threshold) {
+    dplyr::filter(df, !.data$disease %in% EXCLUDED_DISEASES)
+  } else {
+    df
+  }
+}
+
+#' Identify duplicates
+#'
+#' @export
+flag_duplicates <- function(df, key_cols) {
+  df <- dplyr::add_count(
+    df,
+    dplyr::across(dplyr::all_of(key_cols)),
+    name = "n_key"
+  )
+
+  dplyr::filter(df, .data$n_key > 1)
+}
+
+#' Identify rows where deaths_averted went from non-NA to NA
+#'
+#' @export
+comparison_prev <- function(df, prev_data, outcome) {
+  prev_df <- dplyr::select(
+    prev_data,
+    dplyr::all_of(key_cols),
+    dplyr::all_of(outcome)
+  )
+  prev_df <- dplyr::rename(prev_df, outcome_prev = {{ outcome }})
+
+  current_df <- dplyr::select(
+    df,
+    dplyr::all_of(key_cols),
+    dplyr::all_of(outcome)
+  )
+  current_df <- dplyr::rename(current_df, outcome_cur = {{ outcome }})
+
+  result <- dplyr::inner_join(prev_df, current_df, by = key_cols)
+  # `,` replaces `&` for dplyr syntax
+  result <- dplyr::filter(result, !is.na(outcome_prev), is.na(outcome_cur))
+
+  result
+}
+
+#' Explore significant changes in deaths and DALYs
+#'
+#' @keywords pressure_testing
+#'
+#' @export
+generate_diffs <- function(
+  prev_df,
+  curr_df,
+  interest_cols,
+  key_cols,
+  touchstone = TOUCHSTONE_OLD
+) {
+  # TODO: replace use of `pars$touchstone_old` with arg `touchstone`
+  #fix for erroneous duplicated YF data in 201910 dataset
+  if (identical(touchstone, TOUCHSTONE_OLD)) {
+    prev_df <- dplyr::filter(
+      prev_df,
+      !(.data$disease == "YF" &
+        .data$support_type == "other" &
+        .data$coverage == 0)
+    )
+  }
+
+  # Fix for multiple campaigns per year (i.e. not true duplicates)
+  # only applicable for 2019 true non-duplicates.
+  prev_df <- add_campaign_id(prev_df, key_cols)
+  curr_df <- add_campaign_id(curr_df, key_cols)
+
+  diff_keys <- c(key_cols, "campaign_id")
+  cols_needed <- unique(c(diff_keys, interest_cols))
+
+  diff <- diffdf::diffdf(
+    prev_df[, cols_needed],
+    curr_df[, cols_needed],
+    keys = diff_keys
+  )
+
+  changes <- stats::setNames(
+    lapply(interest_cols, function(v) {
+      nm <- glue::glue("VarDiff_{v}")
+      if (nm %in% names(diff)) diff[[nm]] else NULL
+    }),
+    interest_cols
+  )
+
+  changes
+}
+
+#' Generate IQR for key outcomes
+#'
+#' @keywords pressure_testing
+#'
+#' @export
+gen_national_iqr <- function(
+  df,
+  group_cols,
+  value_cols,
+  prefix = "national_iqr_"
+) {
+  df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(group_cols)))
+  df <- dplyr::summarise(
+    df,
+    dplyr::across(
+      dplyr::all_of(value_cols),
+      function(x) {
+        IQR(x, na.rm = TRUE)
+      },
+      .names = "{prefix}{.col}"
+    ),
+    .groups = "drop"
+  )
+}
+
+#' Flag significant changes
+#'
+#' @keywords pressure_testing
+#'
+#' @export
+flag_large_diffs <- function(
+  changes_list,
+  iqr_df,
+  variable = c("deaths_averted", "dalys_averted"),
+  group_cols,
+  threshold
+) {
+  # TODO: input checking
+  variable <- rlang::arg_match(variable)
+
+  iqr_col <- glue::glue("national_iqr_{variable}")
+
+  df <- dplyr::mutate(
+    changes_list[[variable]],
+    diff = .data$COMPARE - .data$BASE
+  )
+
+  iqr_df <- dplyr::select(
+    iqr_df,
+    dplyr::all_of(group_cols),
+    dplyr::all_of(iqr_col)
+  )
+
+  df <- left_join(
+    df,
+    iqr_df,
+    by = group_cols
+  )
+
+  df <- dplyr::mutate(
+    df,
+    flag = abs(.data$diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0
+  )
+
+  df <- dplyr::filter(df, .data$flag)
+
+  cols_to_select <- c(
+    "country",
+    "country_name",
+    "year",
+    "vaccine",
+    "modelling_group",
+    "activity_type",
+    "BASE",
+    "COMPARE",
+    "diff"
+  )
+
+  df <- dplyr::select(
+    df,
+    {{ cols_to_select }}
+  )
+
+  # TODO: replace `old` and `new` with defined objs --- see scratch.R
+  # unsure why this syntax was used
+  df <- dplyr::rename(
+    df,
+    !!as.character(old) := BASE,
+    !!as.character(new) := COMPARE
+  )
+
+  dplyr::arrange(df, dplyr::desc(diff))
+}
+
+#' Generate combined df
+#'
+#' @keywords pressure_testing
+#'
+#' @export
+gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) {
+  # TODO: input checks
+  # TODO: df2 needs a better name
+  prev_df <- dplyr::select(prev_dat, {{ interest_cols }})
+  cur_df <- dplyr::select(df2, {{ interest_cols }})
+
+  combined <- dplyr::full_join(
+    prev_df,
+    cur_df,
+    by = key_cols,
+    suffix = c("_old", "_new")
+  )
+
+  dplyr::select(
+    combined,
+    country,
+    country_name,
+    disease,
+    vaccine,
+    activity_type,
+    year,
+    modelling_group,
+    deaths_averted_old,
+    deaths_averted_new,
+    dalys_averted_old,
+    dalys_averted_new
+  )
+}
+
+### Subregional v national estimate comparison
+compare_national_to_subregional <- function(
+  df,
+  outcome,
+  activity_filter,
+  threshold
+) {
+  df <- dplyr::filter(df, activity_type == activity_filter)
+  df <- dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome)
+
+  results <- purrr::map_dfr(outcome, function(otc) {
+    subregional_summary <-
+      dplyr::group_by(df, subregion, disease, activity_type)
+
+    subregional_summary <- dplyr::summarise(
+      subregional_summary,
+      subregional_mean = mean(.data[[otc]], na.rm = TRUE),
+      subregional_iqr = IQR(.data[[otc]], na.rm = TRUE),
+      .groups = "drop"
+    )
+
+    national_summary <-
+      dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome)
+    national_summary <- dplyr::rename(
+      national_summary,
+      national_value = !!outcome
+    )
+
+    comparison <- dplyr::left_join(
+      national_summary,
+      subregional_summary,
+      by = c("subregion", "disease")
+    )
+    comparison <- dplyr::mutate(
+      comparison,
+      outcome = outcome,
+      difference = .data$national_value - .data$subregional_mean,
+      iqr_score = abs(.data$difference) / .data$subregional_iqr
+    )
+
+    dynamic_threshold <- stats::quantile(
+      comparison$iqr_score,
+      0.99,
+      na.rm = TRUE
+    )
+
+    comparison <- dplyr::mutate(
+      comparison,
+      flag_iqr = .data$iqr_score > dynamic_threshold & .data$subregional_iqr > 0
+    )
+    comparison <- dplyr::filter(comparison, .data$flag_iqr)
+    comparison <- dplyr::select(
+      comparison,
+      country_name,
+      vaccine,
+      year,
+      modelling_group,
+      national_value,
+      subregional_mean,
+      subregional_iqr,
+      difference,
+      iqr_score
+    )
+    comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score))
+
+    comparison
+  })
+
+  results
+}
+
+# TODO: reconsider function name, add explicit arguments
+#' Modelling group variations
+#'
+#' @keywords pressure_testing
+#'
+#' @export
+save_outputs <- function() {
+  saveRDS(
+    round_numeric(
+      missing_in_current %>%
+        dplyr::select(dplyr::all_of(c(
+          "country_name",
+          "vaccine",
+          "activity_type",
+          "year",
+          "modelling_group"
+        )))
+    ),
+    "outputs/missing_in_current.rds"
+  )
+  saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds")
+  saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds")
+  saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds")
+  saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds")
+  saveRDS(
+    round_numeric(subregional_flags_deaths_camp),
+    "outputs/subregional_flags_deaths_camp.rds"
+  )
+  saveRDS(
+    round_numeric(subregional_flags_deaths_rout),
+    "outputs/subregional_flags_deaths_rout.rds"
+  )
+  saveRDS(
+    round_numeric(subregional_flags_dalys_camp),
+    "outputs/subregional_flags_dalys_camp.rds"
+  )
+  saveRDS(
+    round_numeric(subregional_flags_dalys_rout),
+    "outputs/subregional_flags_dalys_rout.rds"
+  )
+}

From 8aa8494dfab033d9d4e70092f1b2237343ccf6af Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Fri, 13 Mar 2026 12:15:14 +0000
Subject: [PATCH 05/17] Rename files

---
 ..._diagnostics.R => fn_burden_diagnostics.R} |   0
 R/{helpers.R => fn_helpers.R}                 |   0
 ...ing.R => fn_plotting_burden_diagnostics.R} |   0
 ...ing_prep.R => fn_plotting_prep_bur_diag.R} |   0
 R/pressure_testing.R                          | 291 ------------------
 5 files changed, 291 deletions(-)
 rename R/{burden_diagnostics.R => fn_burden_diagnostics.R} (100%)
 rename R/{helpers.R => fn_helpers.R} (100%)
 rename R/{plotting.R => fn_plotting_burden_diagnostics.R} (100%)
 rename R/{plotting_prep.R => fn_plotting_prep_bur_diag.R} (100%)
 delete mode 100644 R/pressure_testing.R

diff --git a/R/burden_diagnostics.R b/R/fn_burden_diagnostics.R
similarity index 100%
rename from R/burden_diagnostics.R
rename to R/fn_burden_diagnostics.R
diff --git a/R/helpers.R b/R/fn_helpers.R
similarity index 100%
rename from R/helpers.R
rename to R/fn_helpers.R
diff --git a/R/plotting.R b/R/fn_plotting_burden_diagnostics.R
similarity index 100%
rename from R/plotting.R
rename to R/fn_plotting_burden_diagnostics.R
diff --git a/R/plotting_prep.R b/R/fn_plotting_prep_bur_diag.R
similarity index 100%
rename from R/plotting_prep.R
rename to R/fn_plotting_prep_bur_diag.R
diff --git a/R/pressure_testing.R b/R/pressure_testing.R
deleted file mode 100644
index d73c08f..0000000
--- a/R/pressure_testing.R
+++ /dev/null
@@ -1,291 +0,0 @@
-### All functions for pressure testing
-
-# Flexible rounding
-
-# Fix for scenario_type variable being included from 202310 onwards
-filter_recent_ts <- function(df, threshold = 202310) {
-  touchstone_year <- unique(df$touchstone)
-
-  # TODO: check that touchstone year is 6 digit - can there be more digits?
-  ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R
-
-  if (ts_number >= threshold) {
-    dplyr::filter(
-      df,
-      scenario_type == "default"
-    )
-  } else {
-    df
-  }
-}
-
-# Helper for removing excluded diseases post-202110
-filter_excluded_diseases_ts <- function(df, threshold = 202110) {
-  touchstone_year <- unique(df$touchstone)
-  ts_number <- str_as_ts_year(touchstone_year)
-
-  if (ts_number <= threshold) {
-    filter(df, !disease %in% exclude_dis)
-  } else {
-    df
-  }
-}
-
-# Identify duplicates
-flag_duplicates <- function(df, key_cols) {
-  df <- dplyr::add_count(
-    df,
-    dplyr::across(dplyr::all_of(key_cols)),
-    name = "n_key"
-  )
-
-  filter(df, n_key > 1)
-}
-
-# Identify rows where deaths_averted went from non-NA to NA
-comparison_prev <- function(df, prev_dat, outcome) {
-  prev_df <- select(prev_data, all_of(key_cols), all_of(outcome))
-  prev_df <- rename(prev_df, outcome_prev = !!sym(outcome))
-
-  current_df <- select(current_df, all_of(key_cols), all_of(outcome))
-  current_df <- rename(current_df, outcome_cur = !!sym(outcome))
-
-  result <- inner_join(prev_df, current_df, by = key_cols)
-  result <- filter(result, !is.na(outcome_prev) & is.na(outcome_cur))
-
-  result
-}
-
-# Explore significant changes in key outcomes (i.e. deaths/dalys)
-generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) {
-  #fix for erroneous duplicated YF data in 201910 dataset
-  if (identical(pars$touchstone_old, TOUCHSTONE_OLD)) {
-    prev_df <- filter(
-      prev_df,
-      !(disease == "YF" & support_type == "other" & coverage == 0)
-    )
-  }
-
-  # Fix for multiple campaigns per year (i.e. not true duplicates)
-  # only applicable for 2019 true non-duplicates.
-  prev_df <- add_campaign_id(prev_df, key_cols)
-  curr_df <- add_campaign_id(curr_df, key_cols)
-
-  diff_keys <- c(key_cols, "campaign_id")
-  cols_needed <- unique(c(diff_keys, interest_cols))
-
-  diff <- diffdf::diffdf(
-    prev_df[, cols_needed],
-    curr_df[, cols_needed],
-    keys = diff_keys
-  )
-
-  changes <- setNames(
-    lapply(interest_cols, function(v) {
-      nm <- paste0("VarDiff_", v)
-      if (nm %in% names(diff)) diff[[nm]] else NULL
-    }),
-    interest_cols
-  )
-
-  changes
-}
-
-# Generate IQR for key outcomes - for threshold of "significant"
-gen_national_iqr <- function(
-  df,
-  group_cols,
-  value_cols,
-  prefix = "national_iqr_"
-) {
-  df <- group_by(df, across(all_of(group_cols)))
-  df <- summarise(
-    df,
-    across(
-      all_of(value_cols),
-      \(x) IQR(x, na.rm = TRUE),
-      .names = "{prefix}{.col}"
-    ),
-    .groups = "drop"
-  )
-}
-
-# TODO: I don't like how this looks - this should probably be a simpler
-# functional that maps over a list in a separate function
-## Flag significant changes
-flag_large_diffs <- function(
-  changes_list,
-  iqr_df,
-  variable,
-  group_cols,
-  threshold
-) {
-  iqr_col <- paste0("national_iqr_", variable)
-
-  # returns a list so that the function can accept multiple variables
-  lapply(
-    changes_list[[variable]],
-    temp_fn,
-    iqr_df,
-    variable,
-    group_cols,
-    threshold
-  )
-}
-
-temp_fn <- function(df, iqr_df, variable, group_cols, threshold) {
-  mutate(
-    df,
-    diff = COMPARE - BASE
-  ) %>%
-    left_join(
-      iqr_df %>% select(all_of(group_cols), all_of(iqr_col)),
-      by = group_cols
-    ) %>%
-    mutate(
-      flag = abs(diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0
-    ) %>%
-    filter(flag) %>%
-    select(
-      country,
-      country_name,
-      year,
-      vaccine,
-      modelling_group,
-      activity_type,
-      BASE,
-      COMPARE,
-      diff
-    ) %>%
-    rename(!!as.character(old) := BASE, !!as.character(new) := COMPARE) %>%
-    arrange(desc(diff))
-}
-
-### Generate combined df
-gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) {
-  prev_df <- prev_dat[, interest_cols]
-  cur_df <- df2[, interest_cols]
-
-  combined <- full_join(
-    prev_df,
-    cur_df,
-    by = key_cols,
-    suffix = c("_old", "_new")
-  )
-
-  combined <- combined %>%
-    select(
-      country,
-      country_name,
-      disease,
-      vaccine,
-      activity_type,
-      year,
-      modelling_group,
-      deaths_averted_old,
-      deaths_averted_new,
-      dalys_averted_old,
-      dalys_averted_new
-    )
-  return(combined)
-}
-
-### Subregional v national estimate comparison
-compare_national_to_subregional <- function(
-  df,
-  outcome,
-  activity_filter,
-  threshold
-) {
-  df <- filter(df, activity_type == activity_filter)
-  df <- select(df, all_of(key_cols), subregion, !!outcome)
-
-  results <- purrr::map_dfr(outcome, function(otc) {
-    subregional_summary <-
-      group_by(df, subregion, disease, activity_type)
-    subregional_summary <- summarise(
-      subregional_summary,
-      subregional_mean = mean(.data[[otc]], na.rm = TRUE),
-      subregional_iqr = IQR(.data[[otc]], na.rm = TRUE),
-      .groups = "drop"
-    )
-
-    national_summary <-
-      select(df, all_of(key_cols), subregion, !!outcome)
-    national_summary <- rename(national_summary, national_value = !!outcome)
-
-    comparison <- left_join(
-      national_summary,
-      subregional_summary,
-      by = c("subregion", "disease")
-    )
-    comparison <- mutate(
-      comparison,
-      outcome = outcome,
-      difference = national_value - subregional_mean,
-      iqr_score = abs(difference) / subregional_iqr
-    )
-
-    dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE)
-
-    comparison <- mutate(
-      comparison,
-      flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0
-    )
-    comparison <- filter(comparison, flag_iqr)
-    comparison <- select(
-      comparison,
-      country_name,
-      vaccine,
-      year,
-      modelling_group,
-      national_value,
-      subregional_mean,
-      subregional_iqr,
-      difference,
-      iqr_score
-    )
-    comparison <- arrange(comparison, desc(iqr_score))
-
-    comparison
-  })
-
-  results
-}
-
-### Modelling group variations
-save_outputs <- function() {
-  saveRDS(
-    round_numeric(
-      missing_in_current %>%
-        select(all_of(c(
-          "country_name",
-          "vaccine",
-          "activity_type",
-          "year",
-          "modelling_group"
-        )))
-    ),
-    "outputs/missing_in_current.rds"
-  )
-  saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds")
-  saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds")
-  saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds")
-  saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds")
-  saveRDS(
-    round_numeric(subregional_flags_deaths_camp),
-    "outputs/subregional_flags_deaths_camp.rds"
-  )
-  saveRDS(
-    round_numeric(subregional_flags_deaths_rout),
-    "outputs/subregional_flags_deaths_rout.rds"
-  )
-  saveRDS(
-    round_numeric(subregional_flags_dalys_camp),
-    "outputs/subregional_flags_dalys_camp.rds"
-  )
-  saveRDS(
-    round_numeric(subregional_flags_dalys_rout),
-    "outputs/subregional_flags_dalys_rout.rds"
-  )
-}

From de1eda658e8402d12a0624e716f797ecac0fdeed Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Mon, 20 Apr 2026 16:55:14 +0100
Subject: [PATCH 06/17] Add checks and docs for pressure testing fns

---
 R/fn_pressure_testing.R | 683 ++++++++++++++++++++++++++++++----------
 1 file changed, 523 insertions(+), 160 deletions(-)

diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R
index 140cc79..9f25309 100644
--- a/R/fn_pressure_testing.R
+++ b/R/fn_pressure_testing.R
@@ -1,14 +1,50 @@
-#' Fix for scenario_type variable being included from 202310 onwards
+#' Filter data for touchstones or diseases
+#'
+#' @name pres_test_filter_data
+#' @rdname pres_test_filter_data
+#'
+#' @description
+#' A pair of helper functions allowing filtering out of recent touchstone values
+#' and excluded diseases.
+#'
+#' @param df A `<data.frame>` holding impact data. This data.frame is not
+#' checked for contents
+#'
+#' @param threshold A six-digit number that is checked as a valid touchstone
+#' identifier (YYYYMM format) using [validate_ts_year()].
 #'
 #' @keywords pressure_testing
 #'
+#' @return A filtered `<data.frame>`.
+#'
+#' - `filter_recent_ts()` returns `df` with rows where the touchstone condition
+#' is not met excluded.
+#'
+#' - `filter_excluded_diseases_ts()` returns `df` with rows where rows relating
+#' to the [EXCLUDED_DISEASES], when the touchstone year in `df` is less than the
+#' `threshold`, excluded.
+#'
+#' - `filter_duplicates()` returns `df` with duplicated combinations of
+#' `key_cols` removed.
+#'
+#' - `filter_invalid_trajectories()` returns `df` with bad outcome trajectories
+#' (`NA` to non-`NA`) removed.
+#'
 #' @export
 filter_recent_ts <- function(df, threshold = 202310) {
-  touchstone_year <- unique(df$touchstone)
+  checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L)
+  checkmate::assert_names(
+    names(df),
+    must.include = "touchstone"
+  )
+  threshold <- validate_ts_year(threshold) # apply same rule as data ts year
 
-  # TODO: check that touchstone year is 6 digit - can there be more digits?
-  ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R
+  touchstone_year <- unique(df[["touchstone"]])
 
+  ts_number <- validate_ts_year(touchstone_year) # see R/helpers.R
+
+  # NOTE: consider converting to Date and checking - numeric comparison
+  # works okay for now
   if (ts_number >= threshold) {
     dplyr::filter(
       df,
@@ -19,12 +55,20 @@ filter_recent_ts <- function(df, threshold = 202310) {
   }
 }
 
-#' Helper for removing excluded diseases post-202110
+#' @name pres_test_filter_data
 #'
 #' @export
 filter_excluded_diseases_ts <- function(df, threshold = 202110) {
+  checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L)
+  checkmate::assert_names(
+    names(df),
+    must.include = "touchstone"
+  )
+
+  threshold <- validate_ts_year(threshold)
+
   touchstone_year <- unique(df$touchstone)
-  ts_number <- str_as_ts_year(touchstone_year)
+  ts_number <- validate_ts_year(touchstone_year)
 
   if (ts_number <= threshold) {
     dplyr::filter(df, !.data$disease %in% EXCLUDED_DISEASES)
@@ -33,10 +77,27 @@ filter_excluded_diseases_ts <- function(df, threshold = 202110) {
   }
 }
 
-#' Identify duplicates
+#' @name pres_test_filter_data
+#'
+#' @param key_cols Key columns in `df` to check for duplicates.
 #'
 #' @export
-flag_duplicates <- function(df, key_cols) {
+filter_duplicates <- function(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) {
+  checkmate::assert_data_frame(df, min.cols = 1L, min.rows = 1L)
+  checkmate::assert_character(key_cols)
+
+  has_cols <- checkmate::test_names(
+    colnames(df),
+    must.include = key_cols
+  )
+  if (!has_cols) {
+    missing_cols <- setdiff(colnames(df), key_cols) # nolint used in cli
+    cli::cli_abort(
+      "Expected {.code df} to have columns {.str {key_cols}}, but columns \
+    {.str {.strong {missing_cols}}} were missing!"
+    )
+  }
+
   df <- dplyr::add_count(
     df,
     dplyr::across(dplyr::all_of(key_cols)),
@@ -46,46 +107,136 @@ flag_duplicates <- function(df, key_cols) {
   dplyr::filter(df, .data$n_key > 1)
 }
 
-#' Identify rows where deaths_averted went from non-NA to NA
+#' @name pres_test_filter_data
+#'
+#' @param prev_data A `<data.frame>` holding data from a previous touchstone for
+#' the same scenarios as `df`.
+#'
+#' @param outcome A string giving the outcome of interest; may be one of
+#' `"deaths_averted"` or `"dalys_averted"`.
 #'
 #' @export
-comparison_prev <- function(df, prev_data, outcome) {
+filter_invalid_trajectories <- function(
+  df,
+  prev_data,
+  outcome = c("deaths_averted", "dalys_averted")
+) {
+  checkmate::assert_data_frame(df, min.cols = 1L, min.rows = 1L)
+
+  # TODO: can we assume prev_data is at least the size of df?
+  checkmate::assert_data_frame(
+    prev_data,
+    min.cols = ncol(df),
+    min.rows = nrow(df)
+  )
+
+  outcome <- rlang::arg_match(outcome)
+
+  has_cols <- checkmate::test_names(
+    colnames(df),
+    must.include = c(COLNAMES_KEY_PRESSURE_TEST, outcome)
+  )
+  if (!has_cols) {
+    missing_cols <- setdiff(colnames(df), COLNAMES_KEY_PRESSURE_TEST) # nolint
+    cli::cli_abort(
+      "Expected {.code df} to have columns \
+      {.str {COLNAMES_KEY_PRESSURE_TEST}}, but columns \
+      {.str {.strong {missing_cols}}} were missing!"
+    )
+  }
+
+  has_cols <- checkmate::test_names(
+    colnames(prev_data),
+    must.include = c(COLNAMES_KEY_PRESSURE_TEST, outcome)
+  )
+  if (!has_cols) {
+    missing_cols <- setdiff(colnames(prev_data), COLNAMES_KEY_PRESSURE_TEST)
+    cli::cli_abort(
+      "Expected {.code prev_data} to have columns \
+      {.str {COLNAMES_KEY_PRESSURE_TEST}}, but columns \
+      {.str {.strong {missing_cols}}} were missing!"
+    )
+  }
+
   prev_df <- dplyr::select(
     prev_data,
-    dplyr::all_of(key_cols),
+    dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST),
     dplyr::all_of(outcome)
   )
   prev_df <- dplyr::rename(prev_df, outcome_prev = {{ outcome }})
 
   current_df <- dplyr::select(
     df,
-    dplyr::all_of(key_cols),
+    dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST),
     dplyr::all_of(outcome)
   )
   current_df <- dplyr::rename(current_df, outcome_cur = {{ outcome }})
 
-  result <- dplyr::inner_join(prev_df, current_df, by = key_cols)
-  # `,` replaces `&` for dplyr syntax
-  result <- dplyr::filter(result, !is.na(outcome_prev), is.na(outcome_cur))
+  result <- dplyr::inner_join(
+    prev_df,
+    current_df,
+    by = COLNAMES_KEY_PRESSURE_TEST
+  )
 
-  result
+  # `,` replaces `&` for dplyr syntax
+  dplyr::filter(
+    result,
+    !is.na(.data$outcome_prev),
+    is.na(.data$outcome_cur)
+  )
 }
 
 #' Explore significant changes in deaths and DALYs
 #'
+#' @param prev_df A `<data.frame>` of impact estimates from the previous
+#' touchstone.
+#'
+#' @param curr_df A `<data.frame>` of impact estimates for the current
+#' touchstone.
+#'
+#' @param interest_cols A character vector of columns to check for differences.
+#' Defaults to [COLNAMES_INTEREST_PRESSURE_TEST].
+#'
+#' @param key_cols A character vector of columns to use when assigning campaign
+#' identifiers. Passed to [add_campaign_id()], defaults to
+#' [COLNAMES_KEY_PRESSURE_TEST].
+#'
+#' @param touchstone A six character string that can be converted to a six digit
+#' numeric giving a touchstone identifier in `YYYYMM` format.
+#'
+#' @return A list of data.frames of differences between `prev_df` and `curr_df`,
+#' with one list element per element of `interest_cols`.
+#'
 #' @keywords pressure_testing
 #'
 #' @export
 generate_diffs <- function(
   prev_df,
   curr_df,
-  interest_cols,
-  key_cols,
-  touchstone = TOUCHSTONE_OLD
+  interest_cols = COLNAMES_INTEREST_PRESSURE_TEST,
+  key_cols = COLNAMES_KEY_PRESSURE_TEST,
+  touchstone = DEF_TOUCHSTONE_OLD
 ) {
-  # TODO: replace use of `pars$touchstone_old` with arg `touchstone`
-  #fix for erroneous duplicated YF data in 201910 dataset
-  if (identical(touchstone, TOUCHSTONE_OLD)) {
+  checkmate::assert_data_frame(prev_df, min.rows = 1L, min.cols = 1L)
+  checkmate::assert_data_frame(curr_df, min.rows = 1L, min.cols = 1L)
+
+  checkmate::assert_character(interest_cols, min.len = 1L)
+  checkmate::assert_character(key_cols, min.len = 1L)
+
+  # check interest cols in dfs. key cols are check in `add_campaign_id`
+  checkmate::assert_names(
+    colnames(prev_df),
+    interest_cols
+  )
+  checkmate::assert_names(
+    colnames(curr_df),
+    interest_cols
+  )
+
+  touchstone <- validate_ts_year(touchstone)
+
+  # fix for erroneous duplicated YF data in 201910 dataset
+  if (touchstone == DEF_TOUCHSTONE_OLD) {
     prev_df <- dplyr::filter(
       prev_df,
       !(.data$disease == "YF" &
@@ -100,9 +251,9 @@ generate_diffs <- function(
   curr_df <- add_campaign_id(curr_df, key_cols)
 
   diff_keys <- c(key_cols, "campaign_id")
-  cols_needed <- unique(c(diff_keys, interest_cols))
+  cols_needed <- union(diff_keys, interest_cols)
 
-  diff <- diffdf::diffdf(
+  df_diff <- diffdf::diffdf(
     prev_df[, cols_needed],
     curr_df[, cols_needed],
     keys = diff_keys
@@ -111,7 +262,7 @@ generate_diffs <- function(
   changes <- stats::setNames(
     lapply(interest_cols, function(v) {
       nm <- glue::glue("VarDiff_{v}")
-      if (nm %in% names(diff)) diff[[nm]] else NULL
+      if (nm %in% names(df_diff)) df_diff[[nm]] else NULL
     }),
     interest_cols
   )
@@ -123,28 +274,89 @@ generate_diffs <- function(
 #'
 #' @keywords pressure_testing
 #'
+#' @param df A data.frame of impact estimates.
+#'
+#' @param group_cols A character vector of grouping columns. Defaults to
+#' "country", "vaccine", "activity_type".
+#'
+#' @param value_cols A character vector of value columns. Defaults to
+#' "deaths_averted" and "dalys_averted".
+#'
+#' @param prefix A string for the prefix applied to every IQR summary column.
+#' Defaults to "national_iqr".
+#'
+#' @return A `<data.frame>` with the inter-quartile range of the columns
+#' in `value_cols`, with the column name constructed as `{prefix}_{value_col}`
+#' using string interpolation.
+#'
 #' @export
 gen_national_iqr <- function(
   df,
-  group_cols,
-  value_cols,
-  prefix = "national_iqr_"
+  group_cols = c("country", "vaccine", "activity_type"),
+  value_cols = c("deaths_averted", "dalys_averted"),
+  prefix = "national_iqr"
 ) {
+  checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L)
+  checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE)
+
+  # NOTE: restricting value columns to deaths and dalys averted
+  value_cols <- rlang::arg_match(
+    value_cols,
+    c("deaths_averted", "dalys_averted"),
+    multiple = TRUE
+  )
+
+  checkmate::assert_string(prefix)
+
+  checkmate::assert_names(
+    colnames(df),
+    must.include = union(group_cols, value_cols)
+  )
+
   df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(group_cols)))
   df <- dplyr::summarise(
     df,
     dplyr::across(
       dplyr::all_of(value_cols),
       function(x) {
-        IQR(x, na.rm = TRUE)
+        stats::IQR(x, na.rm = TRUE)
       },
-      .names = "{prefix}{.col}"
+      .names = "{prefix}_{.col}"
     ),
     .groups = "drop"
   )
 }
 
-#' Flag significant changes
+#' Flag significant changes in impact estimates
+#'
+#' @description Calculates and flags whether the difference in impact estimates
+#' between touchstones is greater than expected. A row is flagged if the
+#' difference is greater than `threshold` \eqn{\times} the inter-quartile range
+#' for cases where the IQR is greater than zero.
+#'
+#' @param changes_list A list of data.frames with one element per variable of
+#' interest (see `variable`). Usually generated using [generate_diffs()].
+#'
+#' @param iqr_df A data.frame of inter-quartile differences generated using
+#' [gen_national_iqr()].
+#'
+#' @param variable A string specifying the variable of interest. Must be one of
+#' "deaths_averted" and "dalys_averted", and must be present as a name and
+#' element of `changes_list`.
+#'
+#' @inheritParams gen_national_iqr
+#'
+#' @param threshold A single numeric value for the IQR multiplier. Defaults to
+#' 100.
+#'
+#' @param touchstone_old The previous touchstone identifier. Defaults to
+#' [DEF_TOUCHSTONE_OLD_OLD].
+#'
+#' @param touchstone_new The new touchstone identifier. Defaults to
+#' [DEF_TOUCHSTONE_NEW].
+#'
+#' @return A filtered data.frame of differences in impact estimates flagged
+#' as too large. Rows with differences within tolerance are removed.
 #'
 #' @keywords pressure_testing
 #'
@@ -153,16 +365,46 @@ flag_large_diffs <- function(
   changes_list,
   iqr_df,
   variable = c("deaths_averted", "dalys_averted"),
-  group_cols,
-  threshold
+  group_cols = c("country", "vaccine", "activity_type"),
+  threshold = 100,
+  touchstone_old = DEF_TOUCHSTONE_OLD_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
 ) {
-  # TODO: input checking
+  checkmate::assert_list(changes_list, "data.frame")
+  checkmate::assert_data_frame(iqr_df, min.rows = 1L, min.cols = 1L)
+
   variable <- rlang::arg_match(variable)
+  checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE)
+
+  # TODO: check what a sensible upper limit might be
+  checkmate::assert_number(threshold, lower = 1.0, finite = TRUE)
+
+  touchstone_old <- validate_ts_year(touchstone_old)
+  touchstone_new <- validate_ts_year(touchstone_new)
+
+  # cross checking
+  has_var <- variable %in% names(changes_list)
+  if (!has_var) {
+    cli::cli_abort(
+      "Expected list {.code changes_list} to have an element with the name \
+    {.str {variable}}, but it does not."
+    )
+  }
+  df_compare <- changes_list[[variable]]
+
+  checkmate::assert_names(
+    colnames(df_compare),
+    must.include = group_cols
+  )
+  checkmate::assert_names(
+    colnames(iqr_df),
+    must.include = group_cols
+  )
 
   iqr_col <- glue::glue("national_iqr_{variable}")
 
-  df <- dplyr::mutate(
-    changes_list[[variable]],
+  df_compare <- dplyr::mutate(
+    df_compare,
     diff = .data$COMPARE - .data$BASE
   )
 
@@ -172,18 +414,18 @@ flag_large_diffs <- function(
     dplyr::all_of(iqr_col)
   )
 
-  df <- left_join(
-    df,
+  df_compare <- dplyr::left_join(
+    df_compare,
     iqr_df,
     by = group_cols
   )
 
-  df <- dplyr::mutate(
-    df,
+  df_compare <- dplyr::mutate(
+    df_compare,
     flag = abs(.data$diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0
   )
 
-  df <- dplyr::filter(df, .data$flag)
+  df_compare <- dplyr::filter(df_compare, .data$flag)
 
   cols_to_select <- c(
     "country",
@@ -197,29 +439,59 @@ flag_large_diffs <- function(
     "diff"
   )
 
-  df <- dplyr::select(
-    df,
+  df_compare <- dplyr::select(
+    df_compare,
     {{ cols_to_select }}
   )
 
-  # TODO: replace `old` and `new` with defined objs --- see scratch.R
-  # unsure why this syntax was used
-  df <- dplyr::rename(
-    df,
-    !!as.character(old) := BASE,
-    !!as.character(new) := COMPARE
+  rename_lookup <- c("BASE", "COMPARE")
+  names(rename_lookup) <- c(
+    as.character(touchstone_old),
+    as.character(touchstone_new)
+  )
+  df_compare <- dplyr::rename(
+    df_compare,
+    rename_lookup
   )
 
-  dplyr::arrange(df, dplyr::desc(diff))
+  dplyr::arrange(df_compare, dplyr::desc(diff))
 }
 
-#' Generate combined df
+#' Combine and align data from two touchstones
+#'
+#' @description
+#' Generates a full join of two data.frames, selecting for columns of interest.
+#'
+#' @param prev_dat A data.frame of impact estimates corresponding to an earlier
+#' touchstone.
+#'
+#' @param df2 A data.frame of impact estimates corresponding to a more recent
+#' touchstone.
+#'
+#' @param interest_cols A character vector of columns of interest. Defaults to
+#' [COLNAMES_INTEREST_PRESSURE_TEST].
+#'
+#' @param key_cols A character vector of columns of interest. Defaults to
+#' [COLNAMES_KEY_PRESSURE_TEST].
+#'
+#' @return A data.frame which is a full join of `prev_dat` and `df2`. Columns
+#' are disambiguated with the suffixes `"_old"` and `"_new"`.
 #'
 #' @keywords pressure_testing
 #'
 #' @export
-gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) {
-  # TODO: input checks
+gen_combined_df <- function(
+  prev_dat,
+  df2,
+  interest_cols = COLNAMES_INTEREST_PRESSURE_TEST,
+  key_cols = COLNAMES_KEY_PRESSURE_TEST
+) {
+  checkmate::assert_data_frame(
+    prev_dat,
+    min.cols = 1L,
+    min.rows = 1L
+  )
+
   # TODO: df2 needs a better name
   prev_df <- dplyr::select(prev_dat, {{ interest_cols }})
   cur_df <- dplyr::select(df2, {{ interest_cols }})
@@ -231,131 +503,222 @@ gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) {
     suffix = c("_old", "_new")
   )
 
+  cols_to_select <- c(
+    "country",
+    "country_name",
+    "disease",
+    "vaccine",
+    "activity_type",
+    "year",
+    "modelling_group",
+    "deaths_averted_old",
+    "deaths_averted_new",
+    "dalys_averted_old",
+    "dalys_averted_new"
+  )
+
   dplyr::select(
     combined,
-    country,
-    country_name,
-    disease,
-    vaccine,
-    activity_type,
-    year,
-    modelling_group,
-    deaths_averted_old,
-    deaths_averted_new,
-    dalys_averted_old,
-    dalys_averted_new
+    {{ cols_to_select }}
   )
 }
 
-### Subregional v national estimate comparison
-compare_national_to_subregional <- function(
+#' Compare sub-regional and national estimates
+#'
+#' @param df A data.frame with sub-region level data on vaccination impact
+#' outcomes.
+#'
+#' @param outcome A string for the outcome of interest. May be one of
+#' `"deaths_averted_rate"` or `"dalys_averted_rate"`.
+#'
+#' @param activity_filter A string for the type of vaccination activity. May be
+#' one of `"campaign"` or `"routine"`.
+#'
+#' @return A data.frame of sub-regional vaccination impact estimates where the
+#' impact is considered to be outside the tolerance limit.
+#'
+#' @keywords pressure_testing
+#'
+#' @export
+compare_natl_subreg <- function(
   df,
-  outcome,
-  activity_filter,
-  threshold
+  outcome = c("deaths_averted_rate", "dalys_averted_rate"),
+  activity_filter = c("campaign", "routine")
 ) {
-  df <- dplyr::filter(df, activity_type == activity_filter)
-  df <- dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome)
-
-  results <- purrr::map_dfr(outcome, function(otc) {
-    subregional_summary <-
-      dplyr::group_by(df, subregion, disease, activity_type)
-
-    subregional_summary <- dplyr::summarise(
-      subregional_summary,
-      subregional_mean = mean(.data[[otc]], na.rm = TRUE),
-      subregional_iqr = IQR(.data[[otc]], na.rm = TRUE),
-      .groups = "drop"
-    )
+  df <- dplyr::filter(df, .data$activity_type == activity_filter)
+  df <- dplyr::select(
+    df,
+    dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST),
+    "subregion",
+    !!outcome
+  )
 
-    national_summary <-
-      dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome)
-    national_summary <- dplyr::rename(
-      national_summary,
-      national_value = !!outcome
-    )
+  # first get national summary
+  national_summary <- dplyr::select(
+    df,
+    dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST),
+    .data$subregion,
+    !!outcome
+  )
+  national_summary <- dplyr::rename(
+    national_summary,
+    national_value = !!outcome
+  )
 
-    comparison <- dplyr::left_join(
-      national_summary,
-      subregional_summary,
-      by = c("subregion", "disease")
-    )
-    comparison <- dplyr::mutate(
-      comparison,
-      outcome = outcome,
-      difference = .data$national_value - .data$subregional_mean,
-      iqr_score = abs(.data$difference) / .data$subregional_iqr
-    )
+  # next get sub-regional summary
+  subregional_summary <-
+    dplyr::group_by(df, .data$subregion, .data$disease, .data$activity_type)
 
-    dynamic_threshold <- stats::quantile(
-      comparison$iqr_score,
-      0.99,
-      na.rm = TRUE
-    )
+  subregional_summary <- dplyr::summarise(
+    subregional_summary,
+    subregional_mean = mean(.data[[outcome]], na.rm = TRUE),
+    subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE),
+    .groups = "drop"
+  )
 
-    comparison <- dplyr::mutate(
-      comparison,
-      flag_iqr = .data$iqr_score > dynamic_threshold & .data$subregional_iqr > 0
-    )
-    comparison <- dplyr::filter(comparison, .data$flag_iqr)
-    comparison <- dplyr::select(
-      comparison,
-      country_name,
-      vaccine,
-      year,
-      modelling_group,
-      national_value,
-      subregional_mean,
-      subregional_iqr,
-      difference,
-      iqr_score
-    )
-    comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score))
+  comparison <- dplyr::left_join(
+    national_summary,
+    subregional_summary,
+    by = c("subregion", "disease")
+  )
+  comparison <- dplyr::mutate(
+    comparison,
+    outcome = outcome,
+    difference = .data$national_value - .data$subregional_mean,
+    iqr_score = abs(.data$difference) / .data$subregional_iqr
+  )
 
-    comparison
-  })
+  dynamic_threshold <- stats::quantile(
+    comparison$iqr_score,
+    0.99,
+    na.rm = TRUE
+  )
 
-  results
+  comparison <- dplyr::mutate(
+    comparison,
+    flag_iqr = .data$iqr_score > dynamic_threshold & .data$subregional_iqr > 0
+  )
+  comparison <- dplyr::filter(comparison, .data$flag_iqr)
+
+  cols_to_select <- c(
+    "country_name",
+    "vaccine",
+    "year",
+    "modelling_group",
+    "national_value",
+    "subregional_mean",
+    "subregional_iqr",
+    "difference",
+    "iqr_score"
+  )
+  comparison <- dplyr::select(comparison, {{ cols_to_select }})
+  comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score))
+
+  comparison
 }
 
-# TODO: reconsider function name, add explicit arguments
-#' Modelling group variations
+#' Save pressure-testing diagnostics to local file
+#'
+#' @description
+#' Save pressure-testing diagnostics data.frames to local compressed files in
+#' the `.Rds` format. Input data.frames are generated by other package functions
+#' and are not checked here.
+#'
+#' @param missing_in_current A data.frame.
+#'
+#' @param missing_deaths A data.frame that is the output of
+#' [filter_invalid_trajectories()] with the outcome `"deaths_averted"`.
+#'
+#' @param missing_dalys A data.frame that is the output of
+#' [filter_invalid_trajectories()] with the outcome `"dalys_averted"`.
+#'
+#' @param changes_deaths A data.frame that is the output of [flag_large_diffs()]
+#' with the outcome `"deaths_averted"`.
+#'
+#' @param changes_dalys A data.frame that is the output of [flag_large_diffs()]
+#' with the outcome `"dalys_averted"`.
+#'
+#' @param subregional_flags_deaths_camp A data.frame that is the output of
+#' [compare_natl_subreg()] with the outcome `"deaths_averted_rate"` for the
+#' `"campaign"` activity type.
+#'
+#' @param subregional_flags_deaths_rout A data.frame that is the output of
+#' [compare_natl_subreg()] with the outcome `"deaths_averted_rate"` for the
+#' `"routine"` activity type.
+#'
+#' @param subregional_flags_dalys_camp A data.frame that is the output of
+#' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the
+#' `"campaign"` activity type.
+#'
+#' @param subregional_flags_dalys_rout A data.frame that is the output of
+#' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the
+#' `"campaign"` activity type.
+#'
+#' @return None. Called for the convenience side-effect of saving data.frames as
+#' `.Rds` format.
 #'
 #' @keywords pressure_testing
 #'
 #' @export
-save_outputs <- function() {
-  saveRDS(
-    round_numeric(
-      missing_in_current %>%
-        dplyr::select(dplyr::all_of(c(
-          "country_name",
-          "vaccine",
-          "activity_type",
-          "year",
-          "modelling_group"
-        )))
-    ),
-    "outputs/missing_in_current.rds"
-  )
-  saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds")
-  saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds")
-  saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds")
-  saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds")
-  saveRDS(
-    round_numeric(subregional_flags_deaths_camp),
-    "outputs/subregional_flags_deaths_camp.rds"
-  )
-  saveRDS(
-    round_numeric(subregional_flags_deaths_rout),
-    "outputs/subregional_flags_deaths_rout.rds"
-  )
-  saveRDS(
-    round_numeric(subregional_flags_dalys_camp),
-    "outputs/subregional_flags_dalys_camp.rds"
-  )
-  saveRDS(
-    round_numeric(subregional_flags_dalys_rout),
-    "outputs/subregional_flags_dalys_rout.rds"
+save_outputs <- function(
+  missing_in_current,
+  missing_deaths,
+  missing_dalys,
+  changes_deaths,
+  changes_dalys,
+  subregional_flags_deaths_camp,
+  subregional_flags_deaths_rout,
+  subregional_flags_dalys_camp,
+  subregional_flags_dalys_rout,
+  output_dir = here::here("outputs")
+) {
+  # NOTE: not checking most input args as these are generated from other pkg fns
+
+  output_dir_exists <- dir.exists(output_dir)
+  if (!output_dir_exists) {
+    cli::cli_abort(
+      "Expected output directory {.arg {output_dir}} but it does not exist!"
+    )
+  }
+
+  # NOTE: consider writing to agnostic format e.g. CSV
+  missing_in_current <- dplyr::select(
+    missing_in_current,
+    {{ colnames_df_missing_cols }}
+  )
+
+  filenames <- c(
+    "missing_in_current",
+    "missing_deaths",
+    "missing_dalys",
+    "changes_deaths",
+    "changes_dalys",
+    "subregional_flags_deaths_camp",
+    "subregional_flags_deaths_rout",
+    "subregional_flags_dalys_camp",
+    "subregional_flags_dalys_rout"
+  )
+
+  df_list <- list(
+    missing_in_current,
+    missing_deaths,
+    missing_dalys,
+    changes_deaths,
+    changes_dalys,
+    subregional_flags_deaths_camp,
+    subregional_flags_deaths_rout,
+    subregional_flags_dalys_camp,
+    subregional_flags_dalys_rout
+  )
+
+  Map(
+    df_list,
+    filenames,
+    f = function(df, name) {
+      saveRDS(
+        round_numeric(df),
+        file.path(output_dir, glue::glue("{name}.Rds"))
+      )
+    }
   )
 }

From 46d3d974131d34d115a658d1cfc87e49f9cbdd63 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Mon, 20 Apr 2026 16:55:25 +0100
Subject: [PATCH 07/17] Add pkg constants

---
 R/constants.R | 58 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/R/constants.R b/R/constants.R
index 20ae152..adb8175 100644
--- a/R/constants.R
+++ b/R/constants.R
@@ -47,14 +47,66 @@ colnames_plot_demog_compare <- c(
   "value_millions"
 )
 
+#' @name constants
+colnames_df_missing_cols <- c(
+  "country_name",
+  "vaccine",
+  "activity_type",
+  "year",
+  "modelling_group"
+)
+
+#' @name constants
+COLNAMES_KEY_PRESSURE_TEST <- c(
+  "country",
+  "country_name",
+  "vaccine",
+  "activity_type",
+  "year",
+  "disease",
+  "modelling_group"
+)
+
+#' @name constants
+COLNAMES_INTEREST_PRESSURE_TEST <- union(
+  COLNAMES_KEY_PRESSURE_TEST,
+  c(
+    "fvps",
+    "target_population",
+    "coverage",
+    "deaths_averted",
+    "dalys_averted",
+    "deaths_averted_rate",
+    "dalys_averted_rate"
+  )
+)
+
 #' @name constants
 EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE")
 
 #' @name constants
-TOUCHSTONE_OLD <- "201910"
+N_TS_MIN_CHARS <- 6L
+
+#' @name constants
+N_TS_YEAR_CHARS <- 4L
+
+#' @name constants
+MIN_TS_YEAR <- 2000
+
+#' @name constants
+MAX_TS_YEAR <- 2100
+
+#' @name constants
+MIN_TS_MONTH <- 1
+
+#' @name constants
+MAX_TS_MONTH <- 12
+
+#' @name constants
+DEF_TOUCHSTONE_OLD <- "201910"
 
 #' @name constants
-TOUCHSTONE_NEW <- "202310"
+DEF_TOUCHSTONE_NEW <- "202310"
 
 #' @name constants
-TOUCHSTONE_OLD_OLD <- "202110"
+DEF_TOUCHSTONE_OLD_OLD <- "202110"

From 63e5871eb95be0d2e787e0a92f89dfc74f75977f Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Mon, 20 Apr 2026 16:55:48 +0100
Subject: [PATCH 08/17] Add pkg helpers

---
 R/fn_helpers.R | 135 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 125 insertions(+), 10 deletions(-)

diff --git a/R/fn_helpers.R b/R/fn_helpers.R
index d065a9e..5159361 100644
--- a/R/fn_helpers.R
+++ b/R/fn_helpers.R
@@ -36,12 +36,34 @@ make_novax_scenario <- function(disease) {
   )
 }
 
+#' Adaptively round numerics
+#'
+#' @param x A numeric vector.
+#'
+#' @param large_threshold A single number for the threshold over which numbers
+#' are to be considered 'large'.
+#'
+#' @param small_sigfig A single number for the number of significant digits for
+#' 'small' numbers.
+#'
+#' @param large_digits A single number for the number of places to which 'large'
+#' numbers should be rounded.
+#'
+#' @return `x` rounded to either `large_digits` or to `small_sigfig`.
+#'
+#' @keywords internal
 adaptive_round <- function(
   x,
   large_threshold = 1,
   small_sigfig = 2,
   large_digits = 1
 ) {
+  # basic checks for numeric
+  checkmate::assert_numeric(x, finite = TRUE, any.missing = FALSE)
+  checkmate::assert_number(large_threshold)
+  checkmate::assert_count(small_sigfig, positive = TRUE)
+  checkmate::assert_count(large_digits, positive = TRUE)
+
   ifelse(
     abs(x) >= large_threshold,
     round(x, large_digits),
@@ -49,21 +71,114 @@ adaptive_round <- function(
   )
 }
 
+#' Round numeric columns of a data.frame
+#'
+#' @param df A data.frame.
+#'
+#' @keywords internal
 round_numeric <- function(df) {
-  df %>%
-    mutate(across(
-      where(is.numeric) & !matches("year", ignore.case = TRUE),
-      ~ adaptive_round(.x)
-    ))
+  checkmate::assert_data_frame(
+    df,
+    min.rows = 1L,
+    min.cols = 1L
+  )
+
+  dplyr::mutate(
+    df,
+    dplyr::across(
+      .cols = dplyr::where(is.numeric) & !matches("year", ignore.case = TRUE),
+      .fns = adaptive_round
+    )
+  )
 }
 
-str_as_ts_year <- function(x) {
-  as.numeric(substr(x, 1, 6))
+#' Check and return touchstone year-month
+#'
+#' @param x A string for the touchstone identifier.
+#'
+#' @return The first 6 characters of `x` converted to a numeric. Also has side
+#' effects of erroring if conditions on `x` are not met.
+#'
+#' @keywords internal
+validate_ts_year <- function(x) {
+  has_n_chars <- checkmate::test_string(
+    x,
+    min.chars = N_TS_MIN_CHARS
+  )
+  if (!has_n_chars) {
+    n_chars <- nchars(x)
+    cli::cli_abort(
+      "Touchstone year string should have at least {N_TS_MIN_CHARS}, but got \
+      {n_chars} characters."
+    )
+  }
+
+  inferred_year <- as.numeric(substr(x, 1, N_TS_YEAR_CHARS))
+  is_good_year <- checkmate::test_number(
+    inferred_year,
+    lower = MIN_TS_YEAR,
+    upper = MAX_TS_YEAR,
+    finite = TRUE
+  )
+
+  if (!is_good_year) {
+    cli::cli_abort(
+      "Touchstone year string has an inferred year of \
+      {.strong {inferred_year}} but expected an year in the range \
+      [{MIN_TS_YEAR}, {MAX_TS_YEAR}]."
+    )
+  }
+
+  inferred_month <- as.numeric(
+    substr(x, N_TS_YEAR_CHARS + 1, N_TS_YEAR_CHARS + 2)
+  )
+  is_good_month <- checkmate::test_number(
+    inferred_month,
+    lower = MIN_TS_MONTH,
+    upper = MAX_TS_MONTH,
+    finite = TRUE
+  )
+
+  if (!is_good_month) {
+    cli::cli_abort(
+      "Touchstone month string has an inferred month of \
+      {.strong {inferred_month}} but expected an month in the range \
+      [{MIN_TS_MONTH}, {MAX_TS_MONTH}]."
+    )
+  }
+
+  # return year-month as numeric
+  substr(x, 1, N_TS_MIN_CHARS)
 }
 
+#' Add campaign id to dataframe
+#'
+#' @param df A data.frame.
+#'
+#' @param key_cols A character vector of columns in `df` by which the data are
+#' to be grouped.
+#'
+#' @return `df` with a campaign identifier as a numeric.
+#'
+#' @keywords internal
 add_campaign_id <- function(df, key_cols) {
-  df <- group_by(df, across(all_of(key_cols)))
-  df <- mutate(df, campaign_id = row_number())
+  checkmate::assert_data_frame(df)
+  checkmate::assert_character(key_cols, any.missing = FALSE)
+
+  has_cols <- checkmate::test_names(
+    names(df),
+    must.include = key_cols
+  )
+  if (!has_cols) {
+    missing_cols <- setdiff(colnames(df), key_cols)
+    cli::cli_abort(
+      "Expected {.code df} to have columns {.str {key_cols}} but columns \
+    {.str {missing_cols}} are missing."
+    )
+  }
+
+  df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(key_cols)))
+  df <- dplyr::mutate(df, campaign_id = dplyr::row_number())
 
-  ungroup(df)
+  dplyr::ungroup(df)
 }

From ffff8400c9308d53d06ff610aa46a64ab6733266 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Mon, 20 Apr 2026 17:05:50 +0100
Subject: [PATCH 09/17] WIP separate plot prep from plot code

---
 R/fn_plotting_prep_pres_test.R   | 154 ++++++++++++++++++++++++++++++
 R/fn_plotting_pressure_testing.R | 159 +++----------------------------
 2 files changed, 165 insertions(+), 148 deletions(-)
 create mode 100644 R/fn_plotting_prep_pres_test.R

diff --git a/R/fn_plotting_prep_pres_test.R b/R/fn_plotting_prep_pres_test.R
new file mode 100644
index 0000000..d1959dd
--- /dev/null
+++ b/R/fn_plotting_prep_pres_test.R
@@ -0,0 +1,154 @@
+prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") {
+  offset <- 1e-6
+
+  df2 %>%
+    left_join(df3, by = join_by(modelling_group, vaccine)) %>%
+    mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>%
+    group_by(vaccine) %>%
+    mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE))
+}
+
+prep_plot_vax_gavi <- function(
+  df,
+  prev_dat = NULL,
+  outcome = "deaths_averted"
+) {
+  df_cur <- df %>%
+    select(all_of(key_cols), !!outcome) %>%
+    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
+    group_by(disease, year) %>%
+    summarise(
+      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
+      .groups = "drop"
+    ) %>%
+    mutate(dataset = as.character(new))
+
+  df_prev <- prev_dat %>%
+    select(all_of(key_cols), !!outcome) %>%
+    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
+    group_by(disease, year) %>%
+    summarise(
+      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
+      .groups = "drop"
+    ) %>%
+    mutate(dataset = as.character(old))
+
+  df_combined <- bind_rows(df_cur, df_prev)
+
+  df_diff <- df_cur %>%
+    left_join(
+      df_prev,
+      by = c("disease", "year"),
+      suffix = c("_curr", "_prev")
+    ) %>%
+    mutate(
+      yearly_outcome = yearly_outcome_curr - yearly_outcome_prev,
+      dataset = "Difference"
+    ) %>%
+    select(disease, year, yearly_outcome, dataset)
+
+  df_combined <- bind_rows(df_combined, df_diff)
+
+  df_combined$dataset <- factor(
+    df_combined$dataset,
+    levels = c(as.character(old), "Difference", as.character(new))
+  )
+
+  df_combined
+}
+
+prep_plot_cumul <- function(df, outcome, disease_filter) {
+  outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))]
+
+  outcome_sym <- sym(outcome)
+  cum_col <- paste0("cum_", outcome)
+  avg_col <- paste0("avg_", outcome)
+
+  col_old <- paste0(outcome, "_old")
+  col_new <- paste0(outcome, "_new")
+
+  combined2 <- df %>%
+    select(
+      country,
+      country_name,
+      disease,
+      vaccine,
+      activity_type,
+      year,
+      modelling_group,
+      all_of(outcome_cols)
+    ) %>%
+    pivot_longer(
+      cols = all_of(outcome_cols),
+      names_to = "touchstone",
+      values_to = "value"
+    ) %>%
+    mutate(
+      touchstone = str_remove(touchstone, paste0("^", outcome, "_")),
+      touchstone = recode(
+        touchstone,
+        "old" = as.character(old),
+        "new" = as.character(new),
+        .default = touchstone
+      ),
+      touchstone = factor(
+        touchstone,
+        levels = c(as.character(old), as.character(new))
+      )
+    )
+  # Cumulative values by modelling group
+  df_cum <- combined2 %>%
+    filter(disease == disease_filter) %>%
+    group_by(modelling_group, touchstone) %>%
+    complete(year = full_seq(year, 1)) %>%
+    arrange(year) %>%
+    mutate(
+      first_valid = min(year[!is.na(value)]),
+      !!cum_col := ifelse(
+        year < first_valid,
+        NA,
+        cumsum(replace_na(value, 0))
+      )
+    ) %>%
+    select(-first_valid) %>%
+    ungroup() %>%
+    mutate(modelling_group = paste(modelling_group, touchstone, sep = "-"))
+
+  # Model average
+  df_avg <- df_cum %>%
+    group_by(year, touchstone) %>%
+    summarise(
+      !!avg_col := mean(!!sym(cum_col), na.rm = TRUE),
+      n_models = sum(!is.na(!!sym(cum_col))),
+      .groups = "drop"
+    ) %>%
+    filter(n_models >= 1) %>%
+    mutate(modelling_group = paste("Model Average", touchstone, sep = "-"))
+
+  # Combine for plot
+  df_plot <- bind_rows(
+    df_cum %>%
+      select(year, modelling_group, touchstone, value = !!sym(cum_col)),
+    df_avg %>%
+      select(year, modelling_group, touchstone, value = !!sym(avg_col))
+  )
+
+  df_plot <- df_plot %>%
+    group_by(modelling_group) %>%
+    filter(sum(value, na.rm = TRUE) > 0) %>%
+    ungroup() %>%
+    mutate(
+      line_type = ifelse(
+        grepl("Model Average", modelling_group),
+        "dashed",
+        "solid"
+      )
+    )
+
+  if (nrow(df_plot) == 0 || all(df_plot$value == 0)) {
+    message("No non-zero data to plot for ", disease_filter, ". Skipping plot.")
+    return(NULL)
+  }
+
+  df_plot
+}
diff --git a/R/fn_plotting_pressure_testing.R b/R/fn_plotting_pressure_testing.R
index 8aaad49..1bb3da4 100644
--- a/R/fn_plotting_pressure_testing.R
+++ b/R/fn_plotting_pressure_testing.R
@@ -4,8 +4,10 @@
 #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars
 #' labeller label_wrap_gen
 #'
+#' @keywords internal
+#'
 #' @export
-significant_diff_plot <- function(df, outcome) {
+plot_sig_diff <- function(df, outcome) {
   # retained here as this is a small df and a small operation
   df$label <- glue::glue(
     "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}"
@@ -75,15 +77,8 @@ plot_diff <- function(
   return(p)
 }
 
-plot_modelling_group_variation <- function(df2, df3, outc = "deaths") {
-  offset <- 1e-6
-
-  df2 %>%
-    left_join(df3, by = join_by(modelling_group, vaccine)) %>%
-    mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>%
-    group_by(vaccine) %>%
-    mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>%
-    ggplot() +
+plot_modelling_group_variation <- function(df, outcome) {
+  ggplot(df) +
     aes(
       fill = as.character(mod_num),
       x = adj_outc,
@@ -109,7 +104,7 @@ plot_modelling_group_variation <- function(df2, df3, outc = "deaths") {
     labs(
       x = paste0(
         "Burden averted (",
-        ifelse(outc == "dalys", "DALYs", outc),
+        ifelse(outcome == "dalys", "DALYs", outcome),
         ")"
       ),
       y = "Vaccine"
@@ -117,51 +112,11 @@ plot_modelling_group_variation <- function(df2, df3, outc = "deaths") {
 }
 
 
-# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended)
-plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") {
-  df_cur <- df %>%
-    select(all_of(key_cols), !!outcome) %>%
-    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
-    group_by(disease, year) %>%
-    summarise(
-      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
-      .groups = "drop"
-    ) %>%
-    mutate(dataset = as.character(new))
-
-  df_prev <- prev_dat %>%
-    select(all_of(key_cols), !!outcome) %>%
-    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
-    group_by(disease, year) %>%
-    summarise(
-      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
-      .groups = "drop"
-    ) %>%
-    mutate(dataset = as.character(old))
-
-  df_combined <- bind_rows(df_cur, df_prev)
-
-  df_diff <- df_cur %>%
-    left_join(
-      df_prev,
-      by = c("disease", "year"),
-      suffix = c("_curr", "_prev")
-    ) %>%
-    mutate(
-      yearly_outcome = yearly_outcome_curr - yearly_outcome_prev,
-      dataset = "Difference"
-    ) %>%
-    select(disease, year, yearly_outcome, dataset)
-
-  df_combined <- bind_rows(df_combined, df_diff)
-
-  df_combined$dataset <- factor(
-    df_combined$dataset,
-    levels = c(as.character(old), "Difference", as.character(new))
-  )
-
+#' Gavi plot - future deaths and DALYS averted, 2021-2024
+#' (current time window Gavi looking at, can be amended)
+plot_vaccine_gavi <- function(df, outcome = "deaths_averted") {
   ggplot(
-    df_combined,
+    df,
     aes(
       x = reorder(disease, yearly_outcome),
       y = yearly_outcome,
@@ -186,100 +141,8 @@ plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") {
 
 ### Gavi Cumulative Plot (modelling group + average)
 plot_cumul <- function(df, outcome, disease_filter) {
-  outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))]
-
-  outcome_sym <- sym(outcome)
-  cum_col <- paste0("cum_", outcome)
-  avg_col <- paste0("avg_", outcome)
-
-  col_old <- paste0(outcome, "_old")
-  col_new <- paste0(outcome, "_new")
-
-  combined2 <- df %>%
-    select(
-      country,
-      country_name,
-      disease,
-      vaccine,
-      activity_type,
-      year,
-      modelling_group,
-      all_of(outcome_cols)
-    ) %>%
-    pivot_longer(
-      cols = all_of(outcome_cols),
-      names_to = "touchstone",
-      values_to = "value"
-    ) %>%
-    mutate(
-      touchstone = str_remove(touchstone, paste0("^", outcome, "_")),
-      touchstone = recode(
-        touchstone,
-        "old" = as.character(old),
-        "new" = as.character(new),
-        .default = touchstone
-      ),
-      touchstone = factor(
-        touchstone,
-        levels = c(as.character(old), as.character(new))
-      )
-    )
-  # Cumulative values by modelling group
-  df_cum <- combined2 %>%
-    filter(disease == disease_filter) %>%
-    group_by(modelling_group, touchstone) %>%
-    complete(year = full_seq(year, 1)) %>%
-    arrange(year) %>%
-    mutate(
-      first_valid = min(year[!is.na(value)]),
-      !!cum_col := ifelse(
-        year < first_valid,
-        NA,
-        cumsum(replace_na(value, 0))
-      )
-    ) %>%
-    select(-first_valid) %>%
-    ungroup() %>%
-    mutate(modelling_group = paste(modelling_group, touchstone, sep = "-"))
-
-  # Model average
-  df_avg <- df_cum %>%
-    group_by(year, touchstone) %>%
-    summarise(
-      !!avg_col := mean(!!sym(cum_col), na.rm = TRUE),
-      n_models = sum(!is.na(!!sym(cum_col))),
-      .groups = "drop"
-    ) %>%
-    filter(n_models >= 1) %>%
-    mutate(modelling_group = paste("Model Average", touchstone, sep = "-"))
-
-  # Combine for plot
-  df_plot <- bind_rows(
-    df_cum %>%
-      select(year, modelling_group, touchstone, value = !!sym(cum_col)),
-    df_avg %>%
-      select(year, modelling_group, touchstone, value = !!sym(avg_col))
-  )
-
-  df_plot <- df_plot %>%
-    group_by(modelling_group) %>%
-    filter(sum(value, na.rm = TRUE) > 0) %>%
-    ungroup() %>%
-    mutate(
-      line_type = ifelse(
-        grepl("Model Average", modelling_group),
-        "dashed",
-        "solid"
-      )
-    )
-
-  if (nrow(df_plot) == 0 || all(df_plot$value == 0)) {
-    message("No non-zero data to plot for ", disease_filter, ". Skipping plot.")
-    return(NULL)
-  }
-
   p <- ggplot(
-    df_plot,
+    df,
     aes(
       x = year,
       y = value,

From d0237fb1411aaf5244beffb0b6ee500e5814fe01 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Tue, 21 Apr 2026 09:08:02 +0100
Subject: [PATCH 10/17] WIP add docs and pkg infra

---
 .Rbuildignore                           |  1 +
 DESCRIPTION                             |  2 +-
 NAMESPACE                               | 11 +++++
 _pkgdown.yml                            |  8 +++-
 jarl.toml                               |  2 +
 man/adaptive_round.Rd                   | 27 +++++++++++
 man/add_campaign_id.Rd                  | 21 +++++++++
 man/basic_burden_sanity.Rd              |  2 +-
 man/check_demography_alignment.Rd       |  2 +-
 man/compare_natl_subreg.Rd              | 30 +++++++++++++
 man/constants.Rd                        | 59 +++++++++++++++++++++++-
 man/flag_large_diffs.Rd                 | 50 +++++++++++++++++++++
 man/gen_combined_df.Rd                  | 34 ++++++++++++++
 man/gen_national_iqr.Rd                 | 34 ++++++++++++++
 man/generate_diffs.Rd                   | 39 ++++++++++++++++
 man/helpers.Rd                          |  2 +-
 man/plotting.Rd                         |  2 +-
 man/plotting_prep.Rd                    |  2 +-
 man/plotting_theme.Rd                   |  2 +-
 man/pres_test_filter_data.Rd            | 56 +++++++++++++++++++++++
 man/round_numeric.Rd                    | 15 +++++++
 man/save_outputs.Rd                     | 60 +++++++++++++++++++++++++
 man/significant_diff_plot.Rd            | 12 +++++
 man/validate_complete_incoming_files.Rd |  2 +-
 man/validate_file_dict_template.Rd      |  2 +-
 man/validate_template_alignment.Rd      |  2 +-
 man/validate_ts_year.Rd                 | 19 ++++++++
 tests/spelling.R                        |  3 +-
 28 files changed, 486 insertions(+), 15 deletions(-)
 create mode 100644 jarl.toml
 create mode 100644 man/adaptive_round.Rd
 create mode 100644 man/add_campaign_id.Rd
 create mode 100644 man/compare_natl_subreg.Rd
 create mode 100644 man/flag_large_diffs.Rd
 create mode 100644 man/gen_combined_df.Rd
 create mode 100644 man/gen_national_iqr.Rd
 create mode 100644 man/generate_diffs.Rd
 create mode 100644 man/pres_test_filter_data.Rd
 create mode 100644 man/round_numeric.Rd
 create mode 100644 man/save_outputs.Rd
 create mode 100644 man/significant_diff_plot.Rd
 create mode 100644 man/validate_ts_year.Rd

diff --git a/.Rbuildignore b/.Rbuildignore
index 73513b2..fa4bfd8 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -16,3 +16,4 @@
 ^scratch\.R$
 ^scratch$
 ^data-raw$
+^jarl\.toml$
diff --git a/DESCRIPTION b/DESCRIPTION
index 1dee4e4..93969a5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -48,4 +48,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
diff --git a/NAMESPACE b/NAMESPACE
index 9bcbd19..6a9f7c8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,7 +2,16 @@
 
 export(basic_burden_sanity)
 export(check_demography_alignment)
+export(compare_natl_subreg)
 export(file_dict_colnames)
+export(filter_duplicates)
+export(filter_excluded_diseases_ts)
+export(filter_invalid_trajectories)
+export(filter_recent_ts)
+export(flag_large_diffs)
+export(gen_combined_df)
+export(gen_national_iqr)
+export(generate_diffs)
 export(plot_age_patterns)
 export(plot_compare_demography)
 export(plot_coverage_set)
@@ -15,6 +24,8 @@ export(prep_plot_coverage_set)
 export(prep_plot_demography)
 export(prep_plot_fvp)
 export(prep_plot_global_burden)
+export(save_outputs)
+export(significant_diff_plot)
 export(theme_vimc)
 export(theme_vimc_noxaxis)
 export(validate_complete_incoming_files)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index bbc0826..ae53f13 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -7,10 +7,14 @@ reference:
   - title: Package-level documentation
     contents:
     - has_keyword("package_doc")
-  - title: Diagnostic functions
-    desc: Package diagnostic functions.
+  - title: Burden estimate diagnostics
+    desc: Functions to pressure-test burden estimates.
     contents:
     - has_keyword("diagnostics")
+  - title: Impact estimate diagnostics
+    desc: Functions to pressure-test impact estimates.
+    contents:
+    - has_keyword("pressure_testing")
   - title: Plotting prepartion
     desc: Prepare validated data for plotting.
     contents:
diff --git a/jarl.toml b/jarl.toml
new file mode 100644
index 0000000..a2e2af7
--- /dev/null
+++ b/jarl.toml
@@ -0,0 +1,2 @@
+[lint.assignment]
+operator = "<-"
diff --git a/man/adaptive_round.Rd b/man/adaptive_round.Rd
new file mode 100644
index 0000000..ab1ae9c
--- /dev/null
+++ b/man/adaptive_round.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_helpers.R
+\name{adaptive_round}
+\alias{adaptive_round}
+\title{Adaptively round numerics}
+\usage{
+adaptive_round(x, large_threshold = 1, small_sigfig = 2, large_digits = 1)
+}
+\arguments{
+\item{x}{A numeric vector.}
+
+\item{large_threshold}{A single number for the threshold over which numbers
+are to be considered 'large'.}
+
+\item{small_sigfig}{A single number for the number of significant digits for
+'small' numbers.}
+
+\item{large_digits}{A single number for the number of places to which 'large'
+numbers should be rounded.}
+}
+\value{
+\code{x} rounded to either \code{large_digits} or to \code{small_sigfig}.
+}
+\description{
+Adaptively round numerics
+}
+\keyword{internal}
diff --git a/man/add_campaign_id.Rd b/man/add_campaign_id.Rd
new file mode 100644
index 0000000..7e007ed
--- /dev/null
+++ b/man/add_campaign_id.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_helpers.R
+\name{add_campaign_id}
+\alias{add_campaign_id}
+\title{Add campaign id to dataframe}
+\usage{
+add_campaign_id(df, key_cols)
+}
+\arguments{
+\item{df}{A data.frame.}
+
+\item{key_cols}{A character vector of columns in \code{df} by which the data are
+to be grouped.}
+}
+\value{
+\code{df} with a campaign identifier as a numeric.
+}
+\description{
+Add campaign id to dataframe
+}
+\keyword{internal}
diff --git a/man/basic_burden_sanity.Rd b/man/basic_burden_sanity.Rd
index 7217a7d..004cffd 100644
--- a/man/basic_burden_sanity.Rd
+++ b/man/basic_burden_sanity.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/burden_diagnostics.R
+% Please edit documentation in R/fn_burden_diagnostics.R
 \name{basic_burden_sanity}
 \alias{basic_burden_sanity}
 \title{Sanity checks on burden estimates}
diff --git a/man/check_demography_alignment.Rd b/man/check_demography_alignment.Rd
index 1b5ca1f..5afb1f0 100644
--- a/man/check_demography_alignment.Rd
+++ b/man/check_demography_alignment.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/burden_diagnostics.R
+% Please edit documentation in R/fn_burden_diagnostics.R
 \name{check_demography_alignment}
 \alias{check_demography_alignment}
 \title{Check incoming burden cohort size against interpolated population}
diff --git a/man/compare_natl_subreg.Rd b/man/compare_natl_subreg.Rd
new file mode 100644
index 0000000..ecf5cea
--- /dev/null
+++ b/man/compare_natl_subreg.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_pressure_testing.R
+\name{compare_natl_subreg}
+\alias{compare_natl_subreg}
+\title{Compare sub-regional and national estimates}
+\usage{
+compare_natl_subreg(
+  df,
+  outcome = c("deaths_averted_rate", "dalys_averted_rate"),
+  activity_filter = c("campaign", "routine")
+)
+}
+\arguments{
+\item{df}{A data.frame with sub-region level data on vaccination impact
+outcomes.}
+
+\item{outcome}{A string for the outcome of interest. May be one of
+\code{"deaths_averted_rate"} or \code{"dalys_averted_rate"}.}
+
+\item{activity_filter}{A string for the type of vaccination activity. May be
+one of \code{"campaign"} or \code{"routine"}.}
+}
+\value{
+A data.frame of sub-regional vaccination impact estimates where the
+impact is considered to be outside the tolerance limit.
+}
+\description{
+Compare sub-regional and national estimates
+}
+\keyword{pressure_testing}
diff --git a/man/constants.Rd b/man/constants.Rd
index 7608951..8faa19a 100644
--- a/man/constants.Rd
+++ b/man/constants.Rd
@@ -7,8 +7,19 @@
 \alias{scenario_data_colnames}
 \alias{burden_outcome_names}
 \alias{colnames_plot_demog_compare}
+\alias{colnames_df_missing_cols}
+\alias{COLNAMES_KEY_PRESSURE_TEST}
+\alias{COLNAMES_INTEREST_PRESSURE_TEST}
 \alias{EXCLUDED_DISEASES}
-\alias{TOUCHSTONE_OLD}
+\alias{N_TS_MIN_CHARS}
+\alias{N_TS_YEAR_CHARS}
+\alias{MIN_TS_YEAR}
+\alias{MAX_TS_YEAR}
+\alias{MIN_TS_MONTH}
+\alias{MAX_TS_MONTH}
+\alias{DEF_TOUCHSTONE_OLD}
+\alias{DEF_TOUCHSTONE_NEW}
+\alias{DEF_TOUCHSTONE_OLD_OLD}
 \title{Package constants}
 \format{
 An object of class \code{character} of length 5.
@@ -19,8 +30,30 @@ An object of class \code{character} of length 10.
 
 An object of class \code{character} of length 7.
 
+An object of class \code{character} of length 5.
+
+An object of class \code{character} of length 7.
+
+An object of class \code{character} of length 14.
+
 An object of class \code{character} of length 4.
 
+An object of class \code{integer} of length 1.
+
+An object of class \code{integer} of length 1.
+
+An object of class \code{numeric} of length 1.
+
+An object of class \code{numeric} of length 1.
+
+An object of class \code{numeric} of length 1.
+
+An object of class \code{numeric} of length 1.
+
+An object of class \code{character} of length 1.
+
+An object of class \code{character} of length 1.
+
 An object of class \code{character} of length 1.
 }
 \usage{
@@ -32,9 +65,31 @@ burden_outcome_names
 
 colnames_plot_demog_compare
 
+colnames_df_missing_cols
+
+COLNAMES_KEY_PRESSURE_TEST
+
+COLNAMES_INTEREST_PRESSURE_TEST
+
 EXCLUDED_DISEASES
 
-TOUCHSTONE_OLD
+N_TS_MIN_CHARS
+
+N_TS_YEAR_CHARS
+
+MIN_TS_YEAR
+
+MAX_TS_YEAR
+
+MIN_TS_MONTH
+
+MAX_TS_MONTH
+
+DEF_TOUCHSTONE_OLD
+
+DEF_TOUCHSTONE_NEW
+
+DEF_TOUCHSTONE_OLD_OLD
 }
 \description{
 Package constants
diff --git a/man/flag_large_diffs.Rd b/man/flag_large_diffs.Rd
new file mode 100644
index 0000000..e0379b1
--- /dev/null
+++ b/man/flag_large_diffs.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_pressure_testing.R
+\name{flag_large_diffs}
+\alias{flag_large_diffs}
+\title{Flag significant changes in impact estimates}
+\usage{
+flag_large_diffs(
+  changes_list,
+  iqr_df,
+  variable = c("deaths_averted", "dalys_averted"),
+  group_cols = c("country", "vaccine", "activity_type"),
+  threshold = 100,
+  touchstone_old = DEF_TOUCHSTONE_OLD_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
+)
+}
+\arguments{
+\item{changes_list}{A list of data.frames with one element per variable of
+interest (see \code{variable}). Usually generated using \code{\link[=generate_diffs]{generate_diffs()}}.}
+
+\item{iqr_df}{A data.frame of inter-quartile differences generated using
+\code{\link[=gen_national_iqr]{gen_national_iqr()}}.}
+
+\item{variable}{A string specifying the variable of interest. Must be one of
+"deaths_averted" and "dalys_averted", and must be present as a name and
+element of \code{changes_list}.}
+
+\item{group_cols}{A character vector of grouping columns. Defaults to
+"country", "vaccine", "activity_type".}
+
+\item{threshold}{A single numeric value for the IQR multiplier. Defaults to
+100.}
+
+\item{touchstone_old}{The previous touchstone identifier. Defaults to
+\link{DEF_TOUCHSTONE_OLD_OLD}.}
+
+\item{touchstone_new}{The new touchstone identifier. Defaults to
+\link{DEF_TOUCHSTONE_NEW}.}
+}
+\value{
+A filtered data.frame of differences in impact estimates flagged
+as too large. Rows with differences within tolerance are removed.
+}
+\description{
+Calculates and flags whether the difference in impact estimates
+between touchstones is greater than expected. A row is flagged if the
+difference is greater than \code{threshold} \eqn{\times} the inter-quartile range
+for cases where the IQR is greater than zero.
+}
+\keyword{pressure_testing}
diff --git a/man/gen_combined_df.Rd b/man/gen_combined_df.Rd
new file mode 100644
index 0000000..9a329d3
--- /dev/null
+++ b/man/gen_combined_df.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_pressure_testing.R
+\name{gen_combined_df}
+\alias{gen_combined_df}
+\title{Combine and align data from two touchstones}
+\usage{
+gen_combined_df(
+  prev_dat,
+  df2,
+  interest_cols = COLNAMES_INTEREST_PRESSURE_TEST,
+  key_cols = COLNAMES_KEY_PRESSURE_TEST
+)
+}
+\arguments{
+\item{prev_dat}{A data.frame of impact estimates corresponding to an earlier
+touchstone.}
+
+\item{df2}{A data.frame of impact estimates corresponding to a more recent
+touchstone.}
+
+\item{interest_cols}{A character vector of columns of interest. Defaults to
+\link{COLNAMES_INTEREST_PRESSURE_TEST}.}
+
+\item{key_cols}{A character vector of columns of interest. Defaults to
+\link{COLNAMES_KEY_PRESSURE_TEST}.}
+}
+\value{
+A data.frame which is a full join of \code{prev_dat} and \code{df2}. Columns
+are disambiguated with the suffixes \code{"_old"} and \code{"_new"}.
+}
+\description{
+Generates a full join of two data.frames, selecting for columns of interest.
+}
+\keyword{pressure_testing}
diff --git a/man/gen_national_iqr.Rd b/man/gen_national_iqr.Rd
new file mode 100644
index 0000000..c722e66
--- /dev/null
+++ b/man/gen_national_iqr.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_pressure_testing.R
+\name{gen_national_iqr}
+\alias{gen_national_iqr}
+\title{Generate IQR for key outcomes}
+\usage{
+gen_national_iqr(
+  df,
+  group_cols = c("country", "vaccine", "activity_type"),
+  value_cols = c("deaths_averted", "dalys_averted"),
+  prefix = "national_iqr"
+)
+}
+\arguments{
+\item{df}{A data.frame of impact estimates.}
+
+\item{group_cols}{A character vector of grouping columns. Defaults to
+"country", "vaccine", "activity_type".}
+
+\item{value_cols}{A character vector of value columns. Defaults to
+"deaths_averted" and "dalys_averted".}
+
+\item{prefix}{A string for the prefix applied to every IQR summary column.
+Defaults to "national_iqr".}
+}
+\value{
+A \verb{<data.frame>} with the inter-quartile range of the columns
+in \code{value_cols}, with the column name constructed as \verb{\{prefix\}_\{value_col\}}
+using string interpolation.
+}
+\description{
+Generate IQR for key outcomes
+}
+\keyword{pressure_testing}
diff --git a/man/generate_diffs.Rd b/man/generate_diffs.Rd
new file mode 100644
index 0000000..17fcf32
--- /dev/null
+++ b/man/generate_diffs.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_pressure_testing.R
+\name{generate_diffs}
+\alias{generate_diffs}
+\title{Explore significant changes in deaths and DALYs}
+\usage{
+generate_diffs(
+  prev_df,
+  curr_df,
+  interest_cols = COLNAMES_INTEREST_PRESSURE_TEST,
+  key_cols = COLNAMES_KEY_PRESSURE_TEST,
+  touchstone = DEF_TOUCHSTONE_OLD
+)
+}
+\arguments{
+\item{prev_df}{A \verb{<data.frame>} of impact estimates from the previous
+touchstone.}
+
+\item{curr_df}{A \verb{<data.frame>} of impact estimates for the current
+touchstone.}
+
+\item{interest_cols}{A character vector of columns to check for differences.
+Defaults to \link{COLNAMES_INTEREST_PRESSURE_TEST}.}
+
+\item{key_cols}{A character vector of columns to use when assigning campaign
+identifiers. Passed to \code{\link[=add_campaign_id]{add_campaign_id()}}, defaults to
+\link{COLNAMES_KEY_PRESSURE_TEST}.}
+
+\item{touchstone}{A six character string that can be converted to a six digit
+numeric giving a touchstone identifier in \code{YYYYMM} format.}
+}
+\value{
+A list of data.frames of differences between \code{prev_df} and \code{curr_df},
+with one list element per element of \code{interest_cols}.
+}
+\description{
+Explore significant changes in deaths and DALYs
+}
+\keyword{pressure_testing}
diff --git a/man/helpers.Rd b/man/helpers.Rd
index fb9c4df..af26b2d 100644
--- a/man/helpers.Rd
+++ b/man/helpers.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helpers.R
+% Please edit documentation in R/fn_helpers.R
 \name{helpers}
 \alias{helpers}
 \alias{make_novax_scenario}
diff --git a/man/plotting.Rd b/man/plotting.Rd
index 8dd565e..7f915a3 100644
--- a/man/plotting.Rd
+++ b/man/plotting.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plotting.R
+% Please edit documentation in R/fn_plotting_burden_diagnostics.R
 \name{plotting}
 \alias{plotting}
 \alias{plot_compare_demography}
diff --git a/man/plotting_prep.Rd b/man/plotting_prep.Rd
index 881a9f3..f8124bc 100644
--- a/man/plotting_prep.Rd
+++ b/man/plotting_prep.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plotting_prep.R
+% Please edit documentation in R/fn_plotting_prep_bur_diag.R
 \name{plotting_prep}
 \alias{plotting_prep}
 \alias{prep_plot_demography}
diff --git a/man/plotting_theme.Rd b/man/plotting_theme.Rd
index c2c49e0..fe46d78 100644
--- a/man/plotting_theme.Rd
+++ b/man/plotting_theme.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plotting.R
+% Please edit documentation in R/fn_plotting_burden_diagnostics.R
 \name{plotting_theme}
 \alias{plotting_theme}
 \alias{theme_vimc}
diff --git a/man/pres_test_filter_data.Rd b/man/pres_test_filter_data.Rd
new file mode 100644
index 0000000..003617a
--- /dev/null
+++ b/man/pres_test_filter_data.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_pressure_testing.R
+\name{pres_test_filter_data}
+\alias{pres_test_filter_data}
+\alias{filter_recent_ts}
+\alias{filter_excluded_diseases_ts}
+\alias{filter_duplicates}
+\alias{filter_invalid_trajectories}
+\title{Filter data for touchstones or diseases}
+\usage{
+filter_recent_ts(df, threshold = 202310)
+
+filter_excluded_diseases_ts(df, threshold = 202110)
+
+filter_duplicates(df, key_cols = COLNAMES_KEY_PRESSURE_TEST)
+
+filter_invalid_trajectories(
+  df,
+  prev_data,
+  outcome = c("deaths_averted", "dalys_averted")
+)
+}
+\arguments{
+\item{df}{A \verb{<data.frame>} holding impact data. This data.frame is not
+checked for contents}
+
+\item{threshold}{A six-digit number that is checked as a valid touchstone
+identifier (YYYYMM format) using \code{\link[=validate_ts_year]{validate_ts_year()}}.}
+
+\item{key_cols}{Key columns in \code{df} to check for duplicates.}
+
+\item{prev_data}{A \verb{<data.frame>} holding data from a previous touchstone for
+the same scenarios as \code{df}.}
+
+\item{outcome}{A string giving the outcome of interest; may be one of
+\code{"deaths_averted"} or \code{"dalys_averted"}.}
+}
+\value{
+A filtered \verb{<data.frame>}.
+\itemize{
+\item \code{filter_recent_ts()} returns \code{df} with rows where the touchstone condition
+is not met excluded.
+\item \code{filter_excluded_diseases_ts()} returns \code{df} with rows where rows relating
+to the \link{EXCLUDED_DISEASES}, when the touchstone year in \code{df} is less than the
+\code{threshold}, excluded.
+\item \code{filter_duplicates()} returns \code{df} with duplicated combinations of
+\code{key_cols} removed.
+\item \code{filter_invalid_trajectories()} returns \code{df} with bad outcome trajectories
+(\code{NA} to non-\code{NA}) removed.
+}
+}
+\description{
+A pair of helper functions allowing filtering out of recent touchstone values
+and excluded diseases.
+}
+\keyword{pressure_testing}
diff --git a/man/round_numeric.Rd b/man/round_numeric.Rd
new file mode 100644
index 0000000..3c95234
--- /dev/null
+++ b/man/round_numeric.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_helpers.R
+\name{round_numeric}
+\alias{round_numeric}
+\title{Round numeric columns of a data.frame}
+\usage{
+round_numeric(df)
+}
+\arguments{
+\item{df}{A data.frame.}
+}
+\description{
+Round numeric columns of a data.frame
+}
+\keyword{internal}
diff --git a/man/save_outputs.Rd b/man/save_outputs.Rd
new file mode 100644
index 0000000..db41a66
--- /dev/null
+++ b/man/save_outputs.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_pressure_testing.R
+\name{save_outputs}
+\alias{save_outputs}
+\title{Save pressure-testing diagnostics to local file}
+\usage{
+save_outputs(
+  missing_in_current,
+  missing_deaths,
+  missing_dalys,
+  changes_deaths,
+  changes_dalys,
+  subregional_flags_deaths_camp,
+  subregional_flags_deaths_rout,
+  subregional_flags_dalys_camp,
+  subregional_flags_dalys_rout,
+  output_dir = here::here("outputs")
+)
+}
+\arguments{
+\item{missing_in_current}{A data.frame.}
+
+\item{missing_deaths}{A data.frame that is the output of
+\code{\link[=filter_invalid_trajectories]{filter_invalid_trajectories()}} with the outcome \code{"deaths_averted"}.}
+
+\item{missing_dalys}{A data.frame that is the output of
+\code{\link[=filter_invalid_trajectories]{filter_invalid_trajectories()}} with the outcome \code{"dalys_averted"}.}
+
+\item{changes_deaths}{A data.frame that is the output of \code{\link[=flag_large_diffs]{flag_large_diffs()}}
+with the outcome \code{"deaths_averted"}.}
+
+\item{changes_dalys}{A data.frame that is the output of \code{\link[=flag_large_diffs]{flag_large_diffs()}}
+with the outcome \code{"dalys_averted"}.}
+
+\item{subregional_flags_deaths_camp}{A data.frame that is the output of
+\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"deaths_averted_rate"} for the
+\code{"campaign"} activity type.}
+
+\item{subregional_flags_deaths_rout}{A data.frame that is the output of
+\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"deaths_averted_rate"} for the
+\code{"routine"} activity type.}
+
+\item{subregional_flags_dalys_camp}{A data.frame that is the output of
+\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the
+\code{"campaign"} activity type.}
+
+\item{subregional_flags_dalys_rout}{A data.frame that is the output of
+\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the
+\code{"campaign"} activity type.}
+}
+\value{
+None. Called for the convenience side-effect of saving data.frames as
+\code{.Rds} format.
+}
+\description{
+Save pressure-testing diagnostics data.frames to local compressed files in
+the \code{.Rds} format. Input data.frames are generated by other package functions
+and are not checked here.
+}
+\keyword{pressure_testing}
diff --git a/man/significant_diff_plot.Rd b/man/significant_diff_plot.Rd
new file mode 100644
index 0000000..11b7cb4
--- /dev/null
+++ b/man/significant_diff_plot.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_plotting_pressure_testing.R
+\name{significant_diff_plot}
+\alias{significant_diff_plot}
+\title{Plot significant changes}
+\usage{
+significant_diff_plot(df, outcome)
+}
+\description{
+Plot significant changes
+}
+\keyword{internal}
diff --git a/man/validate_complete_incoming_files.Rd b/man/validate_complete_incoming_files.Rd
index 148311b..75d3cd9 100644
--- a/man/validate_complete_incoming_files.Rd
+++ b/man/validate_complete_incoming_files.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/burden_diagnostics.R
+% Please edit documentation in R/fn_burden_diagnostics.R
 \name{validate_complete_incoming_files}
 \alias{validate_complete_incoming_files}
 \title{Validate files in a burden estimate}
diff --git a/man/validate_file_dict_template.Rd b/man/validate_file_dict_template.Rd
index 293a605..af2d770 100644
--- a/man/validate_file_dict_template.Rd
+++ b/man/validate_file_dict_template.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/burden_diagnostics.R
+% Please edit documentation in R/fn_burden_diagnostics.R
 \name{validate_file_dict_template}
 \alias{validate_file_dict_template}
 \title{Validate file dictionary template}
diff --git a/man/validate_template_alignment.Rd b/man/validate_template_alignment.Rd
index 7ff801a..7bbccc6 100644
--- a/man/validate_template_alignment.Rd
+++ b/man/validate_template_alignment.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/burden_diagnostics.R
+% Please edit documentation in R/fn_burden_diagnostics.R
 \name{validate_template_alignment}
 \alias{validate_template_alignment}
 \title{Check incoming burden set against template}
diff --git a/man/validate_ts_year.Rd b/man/validate_ts_year.Rd
new file mode 100644
index 0000000..6812fcf
--- /dev/null
+++ b/man/validate_ts_year.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_helpers.R
+\name{validate_ts_year}
+\alias{validate_ts_year}
+\title{Check and return touchstone year-month}
+\usage{
+validate_ts_year(x)
+}
+\arguments{
+\item{x}{A string for the touchstone identifier.}
+}
+\value{
+The first 6 characters of \code{x} converted to a numeric. Also has side
+effects of erroring if conditions on \code{x} are not met.
+}
+\description{
+Check and return touchstone year-month
+}
+\keyword{internal}
diff --git a/tests/spelling.R b/tests/spelling.R
index a8cf85b..d60e024 100644
--- a/tests/spelling.R
+++ b/tests/spelling.R
@@ -1,6 +1,7 @@
-if (requireNamespace('spelling', quietly = TRUE))
+if (requireNamespace("spelling", quietly = TRUE)) {
   spelling::spell_check_test(
     vignettes = TRUE,
     error = FALSE,
     skip_on_cran = TRUE
   )
+}

From 4a52fbb619331bd717bc7604aba97634f237ee4b Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Tue, 21 Apr 2026 10:27:22 +0100
Subject: [PATCH 11/17] Reorganise docs

---
 R/fn_burden_diagnostics.R          | 10 ++--
 R/fn_plotting_burden_diagnostics.R | 81 +++---------------------------
 R/fn_plotting_helpers.R            | 64 +++++++++++++++++++++++
 R/fn_plotting_prep_bur_diag.R      | 31 ++++++++----
 R/fn_pressure_testing.R            | 14 +++---
 _pkgdown.yml                       | 42 ++++++++++------
 6 files changed, 133 insertions(+), 109 deletions(-)
 create mode 100644 R/fn_plotting_helpers.R

diff --git a/R/fn_burden_diagnostics.R b/R/fn_burden_diagnostics.R
index 0986acf..f77f09c 100644
--- a/R/fn_burden_diagnostics.R
+++ b/R/fn_burden_diagnostics.R
@@ -17,7 +17,7 @@
 #' Prints a message to screen informing the user whether any action has been
 #' taken.
 #'
-#' @keywords diagnostics
+#' @keywords burden_diagnostics
 #'
 #' @export
 validate_file_dict_template <- function(
@@ -121,7 +121,7 @@ validate_file_dict_template <- function(
 #' @return A `<tibble>` of the scenario file dictionary in `path_burden` if all
 #' checks pass. Otherwise, exits with informative errors on failed checks.
 #'
-#' @keywords diagnostics
+#' @keywords burden_diagnostics
 #'
 #' @export
 validate_complete_incoming_files <- function(
@@ -212,7 +212,7 @@ validate_complete_incoming_files <- function(
 #' @return A named list of checks carried out on `burden_set` to compare it
 #' against `template`, with information on missing and extra data.
 #'
-#' @keywords diagnostics
+#' @keywords burden_diagnostics
 #'
 #' @export
 validate_template_alignment <- function(burden_set, template) {
@@ -276,7 +276,7 @@ validate_template_alignment <- function(burden_set, template) {
 #' @return A `<tibble>` giving the alignment, i.e., percentage difference of
 #' modelled population size from the WPP-derived population estimates.
 #'
-#' @keywords diagnostics
+#' @keywords burden_diagnostics
 #'
 #' @export
 check_demography_alignment <- function(
@@ -344,7 +344,7 @@ check_demography_alignment <- function(
 #' @return A character vector of messages generated by checks on burden
 #' estimates, with the length of the vector depending on how many checks fail.
 #'
-#' @keywords diagnostics
+#' @keywords burden_diagnostics
 #'
 #' @export
 basic_burden_sanity <- function(burden) {
diff --git a/R/fn_plotting_burden_diagnostics.R b/R/fn_plotting_burden_diagnostics.R
index 9d137ac..71b7d54 100644
--- a/R/fn_plotting_burden_diagnostics.R
+++ b/R/fn_plotting_burden_diagnostics.R
@@ -1,72 +1,7 @@
-#' Plotting theme for vimcheck
-#'
-#' @description
-#' A simple plotting theme building on [ggplot2::theme_bw()].
-#'
-#' @name plotting_theme
-#' @rdname plotting_theme
-#'
-#' @param x_text_angle The angle for X-axis labels. Defaults to 45 degrees.
-#'
-#' @param y_text_angle The angle for Y-axis labels. Defaults to 0 degrees.
-#'
-#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to
-#' [ggplot2::theme()]. These will be applied in addition to, or in place of,
-#' pre-existing elements defined by this theme. See the examples for this
-#' theme's appearance.
-#'
-#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects.
-#'
-#' @keywords plotting
-#'
-#' @examples
-#' # using an inbuilt dataset
-#' data(mtcars)
-#'
-#' # standard theme
-#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) +
-#'   ggplot2::geom_point() +
-#'   theme_vimc()
-#'
-#' # with X-axis suppression
-#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) +
-#'   ggplot2::geom_point() +
-#'   theme_vimc_noxaxis()
-#'
-#' @export
-theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) {
-  ggplot2::theme_bw() +
-    ggplot2::theme(
-      axis.text.x = ggplot2::element_text(
-        size = 10,
-        angle = x_text_angle
-      ),
-      strip.text.y = ggplot2::element_text(
-        angle = y_text_angle
-      ),
-      plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"),
-      ...
-    )
-}
-
-#' @name plotting_theme
-#'
-#' @importFrom ggplot2 '%+replace%'
-#'
-#' @export
-theme_vimc_noxaxis <- function() {
-  theme_vimc() %+replace%
-    ggplot2::theme(
-      axis.title.x = ggplot2::element_blank(),
-      axis.text.x = ggplot2::element_blank(),
-      axis.ticks.x = ggplot2::element_blank()
-    )
-}
-
 #' Plot burden and impact diagnostics
 #'
-#' @name plotting
-#' @rdname plotting
+#' @name plot_burden_diagnostics
+#' @rdname plot_burden_diagnostics
 #'
 #' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid
 #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars
@@ -77,7 +12,7 @@ theme_vimc_noxaxis <- function() {
 #' @description
 #' Plotting functions for burden and impact diagnostics. All functions operate
 #' on data prepared for plotting by a corresponding
-#' [plotting-preparation function][plotting_prep].
+#' [plotting-preparation function][plot_prep_burden_diagnostics].
 #'
 #' @param fig_number The figure number displayed in the plot title.
 #'
@@ -121,7 +56,7 @@ plot_compare_demography <- function(data, fig_number) {
   g
 }
 
-#' @name plotting
+#' @name plot_burden_diagnostics
 #'
 #' @param burden_age A `<tibble>` with the minimum column names
 #' "age", "value_millions", "burden_outcome", and "scenario"; expected to be the
@@ -154,7 +89,7 @@ plot_age_patterns <- function(burden_age, fig_number) {
   g
 }
 
-#' @name plotting
+#' @name plot_burden_diagnostics
 #'
 #' @param burden_decades A `<tibble>` giving the burden by decade, up to
 #' `year_max`; expected to be the output of [prep_plot_burden_decades()].
@@ -182,7 +117,7 @@ plot_global_burden_decades <- function(burden_decades, fig_number) {
   g
 }
 
-#' @name plotting
+#' @name plot_burden_diagnostics
 #'
 #' @param burden_data This is expected to be a `<tibble>` from a
 #' nested-`<tibble>` constructed using [prep_plot_global_burden()].
@@ -226,7 +161,7 @@ plot_global_burden <- function(burden_data, outcome_name, fig_number) {
   g
 }
 
-#' @name plotting
+#' @name plot_burden_diagnostics
 #'
 #' @param coverage_set A `<tibble>` that is the output of
 #' [prep_plot_coverage_set()].
@@ -271,7 +206,7 @@ plot_coverage_set <- function(coverage_set, fig_number) {
   g
 }
 
-#' @name plotting
+#' @name plot_burden_diagnostics
 #'
 #' @param fvp_data A `<tibble>` of estimates of fully-vaccinated persons (FVPs)
 #' per scenario, with scenarios as factors in order of the number of
diff --git a/R/fn_plotting_helpers.R b/R/fn_plotting_helpers.R
new file mode 100644
index 0000000..4d14cce
--- /dev/null
+++ b/R/fn_plotting_helpers.R
@@ -0,0 +1,64 @@
+#' Plotting theme for vimcheck
+#'
+#' @description
+#' A simple plotting theme building on [ggplot2::theme_bw()].
+#'
+#' @name plotting_theme
+#' @rdname plotting_theme
+#'
+#' @param x_text_angle The angle for X-axis labels. Defaults to 45 degrees.
+#'
+#' @param y_text_angle The angle for Y-axis labels. Defaults to 0 degrees.
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to
+#' [ggplot2::theme()]. These will be applied in addition to, or in place of,
+#' pre-existing elements defined by this theme. See the examples for this
+#' theme's appearance.
+#'
+#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects.
+#'
+#' @keywords plotting
+#'
+#' @examples
+#' # using an inbuilt dataset
+#' data(mtcars)
+#'
+#' # standard theme
+#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) +
+#'   ggplot2::geom_point() +
+#'   theme_vimc()
+#'
+#' # with X-axis suppression
+#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) +
+#'   ggplot2::geom_point() +
+#'   theme_vimc_noxaxis()
+#'
+#' @export
+theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) {
+  ggplot2::theme_bw() +
+    ggplot2::theme(
+      axis.text.x = ggplot2::element_text(
+        size = 10,
+        angle = x_text_angle
+      ),
+      strip.text.y = ggplot2::element_text(
+        angle = y_text_angle
+      ),
+      plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"),
+      ...
+    )
+}
+
+#' @name plotting_theme
+#'
+#' @importFrom ggplot2 '%+replace%'
+#'
+#' @export
+theme_vimc_noxaxis <- function() {
+  theme_vimc() %+replace%
+    ggplot2::theme(
+      axis.title.x = ggplot2::element_blank(),
+      axis.text.x = ggplot2::element_blank(),
+      axis.ticks.x = ggplot2::element_blank()
+    )
+}
diff --git a/R/fn_plotting_prep_bur_diag.R b/R/fn_plotting_prep_bur_diag.R
index 277c083..b76b29f 100644
--- a/R/fn_plotting_prep_bur_diag.R
+++ b/R/fn_plotting_prep_bur_diag.R
@@ -1,12 +1,13 @@
 #' Prepare data for plotting
 #'
-#' @name plotting_prep
-#' @rdname plotting_prep
+#' @name plot_prep_burden_diagnostics
+#' @rdname plot_prep_burden_diagnostics
 #'
 #' @description
 #' Transform burden estimate data from modelling groups to make them suitable
-#' for plotting using an appropriate [plotting function][plotting]. Each
-#' preparation function corresponds to a plotting function.
+#' for plotting using an appropriate
+#' [plotting function][plot_prep_burden_diagnostics]. Each preparation function
+#' corresponds to a plotting function.
 #'
 #' @param burden For `prep_plot_demography()`, a `<tibble>` output from
 #' [check_demography_alignment()].
@@ -31,6 +32,8 @@
 #'
 #' - For `prep_plot_fvp()`: WIP.
 #'
+#' @keywords plot_prep_burden_diagnostics
+#'
 #' @export
 prep_plot_demography <- function(burden) {
   checkmate::assert_tibble(burden)
@@ -88,7 +91,9 @@ prep_plot_demography <- function(burden) {
   burden_long
 }
 
-#' @name plotting_prep
+#' @name plot_prep_burden_diagnostics
+#'
+#' @keywords plot_prep_burden_diagnostics
 #'
 #' @export
 prep_plot_age <- function(burden) {
@@ -109,11 +114,13 @@ prep_plot_age <- function(burden) {
   burden_summary
 }
 
-#' @name plotting_prep
+#' @name plot_prep_burden_diagnostics
 #'
 #' @param year_max The maximum year to be represented in a subsequent figure.
 #' For `prep_plot_burden_decades()`, must be a decade, i.e., multiple of 10.
 #'
+#' @keywords plot_prep_burden_diagnostics
+#'
 #' @export
 prep_plot_burden_decades <- function(burden, year_max) {
   checkmate::assert_tibble(burden)
@@ -158,7 +165,9 @@ prep_plot_burden_decades <- function(burden, year_max) {
   burden_data
 }
 
-#' @name plotting_prep
+#' @name plot_prep_burden_diagnostics
+#'
+#' @keywords plot_prep_burden_diagnostics
 #'
 #' @export
 prep_plot_global_burden <- function(burden) {
@@ -184,10 +193,12 @@ prep_plot_global_burden <- function(burden) {
   burden_nested
 }
 
-#' @name plotting_prep
+#' @name plot_prep_burden_diagnostics
 #'
 #' @param coverage WIP. Coverage data.
 #'
+#' @keywords plot_prep_burden_diagnostics
+#'
 #' @export
 prep_plot_coverage_set <- function(coverage) {
   checkmate::assert_tibble(coverage)
@@ -245,12 +256,14 @@ prep_plot_coverage_set <- function(coverage) {
   coverage_set
 }
 
-#' @name plotting_prep
+#' @name plot_prep_burden_diagnostics
 #'
 #' @param fvp WIP. Data on counts of fully vaccinated persons.
 #'
 #' @param year_min Minimum year.
 #'
+#' @keywords plot_prep_burden_diagnostics
+#'
 #' @export
 prep_plot_fvp <- function(fvp, year_min, year_max) {
   checkmate::assert_tibble(fvp)
diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R
index 9f25309..fbcc3fb 100644
--- a/R/fn_pressure_testing.R
+++ b/R/fn_pressure_testing.R
@@ -13,7 +13,7 @@
 #' @param threshold A six-digit number that is checked as a valid touchstone
 #' identifier (YYYYMM format) using [validate_ts_year()].
 #'
-#' @keywords pressure_testing
+#' @keywords impact_diagnostics
 #'
 #' @return A filtered `<data.frame>`.
 #'
@@ -207,7 +207,7 @@ filter_invalid_trajectories <- function(
 #' @return A list of data.frames of differences between `prev_df` and `curr_df`,
 #' with one list element per element of `interest_cols`.
 #'
-#' @keywords pressure_testing
+#' @keywords impact_diagnostics
 #'
 #' @export
 generate_diffs <- function(
@@ -272,7 +272,7 @@ generate_diffs <- function(
 
 #' Generate IQR for key outcomes
 #'
-#' @keywords pressure_testing
+#' @keywords impact_diagnostics
 #'
 #' @param df A data.frame of impact estimates.
 #'
@@ -358,7 +358,7 @@ gen_national_iqr <- function(
 #' @return A filtered data.frame of differences in impact estimates flagged
 #' as too large. Rows with differences within tolerance are removed.
 #'
-#' @keywords pressure_testing
+#' @keywords impact_diagnostics
 #'
 #' @export
 flag_large_diffs <- function(
@@ -477,7 +477,7 @@ flag_large_diffs <- function(
 #' @return A data.frame which is a full join of `prev_dat` and `df2`. Columns
 #' are disambiguated with the suffixes `"_old"` and `"_new"`.
 #'
-#' @keywords pressure_testing
+#' @keywords impact_diagnostics
 #'
 #' @export
 gen_combined_df <- function(
@@ -537,7 +537,7 @@ gen_combined_df <- function(
 #' @return A data.frame of sub-regional vaccination impact estimates where the
 #' impact is considered to be outside the tolerance limit.
 #'
-#' @keywords pressure_testing
+#' @keywords impact_diagnostics
 #'
 #' @export
 compare_natl_subreg <- function(
@@ -657,7 +657,7 @@ compare_natl_subreg <- function(
 #' @return None. Called for the convenience side-effect of saving data.frames as
 #' `.Rds` format.
 #'
-#' @keywords pressure_testing
+#' @keywords impact_diagnostics
 #'
 #' @export
 save_outputs <- function(
diff --git a/_pkgdown.yml b/_pkgdown.yml
index ae53f13..40a29ee 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -7,32 +7,44 @@ reference:
   - title: Package-level documentation
     contents:
     - has_keyword("package_doc")
+  
   - title: Burden estimate diagnostics
-    desc: Functions to pressure-test burden estimates.
+
+  - subtitle: Check burden estimates
+    contents:
+    - has_keyword("burden_diagnostics")
+  - subtitle: Prepare burden estimates for plotting
+    contents:
+    - plot_prep_burden_diagnostics
+  - subtitle: Plot burden estimates
     contents:
-    - has_keyword("diagnostics")
+    - plot_burden_diagnostics
+
+    
   - title: Impact estimate diagnostics
-    desc: Functions to pressure-test impact estimates.
+    
+  - subtitle: Check impact estimates
+    contents:
+    - has_keyword("impact_diagnostics")
+  - subtitle: Prepare impact estimate checks for plotting
     contents:
-    - has_keyword("pressure_testing")
-  - title: Plotting prepartion
-    desc: Prepare validated data for plotting.
+    - plot_prep_impact_diagnostics
+  - subtitle: Plot impact estimates
     contents:
-    - plotting_prep
-  - title: Plotting functions
-    desc: Package plotting functions.
+    - plot_impact_diagnostics
+
+  - title: Plotting helper functions
     contents:
-    - plotting
     - plotting_theme
+  
   - title: Internal functions
-    desc: Internal helper functions.
     contents:
     - has_keyword("internal")
-  - title: Data
-    desc: Package data.
+  
+  - title: Package data
     contents:
     - has_keyword("data")
-  - title: Constants
-    desc: Package constants.
+  
+  - title: Package constants
     contents:
     - has_keyword("constants")

From bba30c5b5f66f90613a585b607eefbc384a66bd4 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Tue, 21 Apr 2026 10:27:57 +0100
Subject: [PATCH 12/17] WIP plotting and prep fns impact diagnostics

---
 ...ing.R => fn_plotting_impact_diagnostics.R} | 24 +++++++++++++-----
 ... => fn_plotting_prep_impact_diagnostics.R} | 25 +++++++++++++++++++
 2 files changed, 43 insertions(+), 6 deletions(-)
 rename R/{fn_plotting_pressure_testing.R => fn_plotting_impact_diagnostics.R} (90%)
 rename R/{fn_plotting_prep_pres_test.R => fn_plotting_prep_impact_diagnostics.R} (85%)

diff --git a/R/fn_plotting_pressure_testing.R b/R/fn_plotting_impact_diagnostics.R
similarity index 90%
rename from R/fn_plotting_pressure_testing.R
rename to R/fn_plotting_impact_diagnostics.R
index 1bb3da4..66477a8 100644
--- a/R/fn_plotting_pressure_testing.R
+++ b/R/fn_plotting_impact_diagnostics.R
@@ -1,11 +1,12 @@
 #' Plot significant changes
 #'
+#' @name plot_impact_diagnostics
+#' @rdname plot_impact_diagnostics
+#'
 #' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid
 #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars
 #' labeller label_wrap_gen
 #'
-#' @keywords internal
-#'
 #' @export
 plot_sig_diff <- function(df, outcome) {
   # retained here as this is a small df and a small operation
@@ -27,6 +28,9 @@ plot_sig_diff <- function(df, outcome) {
     theme_vimc(x_text_angle = 0)
 }
 
+#' @name plot_impact_diagnostics
+#'
+#' @export
 plot_diff <- function(
   combined,
   variable,
@@ -77,6 +81,9 @@ plot_diff <- function(
   return(p)
 }
 
+#' @name plot_impact_diagnostics
+#'
+#' @export
 plot_modelling_group_variation <- function(df, outcome) {
   ggplot(df) +
     aes(
@@ -111,9 +118,11 @@ plot_modelling_group_variation <- function(df, outcome) {
     )
 }
 
-
-#' Gavi plot - future deaths and DALYS averted, 2021-2024
-#' (current time window Gavi looking at, can be amended)
+# Gavi plot - future deaths and DALYS averted, 2021-2024
+# (current time window Gavi looking at, can be amended)
+#' @name plot_impact_diagnostics
+#'
+#' @export
 plot_vaccine_gavi <- function(df, outcome = "deaths_averted") {
   ggplot(
     df,
@@ -139,7 +148,10 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") {
     labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year")
 }
 
-### Gavi Cumulative Plot (modelling group + average)
+# Gavi Cumulative Plot (modelling group + average)
+#' @name plot_impact_diagnostics
+#'
+#' @export
 plot_cumul <- function(df, outcome, disease_filter) {
   p <- ggplot(
     df,
diff --git a/R/fn_plotting_prep_pres_test.R b/R/fn_plotting_prep_impact_diagnostics.R
similarity index 85%
rename from R/fn_plotting_prep_pres_test.R
rename to R/fn_plotting_prep_impact_diagnostics.R
index d1959dd..ca5328b 100644
--- a/R/fn_plotting_prep_pres_test.R
+++ b/R/fn_plotting_prep_impact_diagnostics.R
@@ -1,3 +1,18 @@
+#' Prepare impact diagnostics for plotting
+#'
+#' @name plot_prep_impact_diagnostics
+#' @rdname plot_prep_impact_diagnostics
+#'
+#' @description
+#' A suite of helper functions that sit between impact diagnostics functions and
+#' plotting functions. These functions have some basic checks on the input data,
+#' but otherwise assume that users will not modify inputs.
+#'
+#' @param name description
+#'
+#' @return [prep_plot_mod_grp_varn()] returns a ... TODO add
+#'
+#' @export
 prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") {
   offset <- 1e-6
 
@@ -8,6 +23,11 @@ prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") {
     mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE))
 }
 
+#' @name plot_prep_impact_diagnostics
+#'
+#' @return [prep_plot_vax_gavi()] returns a ... TODO add
+#'
+#' @export
 prep_plot_vax_gavi <- function(
   df,
   prev_dat = NULL,
@@ -57,6 +77,11 @@ prep_plot_vax_gavi <- function(
   df_combined
 }
 
+#' @name plot_prep_impact_diagnostics
+#'
+#' @param df description
+#'
+#' @export
 prep_plot_cumul <- function(df, outcome, disease_filter) {
   outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))]
 

From a455c5ae37ced424d171c57bb5a8ae298425edc6 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Tue, 21 Apr 2026 16:31:42 +0100
Subject: [PATCH 13/17] Add constants

---
 R/constants.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/R/constants.R b/R/constants.R
index adb8175..0dc3a3f 100644
--- a/R/constants.R
+++ b/R/constants.R
@@ -81,6 +81,11 @@ COLNAMES_INTEREST_PRESSURE_TEST <- union(
   )
 )
 
+#' @name constants
+IMPACT_OUTCOMES <- c("deaths_averted", "dalys_averted")
+
+IMPACT_GROUP_VARS <- c("activity_type", "vaccine")
+
 #' @name constants
 EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE")
 
@@ -110,3 +115,6 @@ DEF_TOUCHSTONE_NEW <- "202310"
 
 #' @name constants
 DEF_TOUCHSTONE_OLD_OLD <- "202110"
+
+#' @name constants
+COLOUR_VIMC <- "#008080"

From beb2a0721f50069904f1f6e346132328b704bfff Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Tue, 21 Apr 2026 16:32:30 +0100
Subject: [PATCH 14/17] Update plot prep and plot fns

---
 R/fn_plotting_impact_diagnostics.R      | 178 ++++++++----
 R/fn_plotting_prep_impact_diagnostics.R | 372 +++++++++++++++---------
 2 files changed, 359 insertions(+), 191 deletions(-)

diff --git a/R/fn_plotting_impact_diagnostics.R b/R/fn_plotting_impact_diagnostics.R
index 66477a8..bc07b25 100644
--- a/R/fn_plotting_impact_diagnostics.R
+++ b/R/fn_plotting_impact_diagnostics.R
@@ -1,21 +1,45 @@
-#' Plot significant changes
+#' Create impact diagnostics plots
 #'
 #' @name plot_impact_diagnostics
 #' @rdname plot_impact_diagnostics
 #'
 #' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid
 #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars
-#' labeller label_wrap_gen
+#' labeller label_wrap_gen theme geom_segment geom_point
+#'
+#' @importFrom rlang .data
+#'
+#' @description
+#' Plotting functions for impact diagnostics. See
+#' [plotting-preparation functions][plot_prep_impact_diagnostics] for a set of
+#' helper functions that prepare impact diagnostics for plotting.
+#'
+#' @param data A data.frame that gives the
+#'
+#' @param outcome
 #'
 #' @export
-plot_sig_diff <- function(df, outcome) {
+plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) {
+  checkmate::assert_tibble(data)
+  outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES)
+
   # retained here as this is a small df and a small operation
-  df$label <- glue::glue(
+  data$label <- glue::glue(
     "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}"
   )
 
-  ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) +
-    geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) +
+  ggplot(
+    df,
+    aes(
+      .data$diff,
+      reorder(.data$label, .data$diff),
+      color = .data$modelling_group
+    )
+  ) +
+    geom_segment(
+      aes(x = 0, xend = .data$diff, y = .data$label, yend = .data$label),
+      size = 1
+    ) +
     geom_point(size = 2) +
     labs(
       x = "Difference",
@@ -30,25 +54,53 @@ plot_sig_diff <- function(df, outcome) {
 
 #' @name plot_impact_diagnostics
 #'
+#' @param group_vars A single string for the grouping variables. May be any of
+#' `"activity_type"` and `"vaccine"`.
+#'
+#' @param touchstone_old A string for the previous touchstone in
+#' format `"YYYYMM"`.
+#'
+#' @param touchstone_new A string for the current or new touchstone in
+#' format `"YYYYMM"`.
+#'
 #' @export
 plot_diff <- function(
-  combined,
-  variable,
-  group_vars = c("activity_type", "vaccine")
+  data,
+  outcome = IMPACT_OUTCOMES,
+  group_vars = IMPACT_GROUP_VARS,
+  touchstone_old = DEF_TOUCHSTONE_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
 ) {
-  x_var <- paste0(variable, "_new")
-  y_var <- paste0(variable, "_old")
-  x_sym <- rlang::sym(x_var)
-  y_sym <- rlang::sym(y_var)
+  checkmate::assert_tibble(data)
+  outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES)
+  group_vars <- rlang::arg_match(group_vars, IMPACT_GROUP_VARS, multiple = TRUE)
+
+  touchstone_old <- validate_ts_year(touchstone_old)
+  touchstone_new <- validate_ts_year(touchstone_new)
 
-  combined <- combined %>%
-    filter(!is.na(!!x_sym) & !is.na(!!y_sym))
+  x_var <- glue::glue("{outcome}_new")
+  y_var <- glue::glue("{outcome}_old")
 
-  n_facets <- combined %>%
-    distinct(activity_type, vaccine) %>%
-    nrow()
+  # small operations retained
+  combined <- dplyr::filter(
+    combined,
+    dplyr::filter(
+      !is.na({{ x_var }}),
+      !is.na({{ y_var }})
+    )
+  )
 
-  ncol_dynamic <- case_when(
+  # nolint start
+  n_facets <- nrow(
+    dplyr::distinct(
+      combined,
+      .data$activity_type,
+      .data$vaccine
+    )
+  )
+  # nolint end
+
+  ncol_dynamic <- dplyr::case_when(
     n_facets <= 4 ~ 2,
     n_facets <= 9 ~ 3,
     n_facets <= 16 ~ 4,
@@ -56,58 +108,62 @@ plot_diff <- function(
     TRUE ~ 8
   )
 
-  p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) +
-    geom_point(alpha = 0.5, colour = "#008080") +
-    geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
+  p <- ggplot(
+    combined,
+    aes({{ x_var }}, {{ y_var }})
+  ) +
+    ggplot2::geom_point(alpha = 0.5, colour = COLOUR_VIMC) +
+    ggplot2::geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
     facet_wrap(
-      ~ activity_type + vaccine,
+      facets = c("activity_type", "vaccine,"),
       scales = "free",
       ncol = ncol_dynamic
     ) +
-    scale_x_log10() +
-    scale_y_log10() +
-    theme_bw() +
+    ggplot2::scale_x_log10() +
+    ggplot2::scale_y_log10() +
+    # TODO: check if VIMC theme okay here
+    theme_vimc() +
     theme(
-      strip.text = element_text(size = 7),
-      panel.spacing = unit(0.05, "lines"),
-      axis.text = element_text(size = 6.5)
+      strip.text = ggplot2::element_text(size = 7),
+      panel.spacing = ggplot2::unit(0.05, "lines"),
+      axis.text = ggplot2::element_text(size = 6.5)
     ) +
     labs(
-      title = glue("{variable}: Current vs Previous Report"),
-      x = glue("{new} - {variable}"),
-      y = glue("{old} - {variable}")
+      title = glue::glue("{variable}: Current vs Previous Report"),
+      x = glue::glue("{new} - {variable}"),
+      y = glue::glue("{old} - {variable}")
     )
 
-  return(p)
+  p
 }
 
 #' @name plot_impact_diagnostics
 #'
 #' @export
-plot_modelling_group_variation <- function(df, outcome) {
-  ggplot(df) +
+plot_modelling_group_variation <- function(data, outcome) {
+  ggplot(data) +
     aes(
-      fill = as.character(mod_num),
-      x = adj_outc,
-      y = reorder(vaccine, mean_outc)
+      fill = as.character(.data$mod_num),
+      x = .data$adj_outc,
+      y = reorder(.data$vaccine, .data$mean_outc)
     ) +
-    geom_density_ridges(
+    ggridges::geom_density_ridges(
       alpha = 0.5,
       stat = "binline",
       bins = 200,
       draw_baseline = FALSE
     ) +
-    facet_grid(. ~ activity_type, scales = "fixed") +
-    theme_bw() +
+    facet_grid(cols = ggplot2::vars("activity_type"), scales = "fixed") +
+    theme_vimc +
     theme(
       legend.position = "none",
-      axis.text.x = element_text(angle = 90, hjust = 1)
+      axis.text.x = ggplot2::element_text(angle = 90, hjust = 1)
     ) +
-    scale_x_log10(
+    ggplot2::scale_x_log10(
       breaks = scales::trans_breaks("log10", function(x) 10^x),
-      labels = scales::trans_format("log10", math_format(10^.x))
+      labels = scales::trans_format("log10", scales::math_format(10^.x))
     ) +
-    scale_fill_viridis_d() +
+    ggplot2::scale_fill_viridis_d() +
     labs(
       x = paste0(
         "Burden averted (",
@@ -127,13 +183,13 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") {
   ggplot(
     df,
     aes(
-      x = reorder(disease, yearly_outcome),
-      y = yearly_outcome,
-      fill = factor(year)
+      x = reorder(.data$disease, .data$yearly_outcome),
+      y = .data$yearly_outcome,
+      fill = factor(.data$year)
     )
   ) +
     geom_col(position = "dodge") +
-    scale_fill_manual(
+    ggplot2::scale_fill_manual(
       values = c(
         "2021" = "#008080",
         "2022" = "#E68424",
@@ -143,8 +199,8 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") {
     ) +
     facet_wrap(~dataset, scales = "free_y") +
     scale_y_continuous(labels = scales::scientific) +
-    theme_bw() +
-    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+    theme_vimc() +
+    theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
     labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year")
 }
 
@@ -156,17 +212,19 @@ plot_cumul <- function(df, outcome, disease_filter) {
   p <- ggplot(
     df,
     aes(
-      x = year,
-      y = value,
-      color = modelling_group,
-      linetype = line_type
+      x = .data$year,
+      y = .data$value,
+      color = .data$modelling_group,
+      linetype = .data$line_type
     )
   ) +
-    geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) +
-    scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) +
-    guides(linetype = "none") +
+    ggplot2::geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) +
+    ggplot2::scale_linetype_manual(
+      values = c(solid = "solid", dashed = "dashed")
+    ) +
+    ggplot2::guides(linetype = "none") +
     scale_y_continuous(labels = scales::scientific) +
-    theme_minimal() +
+    theme_vimc() +
     labs(
       x = "Year",
       y = paste("Cumulative", outcome),
@@ -175,5 +233,5 @@ plot_cumul <- function(df, outcome, disease_filter) {
     ) +
     theme(legend.position = "bottom")
 
-  return(p)
+  p
 }
diff --git a/R/fn_plotting_prep_impact_diagnostics.R b/R/fn_plotting_prep_impact_diagnostics.R
index ca5328b..b999d33 100644
--- a/R/fn_plotting_prep_impact_diagnostics.R
+++ b/R/fn_plotting_prep_impact_diagnostics.R
@@ -8,70 +8,137 @@
 #' plotting functions. These functions have some basic checks on the input data,
 #' but otherwise assume that users will not modify inputs.
 #'
-#' @param name description
+#' @param data A data.frame of impact estimates.
 #'
-#' @return [prep_plot_mod_grp_varn()] returns a ... TODO add
+#' @param comparison A data.frame of impact estimates used as a comparator for
+#' `comparison`.
+#'
+#' @param outcome A string for the impact outcome; may be one of
+#' "deaths_averted" or "dalys_averted".
+#'
+#' @return
+#'
+#' - [prep_plot_mod_grp_varn()] returns a `<tibble>` TODO add
 #'
 #' @export
-prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") {
-  offset <- 1e-6
-
-  df2 %>%
-    left_join(df3, by = join_by(modelling_group, vaccine)) %>%
-    mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>%
-    group_by(vaccine) %>%
-    mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE))
+prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) {
+  # TODO: df2 and df3 need informative names
+
+  offset_manual <- 1e-6
+  df_combined <- dplyr::left_join(
+    df2,
+    df3,
+    by = c("modelling_group", "vaccine")
+  )
+
+  df_combined <- dplyr::mutate(
+    df_combined,
+    adj_outc = {{ outcome }} + offset_manual
+  )
+
+  df_combined <- dplyr::group_by(
+    df_combined,
+    .data$vaccine
+  )
+
+  df_combined <- dplyr::mutate(
+    df_combined,
+    mean_outc = stats::weighted.mean(.data$adj_outc, .data$fvps, na.rm = TRUE)
+  )
+
+  df_combined
 }
 
 #' @name plot_prep_impact_diagnostics
 #'
+#' @param data description
+#'
+#' @param outcome
+#'
+#' @param disease
+#'
+#' @param touchstone_old
+#'
+#' @param touchstone_new
+#'
 #' @return [prep_plot_vax_gavi()] returns a ... TODO add
 #'
 #' @export
 prep_plot_vax_gavi <- function(
-  df,
-  prev_dat = NULL,
-  outcome = "deaths_averted"
+  data,
+  prev_data,
+  outcome = IMPACT_OUTCOMES,
+  touchstone_old = DEF_TOUCHSTONE_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
 ) {
-  df_cur <- df %>%
-    select(all_of(key_cols), !!outcome) %>%
-    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
-    group_by(disease, year) %>%
-    summarise(
-      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
-      .groups = "drop"
-    ) %>%
-    mutate(dataset = as.character(new))
-
-  df_prev <- prev_dat %>%
-    select(all_of(key_cols), !!outcome) %>%
-    filter(year >= 2021, year <= 2024, disease != "COVID") %>%
-    group_by(disease, year) %>%
-    summarise(
-      yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
-      .groups = "drop"
-    ) %>%
-    mutate(dataset = as.character(old))
-
-  df_combined <- bind_rows(df_cur, df_prev)
-
-  df_diff <- df_cur %>%
-    left_join(
-      df_prev,
-      by = c("disease", "year"),
-      suffix = c("_curr", "_prev")
-    ) %>%
-    mutate(
-      yearly_outcome = yearly_outcome_curr - yearly_outcome_prev,
-      dataset = "Difference"
-    ) %>%
-    select(disease, year, yearly_outcome, dataset)
-
-  df_combined <- bind_rows(df_combined, df_diff)
+  checkmate::assert_tibble(data)
+  checkmate::assert_tibble(prev_data)
+  outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES)
+  touchstone_old <- validate_ts_year(touchstone_old)
+  touchstone_new <- validate_ts_year(touchstone_new)
+
+  df_list <- Map(
+    list(data, prev_data),
+    list(touchstone_new, touchstone_old),
+    f = function(df, ts_id) {
+      df <- dplyr::select(
+        df,
+        dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST),
+        {{ outcome }}
+      )
+
+      df <- dplyr::filter(
+        df,
+        dplyr::between(.data$year, 2021, 2024),
+        Negate(grepl("COVID", .data$disease, ignore.case = TRUE))
+      )
+
+      df <- dplyr::group_by(df, .data$disease, .data$year)
+
+      df <- dplyr::summarise(
+        df,
+        yearly_outcome = sum({{ outcome }}, na.rm = TRUE),
+        .groups = "drop"
+      )
+
+      df <- dplyr::mutate(
+        df,
+        dataset = as.character(ts_id)
+      )
+    }
+  )
+
+  df_combined <- dplyr::bind_rows(df_list)
+
+  df_diff <- Reduce(
+    df_list,
+    f = function(x, y) {
+      dplyr::left_join(
+        x,
+        y,
+        by = c("disease", "year"),
+        suffix = c("_curr", "_prev")
+      )
+    }
+  )
+
+  df_diff <- dplyr::mutate(
+    df_diff,
+    yearly_outcome = .data$yearly_outcome_curr - .data$yearly_outcome_prev,
+    dataset = "Difference"
+  )
+  cols_to_select <- c("disease", "year", "yearly_outcome", "dataset")
+  df_diff <- dplyr::select(df_diff, {{ cols_to_select }})
+
+  df_combined <- dplyr::bind_rows(df_combined, df_diff)
 
   df_combined$dataset <- factor(
     df_combined$dataset,
-    levels = c(as.character(old), "Difference", as.character(new))
+    levels = c(
+      as.character(touchstone_old),
+      "Difference",
+      as.character(touchstone_new)
+    )
   )
 
   df_combined
@@ -79,99 +146,142 @@ prep_plot_vax_gavi <- function(
 
 #' @name plot_prep_impact_diagnostics
 #'
-#' @param df description
+#' @param data description
+#'
+#' @param outcome
+#'
+#' @param disease
+#'
+#' @param touchstone_old
+#'
+#' @param touchstone_new
 #'
 #' @export
-prep_plot_cumul <- function(df, outcome, disease_filter) {
-  outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))]
-
-  outcome_sym <- sym(outcome)
-  cum_col <- paste0("cum_", outcome)
-  avg_col <- paste0("avg_", outcome)
-
-  col_old <- paste0(outcome, "_old")
-  col_new <- paste0(outcome, "_new")
-
-  combined2 <- df %>%
-    select(
-      country,
-      country_name,
-      disease,
-      vaccine,
-      activity_type,
-      year,
-      modelling_group,
-      all_of(outcome_cols)
-    ) %>%
-    pivot_longer(
-      cols = all_of(outcome_cols),
-      names_to = "touchstone",
-      values_to = "value"
-    ) %>%
-    mutate(
-      touchstone = str_remove(touchstone, paste0("^", outcome, "_")),
-      touchstone = recode(
-        touchstone,
-        "old" = as.character(old),
-        "new" = as.character(new),
-        .default = touchstone
-      ),
-      touchstone = factor(
-        touchstone,
-        levels = c(as.character(old), as.character(new))
-      )
+prep_plot_cumul <- function(
+  data,
+  outcome,
+  disease,
+  touchstone_old = DEF_TOUCHSTONE_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
+) {
+  outcome_cols <- colnames(data)[stringr::str_detect(
+    colnames(data),
+    glue::glue("^{outcome}_")
+  )]
+
+  cum_col <- glue::glue("cum_{outcome}")
+  avg_col <- glue::glue("avg_{outcome}")
+
+  combined2 <- dplyr::select(
+    data,
+    {{ COLNAMES_KEY_PRESSURE_TEST }},
+    {{ outcome_cols }}
+  )
+
+  combined2 <- tidyr::pivot_longer(
+    combined2,
+    cols = dplyr::all_of(outcome_cols),
+    names_to = "touchstone",
+    values_to = "value"
+  )
+
+  combined2 <- dplyr::mutate(
+    combined2,
+    touchstone = stringr::str_remove(
+      .data$touchstone,
+      glue::glue("^{outcome}_")
+    ),
+    touchstone = dplyr::replace_values(
+      .data$touchstone,
+      c("old", "new"),
+      as.character(c(touchstone_old, touchstone_new)),
+      .default = .data$touchstone
+    ),
+    touchstone = factor(
+      .data$touchstone,
+      levels = as.character(c(touchstone_old, touchstone_new))
     )
+  )
+
   # Cumulative values by modelling group
-  df_cum <- combined2 %>%
-    filter(disease == disease_filter) %>%
-    group_by(modelling_group, touchstone) %>%
-    complete(year = full_seq(year, 1)) %>%
-    arrange(year) %>%
-    mutate(
-      first_valid = min(year[!is.na(value)]),
-      !!cum_col := ifelse(
-        year < first_valid,
-        NA,
-        cumsum(replace_na(value, 0))
-      )
-    ) %>%
-    select(-first_valid) %>%
-    ungroup() %>%
-    mutate(modelling_group = paste(modelling_group, touchstone, sep = "-"))
+  df_cum <- dplyr::filter(combined2, .data$disease == disease)
+  df_cum <- dplyr::group_by(
+    df_cum,
+    .data$modelling_group,
+    .data$touchstone
+  )
+  df_cum <- tidyr::complete(
+    df_cum,
+    year = tidyr::full_seq(.data$year, 1)
+  )
+  df_cum <- dplyr::arrange(df_cum, .data$year)
+  df_cum <- dplyr::mutate(
+    df_cum,
+    first_valid = min(.data$year[!is.na(data$value)]),
+    {{ cum_col }} := dplyr::if_else(
+      .data$year < .data$first_valid,
+      NA_real_,
+      cumsum(tidyr::replace_na(.data$value, 0.0))
+    )
+  )
+
+  df_cum$first_valid <- NULL
+  df_cum <- dplyr::ungroup(df_cum)
+  df_cum <- dplyr::mutate(
+    df_cum,
+    modelling_group = glue::glue("{.data$modelling_group}-{.data$touchstone}")
+  )
 
   # Model average
-  df_avg <- df_cum %>%
-    group_by(year, touchstone) %>%
-    summarise(
-      !!avg_col := mean(!!sym(cum_col), na.rm = TRUE),
-      n_models = sum(!is.na(!!sym(cum_col))),
-      .groups = "drop"
-    ) %>%
-    filter(n_models >= 1) %>%
-    mutate(modelling_group = paste("Model Average", touchstone, sep = "-"))
+  df_avg <- dplyr::summarise(
+    df_cum,
+    {{ avg_col }} := mean({{ cum_col }}, na.rm = TRUE),
+    n_models = sum(!is.na({{ cum_col }})),
+    .groups = c("year", "touchstone")
+  )
+  df_avg <- dplyr::filter(
+    df_avg,
+    .data$n_models >= 1
+  )
+  df_avg <- dplyr::mutate(
+    df_avg,
+    modelling_group = glue::glue(
+      "Model Average-{.data$touchstone}"
+    )
+  )
 
   # Combine for plot
-  df_plot <- bind_rows(
-    df_cum %>%
-      select(year, modelling_group, touchstone, value = !!sym(cum_col)),
-    df_avg %>%
-      select(year, modelling_group, touchstone, value = !!sym(avg_col))
-  )
-
-  df_plot <- df_plot %>%
-    group_by(modelling_group) %>%
-    filter(sum(value, na.rm = TRUE) > 0) %>%
-    ungroup() %>%
-    mutate(
-      line_type = ifelse(
-        grepl("Model Average", modelling_group),
-        "dashed",
-        "solid"
-      )
+  cols_to_select <- c("year", "modelling_group", "touchstone")
+  df_plot <- dplyr::bind_rows(
+    dplyr::select(
+      df_cum,
+      {{ cols_to_select }},
+      value = {{ cum_col }}
+    ),
+    dplyr::select(
+      df_avg,
+      {{ cols_to_select }},
+      value = {{ avg_col }}
+    )
+  )
+
+  df_plot <- dplyr::group_by(df_plot, .data$modelling_group)
+  df_plot <- dplyr::filter(
+    df_plot,
+    sum(.data$value, na.rm = TRUE) > 0
+  )
+  df_plot <- dplyr::ungroup(df_plot)
+  df_plot <- dplyr::mutate(
+    df_plot,
+    line_type = dplyr::if_else(
+      grepl("Model Average", .data$modelling_group, fixed = TRUE),
+      "dashed",
+      "solid"
     )
+  )
 
   if (nrow(df_plot) == 0 || all(df_plot$value == 0)) {
-    message("No non-zero data to plot for ", disease_filter, ". Skipping plot.")
+    message("No non-zero data to plot for ", disease, ". Skipping plot.")
     return(NULL)
   }
 

From 0424f171c438e63ae256283c71e455e004259b56 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Wed, 22 Apr 2026 15:55:43 +0100
Subject: [PATCH 15/17] Fixes from initial test run

---
 R/constants.R                                 |  97 +++++++++++++
 R/fn_helpers.R                                |  11 +-
 R/fn_plotting_burden_diagnostics.R            |   2 +-
 R/fn_plotting_impact_diagnostics.R            | 133 ++++++++++++------
 R/fn_plotting_prep_impact_diagnostics.R       | 128 +++++++++++------
 R/fn_pressure_testing.R                       |  87 ++++++++----
 inst/WORDLIST                                 |   5 +-
 man/basic_burden_sanity.Rd                    |   2 +-
 man/check_demography_alignment.Rd             |   2 +-
 man/compare_natl_subreg.Rd                    |   2 +-
 man/constants.Rd                              |  53 ++++++-
 man/flag_large_diffs.Rd                       |   4 +-
 man/gen_combined_df.Rd                        |   2 +-
 man/gen_national_iqr.Rd                       |   2 +-
 man/generate_diffs.Rd                         |   2 +-
 ...plotting.Rd => plot_burden_diagnostics.Rd} |   8 +-
 man/plot_impact_diagnostics.Rd                |  66 +++++++++
 ...rep.Rd => plot_prep_burden_diagnostics.Rd} |  10 +-
 man/plot_prep_impact_diagnostics.Rd           |  82 +++++++++++
 man/plotting_theme.Rd                         |   2 +-
 man/pres_test_filter_data.Rd                  |   9 +-
 man/save_outputs.Rd                           |   4 +-
 man/significant_diff_plot.Rd                  |  12 --
 man/validate_complete_incoming_files.Rd       |   2 +-
 man/validate_file_dict_template.Rd            |   2 +-
 man/validate_template_alignment.Rd            |   2 +-
 26 files changed, 578 insertions(+), 153 deletions(-)
 rename man/{plotting.Rd => plot_burden_diagnostics.Rd} (91%)
 create mode 100644 man/plot_impact_diagnostics.Rd
 rename man/{plotting_prep.Rd => plot_prep_burden_diagnostics.Rd} (87%)
 create mode 100644 man/plot_prep_impact_diagnostics.Rd
 delete mode 100644 man/significant_diff_plot.Rd

diff --git a/R/constants.R b/R/constants.R
index 0dc3a3f..e4c3532 100644
--- a/R/constants.R
+++ b/R/constants.R
@@ -1,10 +1,17 @@
 #' Package constants
 #'
+#' @description
+#' Constant values used in _vimcheck_. See the **Examples** section for the
+#' constant values.
+#'
 #' @name constants
 #' @rdname constants
 #'
 #' @keywords constants
 #'
+#' @examples
+#' file_dict_colnames
+#'
 #' @export
 file_dict_colnames <- c(
   "scenario_type",
@@ -15,6 +22,11 @@ file_dict_colnames <- c(
 )
 
 #' @name constants
+#'
+#' @examples
+#' scenario_data_colnames
+#'
+#' @export
 scenario_data_colnames <- c(
   "scenario_type",
   "scenario_type_description",
@@ -23,6 +35,11 @@ scenario_data_colnames <- c(
 )
 
 #' @name constants
+#'
+#' @examples
+#' burden_outcome_names
+#'
+#' @export
 burden_outcome_names <- c(
   "cases",
   "deaths",
@@ -37,6 +54,11 @@ burden_outcome_names <- c(
 )
 
 #' @name constants
+#'
+#' @examples
+#' colnames_plot_demog_compare
+#'
+#' @export
 colnames_plot_demog_compare <- c(
   "variable",
   "scenario",
@@ -48,6 +70,11 @@ colnames_plot_demog_compare <- c(
 )
 
 #' @name constants
+#'
+#' @examples
+#' colnames_df_missing_cols
+#'
+#' @export
 colnames_df_missing_cols <- c(
   "country_name",
   "vaccine",
@@ -57,6 +84,11 @@ colnames_df_missing_cols <- c(
 )
 
 #' @name constants
+#'
+#' @examples
+#' COLNAMES_KEY_PRESSURE_TEST
+#'
+#' @export
 COLNAMES_KEY_PRESSURE_TEST <- c(
   "country",
   "country_name",
@@ -68,6 +100,11 @@ COLNAMES_KEY_PRESSURE_TEST <- c(
 )
 
 #' @name constants
+#'
+#' @examples
+#' COLNAMES_INTEREST_PRESSURE_TEST
+#'
+#' @export
 COLNAMES_INTEREST_PRESSURE_TEST <- union(
   COLNAMES_KEY_PRESSURE_TEST,
   c(
@@ -82,39 +119,99 @@ COLNAMES_INTEREST_PRESSURE_TEST <- union(
 )
 
 #' @name constants
+#'
+#' @examples
+#' IMPACT_OUTCOMES
+#'
+#' @export
 IMPACT_OUTCOMES <- c("deaths_averted", "dalys_averted")
 
 IMPACT_GROUP_VARS <- c("activity_type", "vaccine")
 
 #' @name constants
+#'
+#' @examples
+#' EXCLUDED_DISEASES
+#'
+#' @export
 EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE")
 
 #' @name constants
+#'
+#' @examples
+#' N_TS_MIN_CHARS
+#'
+#' @export
 N_TS_MIN_CHARS <- 6L
 
 #' @name constants
+#'
+#' @examples
+#' N_TS_YEAR_CHARS
+#'
+#' @export
 N_TS_YEAR_CHARS <- 4L
 
 #' @name constants
+#'
+#' @examples
+#' MIN_TS_YEAR
+#'
+#' @export
 MIN_TS_YEAR <- 2000
 
 #' @name constants
+#'
+#' @examples
+#' MAX_TS_YEAR
+#'
+#' @export
 MAX_TS_YEAR <- 2100
 
 #' @name constants
+#'
+#' @examples
+#' MIN_TS_MONTH
+#'
+#' @export
 MIN_TS_MONTH <- 1
 
 #' @name constants
+#'
+#' @examples
+#' MAX_TS_MONTH
+#'
+#' @export
 MAX_TS_MONTH <- 12
 
 #' @name constants
+#'
+#' @examples
+#' DEF_TOUCHSTONE_OLD
+#'
+#' @export
 DEF_TOUCHSTONE_OLD <- "201910"
 
 #' @name constants
+#'
+#' @examples
+#' DEF_TOUCHSTONE_NEW
+#'
+#' @export
 DEF_TOUCHSTONE_NEW <- "202310"
 
 #' @name constants
+#'
+#' @examples
+#' DEF_TOUCHSTONE_OLD_OLD
+#'
+#' @export
 DEF_TOUCHSTONE_OLD_OLD <- "202110"
 
 #' @name constants
+#'
+#' @examples
+#' COLOUR_VIMC
+#'
+#' @export
 COLOUR_VIMC <- "#008080"
diff --git a/R/fn_helpers.R b/R/fn_helpers.R
index 5159361..d3997f8 100644
--- a/R/fn_helpers.R
+++ b/R/fn_helpers.R
@@ -86,7 +86,8 @@ round_numeric <- function(df) {
   dplyr::mutate(
     df,
     dplyr::across(
-      .cols = dplyr::where(is.numeric) & !matches("year", ignore.case = TRUE),
+      .cols = dplyr::where(is.numeric) &
+        !dplyr::matches("year", ignore.case = TRUE),
       .fns = adaptive_round
     )
   )
@@ -106,10 +107,10 @@ validate_ts_year <- function(x) {
     min.chars = N_TS_MIN_CHARS
   )
   if (!has_n_chars) {
-    n_chars <- nchars(x)
+    n_chars <- nchar(x) # nolint used in cli
     cli::cli_abort(
-      "Touchstone year string should have at least {N_TS_MIN_CHARS}, but got \
-      {n_chars} characters."
+      "Touchstone year should be a string with at least {N_TS_MIN_CHARS} \
+      characters, but got class {.cls {class(x)}} with {n_chars} characters."
     )
   }
 
@@ -170,7 +171,7 @@ add_campaign_id <- function(df, key_cols) {
     must.include = key_cols
   )
   if (!has_cols) {
-    missing_cols <- setdiff(colnames(df), key_cols)
+    missing_cols <- setdiff(colnames(df), key_cols) # nolint used in cli
     cli::cli_abort(
       "Expected {.code df} to have columns {.str {key_cols}} but columns \
     {.str {missing_cols}} are missing."
diff --git a/R/fn_plotting_burden_diagnostics.R b/R/fn_plotting_burden_diagnostics.R
index 71b7d54..b9a30a6 100644
--- a/R/fn_plotting_burden_diagnostics.R
+++ b/R/fn_plotting_burden_diagnostics.R
@@ -10,7 +10,7 @@
 #' @importFrom rlang .data
 #'
 #' @description
-#' Plotting functions for burden and impact diagnostics. All functions operate
+#' Plotting functions for burden diagnostics. All functions operate
 #' on data prepared for plotting by a corresponding
 #' [plotting-preparation function][plot_prep_burden_diagnostics].
 #'
diff --git a/R/fn_plotting_impact_diagnostics.R b/R/fn_plotting_impact_diagnostics.R
index bc07b25..dfb1e8f 100644
--- a/R/fn_plotting_impact_diagnostics.R
+++ b/R/fn_plotting_impact_diagnostics.R
@@ -1,5 +1,11 @@
 #' Create impact diagnostics plots
 #'
+#' @description
+#' Functions that create impact diagnostics plots (or plotting objects). All
+#' functions are associated with one other upstream data processing function,
+#' and can be used in a pipe with that function. Where appropriate, outcome
+#' selection and label preparation is automated to reduce function arguments.
+#'
 #' @name plot_impact_diagnostics
 #' @rdname plot_impact_diagnostics
 #'
@@ -14,9 +20,26 @@
 #' [plotting-preparation functions][plot_prep_impact_diagnostics] for a set of
 #' helper functions that prepare impact diagnostics for plotting.
 #'
-#' @param data A data.frame that gives the
+#' @param data A data.frame suitable for plotting.
+#'
+#' - `plot_sig_diff()`: Output of
+#' [`flag_large_diff()`][plot_prep_impact_diagnostics].
+#'
+#' - `plot_diff()`: Output of
+#' [`gen_combined_df()`][plot_prep_impact_diagnostics].
+#'
+#' - `plot_modelling_group_variation()`: Output of
+#' [`plot_prep_mod_grp_varn()`][plot_prep_impact_diagnostics].
+#'
+#' - `plot_vaccine_gavi()`: Output of
+#' [`plot_prep_vax_gavi()`][plot_prep_impact_diagnostics]
+#'
+#' - `plot_cumul()`: Output of
+#' [`plot_prep_cumul()`][plot_prep_impact_diagnostics]
 #'
-#' @param outcome
+#' @param outcome A string for the impact outcome. One of [IMPACT_OUTCOMES].
+#'
+#' @return A `<ggplot2>` object that can be viewed or saved.
 #'
 #' @export
 plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) {
@@ -25,14 +48,14 @@ plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) {
 
   # retained here as this is a small df and a small operation
   data$label <- glue::glue(
-    "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}"
+    "{data$country_name} | {data$vaccine} | {data$activity_type} | {data$year}"
   )
 
   ggplot(
-    df,
+    data,
     aes(
       .data$diff,
-      reorder(.data$label, .data$diff),
+      stats::reorder(.data$label, .data$diff),
       color = .data$modelling_group
     )
   ) +
@@ -55,13 +78,13 @@ plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) {
 #' @name plot_impact_diagnostics
 #'
 #' @param group_vars A single string for the grouping variables. May be any of
-#' `"activity_type"` and `"vaccine"`.
+#' [IMPACT_OUTCOMES], which are `"activity_type"` and `"vaccine"`.
 #'
 #' @param touchstone_old A string for the previous touchstone in
-#' format `"YYYYMM"`.
+#' format `"YYYYMM"`. Defaults to [DEF_TOUCHSTONE_OLD].
 #'
 #' @param touchstone_new A string for the current or new touchstone in
-#' format `"YYYYMM"`.
+#' format `"YYYYMM"`. Defaults to [DEF_TOUCHSTONE_NEW].
 #'
 #' @export
 plot_diff <- function(
@@ -73,7 +96,10 @@ plot_diff <- function(
 ) {
   checkmate::assert_tibble(data)
   outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES)
-  group_vars <- rlang::arg_match(group_vars, IMPACT_GROUP_VARS, multiple = TRUE)
+  checkmate::assert_subset(
+    group_vars,
+    IMPACT_GROUP_VARS
+  )
 
   touchstone_old <- validate_ts_year(touchstone_old)
   touchstone_new <- validate_ts_year(touchstone_new)
@@ -82,18 +108,24 @@ plot_diff <- function(
   y_var <- glue::glue("{outcome}_old")
 
   # small operations retained
-  combined <- dplyr::filter(
-    combined,
-    dplyr::filter(
-      !is.na({{ x_var }}),
-      !is.na({{ y_var }})
+  # NOTE: data masking using `{{` does not appear to work
+  # see last example in https://dplyr.tidyverse.org/reference/filter.html
+  #
+  # NOTE: exclude values < 1 to prevent log transform errors
+  data <- dplyr::filter_out(
+    data,
+    dplyr::when_any(
+      is.na(.data[[x_var]]),
+      is.na(.data[[y_var]]),
+      .data[[x_var]] < 1,
+      .data[[y_var]] < 1
     )
   )
 
   # nolint start
   n_facets <- nrow(
     dplyr::distinct(
-      combined,
+      data,
       .data$activity_type,
       .data$vaccine
     )
@@ -109,29 +141,28 @@ plot_diff <- function(
   )
 
   p <- ggplot(
-    combined,
-    aes({{ x_var }}, {{ y_var }})
+    data,
+    aes(.data[[x_var]], .data[[y_var]])
   ) +
     ggplot2::geom_point(alpha = 0.5, colour = COLOUR_VIMC) +
     ggplot2::geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
     facet_wrap(
-      facets = c("activity_type", "vaccine,"),
+      facets = c("activity_type", "vaccine"),
       scales = "free",
       ncol = ncol_dynamic
     ) +
     ggplot2::scale_x_log10() +
     ggplot2::scale_y_log10() +
-    # TODO: check if VIMC theme okay here
-    theme_vimc() +
+    theme_vimc(0) +
     theme(
       strip.text = ggplot2::element_text(size = 7),
       panel.spacing = ggplot2::unit(0.05, "lines"),
       axis.text = ggplot2::element_text(size = 6.5)
     ) +
     labs(
-      title = glue::glue("{variable}: Current vs Previous Report"),
-      x = glue::glue("{new} - {variable}"),
-      y = glue::glue("{old} - {variable}")
+      title = glue::glue("{outcome}: Current vs Previous Report"),
+      x = glue::glue("{touchstone_new} - {outcome}"),
+      y = glue::glue("{touchstone_old} - {outcome}")
     )
 
   p
@@ -140,12 +171,29 @@ plot_diff <- function(
 #' @name plot_impact_diagnostics
 #'
 #' @export
-plot_modelling_group_variation <- function(data, outcome) {
+plot_modelling_group_variation <- function(data) {
+  checkmate::assert_tibble(data, min.rows = 1L, min.cols = 1L)
+
+  outcome <- unique(data[["outcome_name"]])
+  checkmate::assert_string(outcome)
+
+  outcome_short <- stringr::word(outcome, sep = "_")
+  outcome_short <- dplyr::if_else(
+    outcome_short == "dalys",
+    stringr::str_to_upper(outcome_short),
+    outcome_short
+  )
+  x_lab <- glue::glue("Burden averted ({outcome_short})")
+
+  # for scales formatting
+  .x <- NULL
+
+  # TODO: should NA-producing values (< 1) be removed?
   ggplot(data) +
     aes(
       fill = as.character(.data$mod_num),
       x = .data$adj_outc,
-      y = reorder(.data$vaccine, .data$mean_outc)
+      y = stats::reorder(.data$vaccine, .data$mean_outc)
     ) +
     ggridges::geom_density_ridges(
       alpha = 0.5,
@@ -154,22 +202,18 @@ plot_modelling_group_variation <- function(data, outcome) {
       draw_baseline = FALSE
     ) +
     facet_grid(cols = ggplot2::vars("activity_type"), scales = "fixed") +
-    theme_vimc +
-    theme(
-      legend.position = "none",
-      axis.text.x = ggplot2::element_text(angle = 90, hjust = 1)
-    ) +
     ggplot2::scale_x_log10(
       breaks = scales::trans_breaks("log10", function(x) 10^x),
       labels = scales::trans_format("log10", scales::math_format(10^.x))
     ) +
     ggplot2::scale_fill_viridis_d() +
+    theme_vimc() +
+    theme(
+      legend.position = "none",
+      axis.text.x = ggplot2::element_text(angle = 90, hjust = 1)
+    ) +
     labs(
-      x = paste0(
-        "Burden averted (",
-        ifelse(outcome == "dalys", "DALYs", outcome),
-        ")"
-      ),
+      x = x_lab,
       y = "Vaccine"
     )
 }
@@ -179,11 +223,14 @@ plot_modelling_group_variation <- function(data, outcome) {
 #' @name plot_impact_diagnostics
 #'
 #' @export
-plot_vaccine_gavi <- function(df, outcome = "deaths_averted") {
+plot_vaccine_gavi <- function(data) {
+  checkmate::assert_tibble(data)
+  outcome <- unique(data[["outcome_name"]])
+
   ggplot(
-    df,
+    data,
     aes(
-      x = reorder(.data$disease, .data$yearly_outcome),
+      x = stats::reorder(.data$disease, .data$yearly_outcome),
       y = .data$yearly_outcome,
       fill = factor(.data$year)
     )
@@ -208,9 +255,13 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") {
 #' @name plot_impact_diagnostics
 #'
 #' @export
-plot_cumul <- function(df, outcome, disease_filter) {
+plot_cumul <- function(data) {
+  checkmate::assert_tibble(data)
+  outcome <- unique(data[["outcome_name"]])
+  disease <- unique(data[["disease"]])
+
   p <- ggplot(
-    df,
+    data,
     aes(
       x = .data$year,
       y = .data$value,
@@ -229,7 +280,7 @@ plot_cumul <- function(df, outcome, disease_filter) {
       x = "Year",
       y = paste("Cumulative", outcome),
       color = "Modelling Group",
-      title = paste("Cumulative", outcome, "Over Time –", disease_filter)
+      title = paste("Cumulative", outcome, "Over Time -", disease)
     ) +
     theme(legend.position = "bottom")
 
diff --git a/R/fn_plotting_prep_impact_diagnostics.R b/R/fn_plotting_prep_impact_diagnostics.R
index b999d33..d529d7d 100644
--- a/R/fn_plotting_prep_impact_diagnostics.R
+++ b/R/fn_plotting_prep_impact_diagnostics.R
@@ -5,24 +5,69 @@
 #'
 #' @description
 #' A suite of helper functions that sit between impact diagnostics functions and
-#' plotting functions. These functions have some basic checks on the input data,
-#' but otherwise assume that users will not modify inputs.
+#' plotting functions. These functions transform and aggregate impact estimates
+#' to prepare them for visualisation. Functions have basic checks on input data
+#' but otherwise assume users will not modify inputs.
 #'
-#' @param data A data.frame of impact estimates.
+#' @param df2 A `<tibble>` of impact estimates with at least columns
+#' `modelling_group`, `vaccine`, outcome variable, and `fvps` (doses
+#' delivered). Used as the primary data source for calculations in
+#' [prep_plot_mod_grp_varn()].
 #'
-#' @param comparison A data.frame of impact estimates used as a comparator for
-#' `comparison`.
+#' @param df3 A `<tibble>` of modelling group and vaccine combinations,
+#' typically with one row per modelling group per vaccine. Joined with `df2`
+#' to ensure complete group coverage in [prep_plot_mod_grp_varn()].
 #'
-#' @param outcome A string for the impact outcome; may be one of
-#' "deaths_averted" or "dalys_averted".
+#' @param data A `<tibble>` of impact estimates with columns including at least
+#' those in [COLNAMES_KEY_PRESSURE_TEST], the outcome variable, and
+#' potentially outcome-specific columns (for [prep_plot_cumul()]). Used in
+#' [prep_plot_vax_gavi()] and [prep_plot_cumul()].
+#'
+#' @param prev_data A `<tibble>` of impact estimates from a previous touchstone,
+#' used as a comparison baseline in [prep_plot_vax_gavi()]. Should have the
+#' same structure as `data`.
+#'
+#' @param outcome A character string for the impact outcome. Must be one of
+#' `"deaths_averted"` or `"dalys_averted"`. For [prep_plot_cumul()],
+#' `data` must include columns named `{outcome}_old` and `{outcome}_new`.
+#'
+#' @param disease A character string specifying a single disease for filtering
+#' in [prep_plot_cumul()].
+#'
+#' @param touchstone_old A six-character touchstone identifier (YYYYMM format)
+#' for the previous dataset. Defaults to [DEF_TOUCHSTONE_OLD]. Used in
+#' [prep_plot_vax_gavi()] and [prep_plot_cumul()].
+#'
+#' @param touchstone_new A six-character touchstone identifier (YYYYMM format)
+#' for the current dataset. Defaults to [DEF_TOUCHSTONE_NEW]. Used in
+#' [prep_plot_vax_gavi()] and [prep_plot_cumul()].
+#'
+#' @importFrom rlang :=
 #'
 #' @return
 #'
-#' - [prep_plot_mod_grp_varn()] returns a `<tibble>` TODO add
+#' - [prep_plot_mod_grp_varn()] returns a grouped `<tibble>` (grouped by
+#' `vaccine`) with all columns from `df2` and `df3` plus derived columns:
+#' `adj_outc` (adjusted outcome with small offset), `outcome_name` (input
+#' outcome), and `mean_outc` (vaccine-level weighted mean outcome).
+#'
+#' - [prep_plot_vax_gavi()] returns a `<tibble>` with columns `disease`,
+#' `year`, `yearly_outcome`, `dataset` (factor with levels for old touchstone,
+#' "Difference", and new touchstone), and `outcome_name`. Summarizes outcomes
+#' by disease and year across two touchstones.
+#'
+#' - [prep_plot_cumul()] returns a `<tibble>` with columns `year`,
+#' `modelling_group`, `touchstone`, `value` (cumulative or average outcome),
+#' `line_type` ("solid" for individual models, "dashed" for model average),
+#' and `outcome_name`. Returns `NULL` if the specified disease has no non-zero
+#' data to plot.
 #'
 #' @export
 prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) {
-  # TODO: df2 and df3 need informative names
+  checkmate::assert_tibble(df2, min.rows = 1L, min.cols = 1L)
+  checkmate::assert_tibble(df3, min.rows = 1L, min.cols = 1L)
+
+  outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES)
 
   offset_manual <- 1e-6
   df_combined <- dplyr::left_join(
@@ -33,7 +78,8 @@ prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) {
 
   df_combined <- dplyr::mutate(
     df_combined,
-    adj_outc = {{ outcome }} + offset_manual
+    adj_outc = .data[[outcome]] + offset_manual,
+    outcome_name = outcome
   )
 
   df_combined <- dplyr::group_by(
@@ -51,17 +97,12 @@ prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) {
 
 #' @name plot_prep_impact_diagnostics
 #'
-#' @param data description
-#'
-#' @param outcome
-#'
-#' @param disease
-#'
-#' @param touchstone_old
-#'
-#' @param touchstone_new
+#' @param data A `<tibble>` of impact estimates with columns including at least
+#' those in [COLNAMES_KEY_PRESSURE_TEST], the outcome variable, and
+#' potentially other columns for analysis.
 #'
-#' @return [prep_plot_vax_gavi()] returns a ... TODO add
+#' @param prev_data A `<tibble>` of impact estimates from a previous touchstone,
+#' used as a comparison baseline. Should have the same structure as `data`.
 #'
 #' @export
 prep_plot_vax_gavi <- function(
@@ -89,15 +130,19 @@ prep_plot_vax_gavi <- function(
 
       df <- dplyr::filter(
         df,
-        dplyr::between(.data$year, 2021, 2024),
-        Negate(grepl("COVID", .data$disease, ignore.case = TRUE))
+        dplyr::between(.data$year, 2021, 2024)
+      )
+
+      df <- dplyr::filter_out(
+        df,
+        grepl("COVID", .data$disease, ignore.case = TRUE)
       )
 
       df <- dplyr::group_by(df, .data$disease, .data$year)
 
       df <- dplyr::summarise(
         df,
-        yearly_outcome = sum({{ outcome }}, na.rm = TRUE),
+        yearly_outcome = sum(.data[[outcome]], na.rm = TRUE),
         .groups = "drop"
       )
 
@@ -141,20 +186,15 @@ prep_plot_vax_gavi <- function(
     )
   )
 
+  df_combined$outcome_name <- outcome
+
   df_combined
 }
 
 #' @name plot_prep_impact_diagnostics
 #'
-#' @param data description
-#'
-#' @param outcome
-#'
-#' @param disease
-#'
-#' @param touchstone_old
-#'
-#' @param touchstone_new
+#' @param disease A character string specifying a single disease for filtering
+#' and analysis.
 #'
 #' @export
 prep_plot_cumul <- function(
@@ -164,6 +204,12 @@ prep_plot_cumul <- function(
   touchstone_old = DEF_TOUCHSTONE_OLD,
   touchstone_new = DEF_TOUCHSTONE_NEW
 ) {
+  checkmate::assert_tibble(data)
+  checkmate::assert_subset(
+    outcome,
+    IMPACT_OUTCOMES
+  )
+
   outcome_cols <- colnames(data)[stringr::str_detect(
     colnames(data),
     glue::glue("^{outcome}_")
@@ -177,6 +223,7 @@ prep_plot_cumul <- function(
     {{ COLNAMES_KEY_PRESSURE_TEST }},
     {{ outcome_cols }}
   )
+  combined2 <- combined2[combined2$disease == disease, ]
 
   combined2 <- tidyr::pivot_longer(
     combined2,
@@ -193,9 +240,8 @@ prep_plot_cumul <- function(
     ),
     touchstone = dplyr::replace_values(
       .data$touchstone,
-      c("old", "new"),
-      as.character(c(touchstone_old, touchstone_new)),
-      .default = .data$touchstone
+      from = c("old", "new"),
+      to = as.character(c(touchstone_old, touchstone_new))
     ),
     touchstone = factor(
       .data$touchstone,
@@ -204,9 +250,8 @@ prep_plot_cumul <- function(
   )
 
   # Cumulative values by modelling group
-  df_cum <- dplyr::filter(combined2, .data$disease == disease)
   df_cum <- dplyr::group_by(
-    df_cum,
+    combined2,
     .data$modelling_group,
     .data$touchstone
   )
@@ -217,7 +262,7 @@ prep_plot_cumul <- function(
   df_cum <- dplyr::arrange(df_cum, .data$year)
   df_cum <- dplyr::mutate(
     df_cum,
-    first_valid = min(.data$year[!is.na(data$value)]),
+    first_valid = min(.data$year[!is.na(.data$value)]),
     {{ cum_col }} := dplyr::if_else(
       .data$year < .data$first_valid,
       NA_real_,
@@ -237,7 +282,7 @@ prep_plot_cumul <- function(
     df_cum,
     {{ avg_col }} := mean({{ cum_col }}, na.rm = TRUE),
     n_models = sum(!is.na({{ cum_col }})),
-    .groups = c("year", "touchstone")
+    .by = c("year", "touchstone")
   )
   df_avg <- dplyr::filter(
     df_avg,
@@ -280,10 +325,13 @@ prep_plot_cumul <- function(
     )
   )
 
+  # add outcome name
+  df_plot$outcome_name <- outcome
+
   if (nrow(df_plot) == 0 || all(df_plot$value == 0)) {
     message("No non-zero data to plot for ", disease, ". Skipping plot.")
     return(NULL)
   }
 
-  df_plot
+  tibble::as_tibble(df_plot)
 }
diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R
index fbcc3fb..c5c9a1f 100644
--- a/R/fn_pressure_testing.R
+++ b/R/fn_pressure_testing.R
@@ -11,7 +11,8 @@
 #' checked for contents
 #'
 #' @param threshold A six-digit number that is checked as a valid touchstone
-#' identifier (YYYYMM format) using [validate_ts_year()].
+#' identifier (YYYYMM format) using [validate_ts_year()]. Defaults to
+#' [DEF_TOUCHSTONE_NEW] (`"202310"`).
 #'
 #' @keywords impact_diagnostics
 #'
@@ -31,7 +32,7 @@
 #' (`NA` to non-`NA`) removed.
 #'
 #' @export
-filter_recent_ts <- function(df, threshold = 202310) {
+filter_recent_ts <- function(df, threshold = DEF_TOUCHSTONE_NEW) {
   checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L)
   checkmate::assert_names(
     names(df),
@@ -58,7 +59,10 @@ filter_recent_ts <- function(df, threshold = 202310) {
 #' @name pres_test_filter_data
 #'
 #' @export
-filter_excluded_diseases_ts <- function(df, threshold = 202110) {
+filter_excluded_diseases_ts <- function(
+  df,
+  threshold = DEF_TOUCHSTONE_OLD_OLD
+) {
   checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L)
   checkmate::assert_names(
     names(df),
@@ -123,10 +127,9 @@ filter_invalid_trajectories <- function(
 ) {
   checkmate::assert_data_frame(df, min.cols = 1L, min.rows = 1L)
 
-  # TODO: can we assume prev_data is at least the size of df?
+  # TODO: can we find checks for prev_data size in reln to df? rows? cols?
   checkmate::assert_data_frame(
     prev_data,
-    min.cols = ncol(df),
     min.rows = nrow(df)
   )
 
@@ -226,11 +229,11 @@ generate_diffs <- function(
   # check interest cols in dfs. key cols are check in `add_campaign_id`
   checkmate::assert_names(
     colnames(prev_df),
-    interest_cols
+    must.include = interest_cols
   )
   checkmate::assert_names(
     colnames(curr_df),
-    interest_cols
+    must.include = interest_cols
   )
 
   touchstone <- validate_ts_year(touchstone)
@@ -267,7 +270,7 @@ generate_diffs <- function(
     interest_cols
   )
 
-  changes
+  tibble::as_tibble(changes)
 }
 
 #' Generate IQR for key outcomes
@@ -300,10 +303,9 @@ gen_national_iqr <- function(
   checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE)
 
   # NOTE: restricting value columns to deaths and dalys averted
-  value_cols <- rlang::arg_match(
+  checkmate::assert_subset(
     value_cols,
-    c("deaths_averted", "dalys_averted"),
-    multiple = TRUE
+    c("deaths_averted", "dalys_averted")
   )
 
   checkmate::assert_string(prefix)
@@ -325,6 +327,8 @@ gen_national_iqr <- function(
     ),
     .groups = "drop"
   )
+
+  tibble::as_tibble(df)
 }
 
 #' Flag significant changes in impact estimates
@@ -341,7 +345,7 @@ gen_national_iqr <- function(
 #' [gen_national_iqr()].
 #'
 #' @param variable A string specifying the variable of interest. Must be one of
-#' "deaths_averted" and "dalys_averted", and must be present as a name and
+#' "deaths_averted" or "dalys_averted", and must be present as a name and
 #' element of `changes_list`.
 #'
 #' @inheritParams gen_national_iqr
@@ -370,7 +374,7 @@ flag_large_diffs <- function(
   touchstone_old = DEF_TOUCHSTONE_OLD_OLD,
   touchstone_new = DEF_TOUCHSTONE_NEW
 ) {
-  checkmate::assert_list(changes_list, "data.frame")
+  checkmate::assert_list(changes_list, c("data.frame", "NULL"))
   checkmate::assert_data_frame(iqr_df, min.rows = 1L, min.cols = 1L)
 
   variable <- rlang::arg_match(variable)
@@ -454,7 +458,9 @@ flag_large_diffs <- function(
     rename_lookup
   )
 
-  dplyr::arrange(df_compare, dplyr::desc(diff))
+  df_compare <- dplyr::arrange(df_compare, dplyr::desc(diff))
+
+  tibble::as_tibble(df_compare)
 }
 
 #' Combine and align data from two touchstones
@@ -492,15 +498,13 @@ gen_combined_df <- function(
     min.rows = 1L
   )
 
-  # TODO: df2 needs a better name
-  prev_df <- dplyr::select(prev_dat, {{ interest_cols }})
-  cur_df <- dplyr::select(df2, {{ interest_cols }})
-
-  combined <- dplyr::full_join(
-    prev_df,
-    cur_df,
-    by = key_cols,
-    suffix = c("_old", "_new")
+  checkmate::assert_subset(
+    interest_cols,
+    COLNAMES_INTEREST_PRESSURE_TEST
+  )
+  checkmate::assert_subset(
+    key_cols,
+    COLNAMES_KEY_PRESSURE_TEST
   )
 
   cols_to_select <- c(
@@ -517,10 +521,37 @@ gen_combined_df <- function(
     "dalys_averted_new"
   )
 
-  dplyr::select(
+  checkmate::assert_names(
+    colnames(prev_dat),
+    must.include = c(interest_cols, key_cols)
+  )
+  checkmate::assert_names(
+    colnames(df2),
+    must.include = c(interest_cols, key_cols)
+  )
+
+  # TODO: df2 needs a better name
+  prev_df <- dplyr::select(prev_dat, {{ interest_cols }})
+  cur_df <- dplyr::select(df2, {{ interest_cols }})
+
+  combined <- dplyr::full_join(
+    prev_df,
+    cur_df,
+    by = key_cols,
+    suffix = c("_old", "_new")
+  )
+
+  checkmate::assert_names(
+    colnames(combined),
+    must.include = cols_to_select
+  )
+
+  combined <- dplyr::select(
     combined,
-    {{ cols_to_select }}
+    dplyr::all_of(cols_to_select)
   )
+
+  tibble::as_tibble(combined)
 }
 
 #' Compare sub-regional and national estimates
@@ -572,7 +603,7 @@ compare_natl_subreg <- function(
   subregional_summary <- dplyr::summarise(
     subregional_summary,
     subregional_mean = mean(.data[[outcome]], na.rm = TRUE),
-    subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE),
+    subregional_iqr = stats::IQR(.data[[outcome]], na.rm = TRUE),
     .groups = "drop"
   )
 
@@ -614,7 +645,7 @@ compare_natl_subreg <- function(
   comparison <- dplyr::select(comparison, {{ cols_to_select }})
   comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score))
 
-  comparison
+  tibble::as_tibble(comparison)
 }
 
 #' Save pressure-testing diagnostics to local file
@@ -654,6 +685,8 @@ compare_natl_subreg <- function(
 #' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the
 #' `"campaign"` activity type.
 #'
+#' @param output_dir A writeable directory. Defaults to "./outputs".
+#'
 #' @return None. Called for the convenience side-effect of saving data.frames as
 #' `.Rds` format.
 #'
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 69da54f..e9a14dc 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -19,9 +19,12 @@ VIMC
 WIP
 WPP
 YLLs
+YYYYMM
 autogenerated
+dalys
+erroring
 facetted
 ggplot
+iqr
 tibble
 tibbles
-timeseries
diff --git a/man/basic_burden_sanity.Rd b/man/basic_burden_sanity.Rd
index 004cffd..035018f 100644
--- a/man/basic_burden_sanity.Rd
+++ b/man/basic_burden_sanity.Rd
@@ -18,4 +18,4 @@ estimates, with the length of the vector depending on how many checks fail.
 Helper function for sanity checks on burden estimate values.
 Checks whether any burden estimates are non-numeric, missing, or negative.
 }
-\keyword{diagnostics}
+\keyword{burden_diagnostics}
diff --git a/man/check_demography_alignment.Rd b/man/check_demography_alignment.Rd
index 5afb1f0..d5fecd9 100644
--- a/man/check_demography_alignment.Rd
+++ b/man/check_demography_alignment.Rd
@@ -27,4 +27,4 @@ modelled population size from the WPP-derived population estimates.
 Check the modelled disease burden data has similar population
 sizes as the provided population data.
 }
-\keyword{diagnostics}
+\keyword{burden_diagnostics}
diff --git a/man/compare_natl_subreg.Rd b/man/compare_natl_subreg.Rd
index ecf5cea..7f4c60e 100644
--- a/man/compare_natl_subreg.Rd
+++ b/man/compare_natl_subreg.Rd
@@ -27,4 +27,4 @@ impact is considered to be outside the tolerance limit.
 \description{
 Compare sub-regional and national estimates
 }
-\keyword{pressure_testing}
+\keyword{impact_diagnostics}
diff --git a/man/constants.Rd b/man/constants.Rd
index 8faa19a..1d93a1f 100644
--- a/man/constants.Rd
+++ b/man/constants.Rd
@@ -10,6 +10,7 @@
 \alias{colnames_df_missing_cols}
 \alias{COLNAMES_KEY_PRESSURE_TEST}
 \alias{COLNAMES_INTEREST_PRESSURE_TEST}
+\alias{IMPACT_OUTCOMES}
 \alias{EXCLUDED_DISEASES}
 \alias{N_TS_MIN_CHARS}
 \alias{N_TS_YEAR_CHARS}
@@ -20,6 +21,7 @@
 \alias{DEF_TOUCHSTONE_OLD}
 \alias{DEF_TOUCHSTONE_NEW}
 \alias{DEF_TOUCHSTONE_OLD_OLD}
+\alias{COLOUR_VIMC}
 \title{Package constants}
 \format{
 An object of class \code{character} of length 5.
@@ -36,6 +38,8 @@ An object of class \code{character} of length 7.
 
 An object of class \code{character} of length 14.
 
+An object of class \code{character} of length 2.
+
 An object of class \code{character} of length 4.
 
 An object of class \code{integer} of length 1.
@@ -54,6 +58,8 @@ An object of class \code{character} of length 1.
 
 An object of class \code{character} of length 1.
 
+An object of class \code{character} of length 1.
+
 An object of class \code{character} of length 1.
 }
 \usage{
@@ -71,6 +77,8 @@ COLNAMES_KEY_PRESSURE_TEST
 
 COLNAMES_INTEREST_PRESSURE_TEST
 
+IMPACT_OUTCOMES
+
 EXCLUDED_DISEASES
 
 N_TS_MIN_CHARS
@@ -90,9 +98,52 @@ DEF_TOUCHSTONE_OLD
 DEF_TOUCHSTONE_NEW
 
 DEF_TOUCHSTONE_OLD_OLD
+
+COLOUR_VIMC
 }
 \description{
-Package constants
+Constant values used in \emph{vimcheck}. See the \strong{Examples} section for the
+constant values.
+}
+\examples{
+file_dict_colnames
+
+scenario_data_colnames
+
+burden_outcome_names
+
+colnames_plot_demog_compare
+
+colnames_df_missing_cols
+
+COLNAMES_KEY_PRESSURE_TEST
+
+COLNAMES_INTEREST_PRESSURE_TEST
+
+IMPACT_OUTCOMES
+
+EXCLUDED_DISEASES
+
+N_TS_MIN_CHARS
+
+N_TS_YEAR_CHARS
+
+MIN_TS_YEAR
+
+MAX_TS_YEAR
+
+MIN_TS_MONTH
+
+MAX_TS_MONTH
+
+DEF_TOUCHSTONE_OLD
+
+DEF_TOUCHSTONE_NEW
+
+DEF_TOUCHSTONE_OLD_OLD
+
+COLOUR_VIMC
+
 }
 \keyword{constants}
 \keyword{datasets}
diff --git a/man/flag_large_diffs.Rd b/man/flag_large_diffs.Rd
index e0379b1..fcc5039 100644
--- a/man/flag_large_diffs.Rd
+++ b/man/flag_large_diffs.Rd
@@ -22,7 +22,7 @@ interest (see \code{variable}). Usually generated using \code{\link[=generate_di
 \code{\link[=gen_national_iqr]{gen_national_iqr()}}.}
 
 \item{variable}{A string specifying the variable of interest. Must be one of
-"deaths_averted" and "dalys_averted", and must be present as a name and
+"deaths_averted" or "dalys_averted", and must be present as a name and
 element of \code{changes_list}.}
 
 \item{group_cols}{A character vector of grouping columns. Defaults to
@@ -47,4 +47,4 @@ between touchstones is greater than expected. A row is flagged if the
 difference is greater than \code{threshold} \eqn{\times} the inter-quartile range
 for cases where the IQR is greater than zero.
 }
-\keyword{pressure_testing}
+\keyword{impact_diagnostics}
diff --git a/man/gen_combined_df.Rd b/man/gen_combined_df.Rd
index 9a329d3..b3f53f0 100644
--- a/man/gen_combined_df.Rd
+++ b/man/gen_combined_df.Rd
@@ -31,4 +31,4 @@ are disambiguated with the suffixes \code{"_old"} and \code{"_new"}.
 \description{
 Generates a full join of two data.frames, selecting for columns of interest.
 }
-\keyword{pressure_testing}
+\keyword{impact_diagnostics}
diff --git a/man/gen_national_iqr.Rd b/man/gen_national_iqr.Rd
index c722e66..4257953 100644
--- a/man/gen_national_iqr.Rd
+++ b/man/gen_national_iqr.Rd
@@ -31,4 +31,4 @@ using string interpolation.
 \description{
 Generate IQR for key outcomes
 }
-\keyword{pressure_testing}
+\keyword{impact_diagnostics}
diff --git a/man/generate_diffs.Rd b/man/generate_diffs.Rd
index 17fcf32..4e4607e 100644
--- a/man/generate_diffs.Rd
+++ b/man/generate_diffs.Rd
@@ -36,4 +36,4 @@ with one list element per element of \code{interest_cols}.
 \description{
 Explore significant changes in deaths and DALYs
 }
-\keyword{pressure_testing}
+\keyword{impact_diagnostics}
diff --git a/man/plotting.Rd b/man/plot_burden_diagnostics.Rd
similarity index 91%
rename from man/plotting.Rd
rename to man/plot_burden_diagnostics.Rd
index 7f915a3..8431edd 100644
--- a/man/plotting.Rd
+++ b/man/plot_burden_diagnostics.Rd
@@ -1,7 +1,7 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/fn_plotting_burden_diagnostics.R
-\name{plotting}
-\alias{plotting}
+\name{plot_burden_diagnostics}
+\alias{plot_burden_diagnostics}
 \alias{plot_compare_demography}
 \alias{plot_age_patterns}
 \alias{plot_global_burden_decades}
@@ -55,8 +55,8 @@ A \verb{<ggplot>} object that can be printed to screen in the plot frame
 or saved to an output device (i.e., saved as an image file).
 }
 \description{
-Plotting functions for burden and impact diagnostics. All functions operate
+Plotting functions for burden diagnostics. All functions operate
 on data prepared for plotting by a corresponding
-\link[=plotting_prep]{plotting-preparation function}.
+\link[=plot_prep_burden_diagnostics]{plotting-preparation function}.
 }
 \keyword{plotting}
diff --git a/man/plot_impact_diagnostics.Rd b/man/plot_impact_diagnostics.Rd
new file mode 100644
index 0000000..c195f97
--- /dev/null
+++ b/man/plot_impact_diagnostics.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_plotting_impact_diagnostics.R
+\name{plot_impact_diagnostics}
+\alias{plot_impact_diagnostics}
+\alias{plot_sig_diff}
+\alias{plot_diff}
+\alias{plot_modelling_group_variation}
+\alias{plot_vaccine_gavi}
+\alias{plot_cumul}
+\title{Create impact diagnostics plots}
+\usage{
+plot_sig_diff(data, outcome = IMPACT_OUTCOMES)
+
+plot_diff(
+  data,
+  outcome = IMPACT_OUTCOMES,
+  group_vars = IMPACT_GROUP_VARS,
+  touchstone_old = DEF_TOUCHSTONE_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
+)
+
+plot_modelling_group_variation(data)
+
+plot_vaccine_gavi(data)
+
+plot_cumul(data)
+}
+\arguments{
+\item{data}{A data.frame suitable for plotting.
+\itemize{
+\item \code{plot_sig_diff()}: Output of
+\code{\link[=plot_prep_impact_diagnostics]{flag_large_diff()}}.
+\item \code{plot_diff()}: Output of
+\code{\link[=plot_prep_impact_diagnostics]{gen_combined_df()}}.
+\item \code{plot_modelling_group_variation()}: Output of
+\code{\link[=plot_prep_impact_diagnostics]{plot_prep_mod_grp_varn()}}.
+\item \code{plot_vaccine_gavi()}: Output of
+\code{\link[=plot_prep_impact_diagnostics]{plot_prep_vax_gavi()}}
+\item \code{plot_cumul()}: Output of
+\code{\link[=plot_prep_impact_diagnostics]{plot_prep_cumul()}}
+}}
+
+\item{outcome}{A string for the impact outcome. One of \link{IMPACT_OUTCOMES}.}
+
+\item{group_vars}{A single string for the grouping variables. May be any of
+\link{IMPACT_OUTCOMES}, which are \code{"activity_type"} and \code{"vaccine"}.}
+
+\item{touchstone_old}{A string for the previous touchstone in
+format \code{"YYYYMM"}. Defaults to \link{DEF_TOUCHSTONE_OLD}.}
+
+\item{touchstone_new}{A string for the current or new touchstone in
+format \code{"YYYYMM"}. Defaults to \link{DEF_TOUCHSTONE_NEW}.}
+}
+\value{
+A \verb{<ggplot2>} object that can be viewed or saved.
+}
+\description{
+Functions that create impact diagnostics plots (or plotting objects). All
+functions are associated with one other upstream data processing function,
+and can be used in a pipe with that function. Where appropriate, outcome
+selection and label preparation is automated to reduce function arguments.
+
+Plotting functions for impact diagnostics. See
+\link[=plot_prep_impact_diagnostics]{plotting-preparation functions} for a set of
+helper functions that prepare impact diagnostics for plotting.
+}
diff --git a/man/plotting_prep.Rd b/man/plot_prep_burden_diagnostics.Rd
similarity index 87%
rename from man/plotting_prep.Rd
rename to man/plot_prep_burden_diagnostics.Rd
index f8124bc..3afa707 100644
--- a/man/plotting_prep.Rd
+++ b/man/plot_prep_burden_diagnostics.Rd
@@ -1,7 +1,7 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/fn_plotting_prep_bur_diag.R
-\name{plotting_prep}
-\alias{plotting_prep}
+\name{plot_prep_burden_diagnostics}
+\alias{plot_prep_burden_diagnostics}
 \alias{prep_plot_demography}
 \alias{prep_plot_age}
 \alias{prep_plot_burden_decades}
@@ -53,6 +53,8 @@ column "burden_outcome", and a list column of tibbles "burden_data".
 }
 \description{
 Transform burden estimate data from modelling groups to make them suitable
-for plotting using an appropriate \link[=plotting]{plotting function}. Each
-preparation function corresponds to a plotting function.
+for plotting using an appropriate
+\link[=plot_prep_burden_diagnostics]{plotting function}. Each preparation function
+corresponds to a plotting function.
 }
+\keyword{plot_prep_burden_diagnostics}
diff --git a/man/plot_prep_impact_diagnostics.Rd b/man/plot_prep_impact_diagnostics.Rd
new file mode 100644
index 0000000..7afc4f1
--- /dev/null
+++ b/man/plot_prep_impact_diagnostics.Rd
@@ -0,0 +1,82 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fn_plotting_prep_impact_diagnostics.R
+\name{plot_prep_impact_diagnostics}
+\alias{plot_prep_impact_diagnostics}
+\alias{prep_plot_mod_grp_varn}
+\alias{prep_plot_vax_gavi}
+\alias{prep_plot_cumul}
+\title{Prepare impact diagnostics for plotting}
+\usage{
+prep_plot_mod_grp_varn(df2, df3, outcome = IMPACT_OUTCOMES)
+
+prep_plot_vax_gavi(
+  data,
+  prev_data,
+  outcome = IMPACT_OUTCOMES,
+  touchstone_old = DEF_TOUCHSTONE_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
+)
+
+prep_plot_cumul(
+  data,
+  outcome,
+  disease,
+  touchstone_old = DEF_TOUCHSTONE_OLD,
+  touchstone_new = DEF_TOUCHSTONE_NEW
+)
+}
+\arguments{
+\item{df2}{A \verb{<tibble>} of impact estimates with at least columns
+\code{modelling_group}, \code{vaccine}, outcome variable, and \code{fvps} (doses
+delivered). Used as the primary data source for calculations in
+\code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}}.}
+
+\item{df3}{A \verb{<tibble>} of modelling group and vaccine combinations,
+typically with one row per modelling group per vaccine. Joined with \code{df2}
+to ensure complete group coverage in \code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}}.}
+
+\item{outcome}{A character string for the impact outcome. Must be one of
+\code{"deaths_averted"} or \code{"dalys_averted"}. For \code{\link[=prep_plot_cumul]{prep_plot_cumul()}},
+\code{data} must include columns named \verb{\{outcome\}_old} and \verb{\{outcome\}_new}.}
+
+\item{data}{A \verb{<tibble>} of impact estimates with columns including at least
+those in \link{COLNAMES_KEY_PRESSURE_TEST}, the outcome variable, and
+potentially other columns for analysis.}
+
+\item{prev_data}{A \verb{<tibble>} of impact estimates from a previous touchstone,
+used as a comparison baseline. Should have the same structure as \code{data}.}
+
+\item{touchstone_old}{A six-character touchstone identifier (YYYYMM format)
+for the previous dataset. Defaults to \link{DEF_TOUCHSTONE_OLD}. Used in
+\code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} and \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}.}
+
+\item{touchstone_new}{A six-character touchstone identifier (YYYYMM format)
+for the current dataset. Defaults to \link{DEF_TOUCHSTONE_NEW}. Used in
+\code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} and \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}.}
+
+\item{disease}{A character string specifying a single disease for filtering
+and analysis.}
+}
+\value{
+\itemize{
+\item \code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}} returns a grouped \verb{<tibble>} (grouped by
+\code{vaccine}) with all columns from \code{df2} and \code{df3} plus derived columns:
+\code{adj_outc} (adjusted outcome with small offset), \code{outcome_name} (input
+outcome), and \code{mean_outc} (vaccine-level weighted mean outcome).
+\item \code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} returns a \verb{<tibble>} with columns \code{disease},
+\code{year}, \code{yearly_outcome}, \code{dataset} (factor with levels for old touchstone,
+"Difference", and new touchstone), and \code{outcome_name}. Summarizes outcomes
+by disease and year across two touchstones.
+\item \code{\link[=prep_plot_cumul]{prep_plot_cumul()}} returns a \verb{<tibble>} with columns \code{year},
+\code{modelling_group}, \code{touchstone}, \code{value} (cumulative or average outcome),
+\code{line_type} ("solid" for individual models, "dashed" for model average),
+and \code{outcome_name}. Returns \code{NULL} if the specified disease has no non-zero
+data to plot.
+}
+}
+\description{
+A suite of helper functions that sit between impact diagnostics functions and
+plotting functions. These functions transform and aggregate impact estimates
+to prepare them for visualisation. Functions have basic checks on input data
+but otherwise assume users will not modify inputs.
+}
diff --git a/man/plotting_theme.Rd b/man/plotting_theme.Rd
index fe46d78..0e95add 100644
--- a/man/plotting_theme.Rd
+++ b/man/plotting_theme.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_plotting_burden_diagnostics.R
+% Please edit documentation in R/fn_plotting_helpers.R
 \name{plotting_theme}
 \alias{plotting_theme}
 \alias{theme_vimc}
diff --git a/man/pres_test_filter_data.Rd b/man/pres_test_filter_data.Rd
index 003617a..27b1d43 100644
--- a/man/pres_test_filter_data.Rd
+++ b/man/pres_test_filter_data.Rd
@@ -8,9 +8,9 @@
 \alias{filter_invalid_trajectories}
 \title{Filter data for touchstones or diseases}
 \usage{
-filter_recent_ts(df, threshold = 202310)
+filter_recent_ts(df, threshold = DEF_TOUCHSTONE_NEW)
 
-filter_excluded_diseases_ts(df, threshold = 202110)
+filter_excluded_diseases_ts(df, threshold = DEF_TOUCHSTONE_OLD_OLD)
 
 filter_duplicates(df, key_cols = COLNAMES_KEY_PRESSURE_TEST)
 
@@ -25,7 +25,8 @@ filter_invalid_trajectories(
 checked for contents}
 
 \item{threshold}{A six-digit number that is checked as a valid touchstone
-identifier (YYYYMM format) using \code{\link[=validate_ts_year]{validate_ts_year()}}.}
+identifier (YYYYMM format) using \code{\link[=validate_ts_year]{validate_ts_year()}}. Defaults to
+\link{DEF_TOUCHSTONE_NEW} (\code{"202310"}).}
 
 \item{key_cols}{Key columns in \code{df} to check for duplicates.}
 
@@ -53,4 +54,4 @@ to the \link{EXCLUDED_DISEASES}, when the touchstone year in \code{df} is less t
 A pair of helper functions allowing filtering out of recent touchstone values
 and excluded diseases.
 }
-\keyword{pressure_testing}
+\keyword{impact_diagnostics}
diff --git a/man/save_outputs.Rd b/man/save_outputs.Rd
index db41a66..27e513d 100644
--- a/man/save_outputs.Rd
+++ b/man/save_outputs.Rd
@@ -47,6 +47,8 @@ with the outcome \code{"dalys_averted"}.}
 \item{subregional_flags_dalys_rout}{A data.frame that is the output of
 \code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the
 \code{"campaign"} activity type.}
+
+\item{output_dir}{A writeable directory. Defaults to "./outputs".}
 }
 \value{
 None. Called for the convenience side-effect of saving data.frames as
@@ -57,4 +59,4 @@ Save pressure-testing diagnostics data.frames to local compressed files in
 the \code{.Rds} format. Input data.frames are generated by other package functions
 and are not checked here.
 }
-\keyword{pressure_testing}
+\keyword{impact_diagnostics}
diff --git a/man/significant_diff_plot.Rd b/man/significant_diff_plot.Rd
deleted file mode 100644
index 11b7cb4..0000000
--- a/man/significant_diff_plot.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_plotting_pressure_testing.R
-\name{significant_diff_plot}
-\alias{significant_diff_plot}
-\title{Plot significant changes}
-\usage{
-significant_diff_plot(df, outcome)
-}
-\description{
-Plot significant changes
-}
-\keyword{internal}
diff --git a/man/validate_complete_incoming_files.Rd b/man/validate_complete_incoming_files.Rd
index 75d3cd9..dcdfb7b 100644
--- a/man/validate_complete_incoming_files.Rd
+++ b/man/validate_complete_incoming_files.Rd
@@ -20,4 +20,4 @@ This function expects that incoming burden files are in the
 directory given by \code{path_burden}, which holds a file dictionary which maps
 each data file to a specific scenario.
 }
-\keyword{diagnostics}
+\keyword{burden_diagnostics}
diff --git a/man/validate_file_dict_template.Rd b/man/validate_file_dict_template.Rd
index af2d770..e84f3f7 100644
--- a/man/validate_file_dict_template.Rd
+++ b/man/validate_file_dict_template.Rd
@@ -25,4 +25,4 @@ scenarios i.e. the number of files that we expect from a model.
 Users should populate the file column to match the scenario-file.
 This function will run if a \code{file_dictionary.csv} file does not exist
 }
-\keyword{diagnostics}
+\keyword{burden_diagnostics}
diff --git a/man/validate_template_alignment.Rd b/man/validate_template_alignment.Rd
index 7bbccc6..c6c9e36 100644
--- a/man/validate_template_alignment.Rd
+++ b/man/validate_template_alignment.Rd
@@ -19,4 +19,4 @@ against \code{template}, with information on missing and extra data.
 \description{
 Identify extra and missing columns and rows in burden data.
 }
-\keyword{diagnostics}
+\keyword{burden_diagnostics}

From 386be9b6a1aa618626cf0f0a1eee5adc76a6ddb8 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Wed, 22 Apr 2026 15:58:57 +0100
Subject: [PATCH 16/17] Update infra, bump to v0.0.4

---
 DESCRIPTION   |  6 +++++-
 NAMESPACE     | 31 ++++++++++++++++++++++++++++++-
 NEWS.md       |  8 ++++++++
 inst/WORDLIST |  1 +
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 93969a5..d96f7e7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: vimcheck
 Title: Diagnostics for Vaccine Impact Modelling Consortium Burden and
     Impact Estimates
-Version: 0.0.3
+Version: 0.0.4
 Authors@R: c(
     person("Pratik", "Gupte", , "p.gupte24@imperial.ac.uk", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-5294-7819")),
@@ -24,13 +24,17 @@ Depends:
 Imports: 
     checkmate,
     cli,
+    diffdf,
     dplyr,
     forcats,
     ggplot2,
+    ggridges,
     glue,
+    here,
     readr,
     rlang,
     scales,
+    stats,
     stringr,
     tidyr
 Suggests: 
diff --git a/NAMESPACE b/NAMESPACE
index 6a9f7c8..7caa5f0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,24 @@
 # Generated by roxygen2: do not edit by hand
 
+export(COLNAMES_INTEREST_PRESSURE_TEST)
+export(COLNAMES_KEY_PRESSURE_TEST)
+export(COLOUR_VIMC)
+export(DEF_TOUCHSTONE_NEW)
+export(DEF_TOUCHSTONE_OLD)
+export(DEF_TOUCHSTONE_OLD_OLD)
+export(EXCLUDED_DISEASES)
+export(IMPACT_OUTCOMES)
+export(MAX_TS_MONTH)
+export(MAX_TS_YEAR)
+export(MIN_TS_MONTH)
+export(MIN_TS_YEAR)
+export(N_TS_MIN_CHARS)
+export(N_TS_YEAR_CHARS)
 export(basic_burden_sanity)
+export(burden_outcome_names)
 export(check_demography_alignment)
+export(colnames_df_missing_cols)
+export(colnames_plot_demog_compare)
 export(compare_natl_subreg)
 export(file_dict_colnames)
 export(filter_duplicates)
@@ -15,17 +32,25 @@ export(generate_diffs)
 export(plot_age_patterns)
 export(plot_compare_demography)
 export(plot_coverage_set)
+export(plot_cumul)
+export(plot_diff)
 export(plot_fvp)
 export(plot_global_burden)
 export(plot_global_burden_decades)
+export(plot_modelling_group_variation)
+export(plot_sig_diff)
+export(plot_vaccine_gavi)
 export(prep_plot_age)
 export(prep_plot_burden_decades)
 export(prep_plot_coverage_set)
+export(prep_plot_cumul)
 export(prep_plot_demography)
 export(prep_plot_fvp)
 export(prep_plot_global_burden)
+export(prep_plot_mod_grp_varn)
+export(prep_plot_vax_gavi)
 export(save_outputs)
-export(significant_diff_plot)
+export(scenario_data_colnames)
 export(theme_vimc)
 export(theme_vimc_noxaxis)
 export(validate_complete_incoming_files)
@@ -37,6 +62,8 @@ importFrom(ggplot2,facet_grid)
 importFrom(ggplot2,facet_wrap)
 importFrom(ggplot2,geom_col)
 importFrom(ggplot2,geom_hline)
+importFrom(ggplot2,geom_point)
+importFrom(ggplot2,geom_segment)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,label_wrap_gen)
 importFrom(ggplot2,labeller)
@@ -44,5 +71,7 @@ importFrom(ggplot2,labs)
 importFrom(ggplot2,scale_fill_distiller)
 importFrom(ggplot2,scale_x_continuous)
 importFrom(ggplot2,scale_y_continuous)
+importFrom(ggplot2,theme)
 importFrom(ggplot2,vars)
+importFrom(rlang,":=")
 importFrom(rlang,.data)
diff --git a/NEWS.md b/NEWS.md
index 81b8136..b7f6fc8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+# vimcheck 0.0.4
+
+- Added impact diagnostics functions in `R/fn_impact_diagnostics.R`.
+
+- Added plotting preparation functions and plotting functions in `R/fn_plotting_prep_impact_diagnostics.R` and `R/fn_plotting_impact_diagnostics.R`.
+
+- Added dependencies _diffdf_ and _here_.
+
 # vimcheck 0.0.3
 
 - Separated data-prep for plotting from plotting functions.
diff --git a/inst/WORDLIST b/inst/WORDLIST
index e9a14dc..bc3ac78 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -22,6 +22,7 @@ YLLs
 YYYYMM
 autogenerated
 dalys
+diffdf
 erroring
 facetted
 ggplot

From 43753fb05464dd0b45026651af99ba470f89e237 Mon Sep 17 00:00:00 2001
From: Pratik Gupte <pratikgupte16@gmail.com>
Date: Wed, 22 Apr 2026 16:10:24 +0100
Subject: [PATCH 17/17] Rename fn file, fix arg name

---
 ...sure_testing.R => fn_impact_diagnostics.R} | 21 +++++++++----------
 man/compare_natl_subreg.Rd                    |  2 +-
 ...t_filter_data.Rd => filter_impact_data.Rd} |  6 +++---
 man/flag_large_diffs.Rd                       |  2 +-
 man/gen_combined_df.Rd                        |  8 +++----
 man/gen_national_iqr.Rd                       |  2 +-
 man/generate_diffs.Rd                         |  2 +-
 man/save_outputs.Rd                           |  2 +-
 8 files changed, 22 insertions(+), 23 deletions(-)
 rename R/{fn_pressure_testing.R => fn_impact_diagnostics.R} (98%)
 rename man/{pres_test_filter_data.Rd => filter_impact_data.Rd} (94%)

diff --git a/R/fn_pressure_testing.R b/R/fn_impact_diagnostics.R
similarity index 98%
rename from R/fn_pressure_testing.R
rename to R/fn_impact_diagnostics.R
index c5c9a1f..a79a22d 100644
--- a/R/fn_pressure_testing.R
+++ b/R/fn_impact_diagnostics.R
@@ -1,7 +1,7 @@
 #' Filter data for touchstones or diseases
 #'
-#' @name pres_test_filter_data
-#' @rdname pres_test_filter_data
+#' @name filter_impact_data
+#' @rdname filter_impact_data
 #'
 #' @description
 #' A pair of helper functions allowing filtering out of recent touchstone values
@@ -56,7 +56,7 @@ filter_recent_ts <- function(df, threshold = DEF_TOUCHSTONE_NEW) {
   }
 }
 
-#' @name pres_test_filter_data
+#' @name filter_impact_data
 #'
 #' @export
 filter_excluded_diseases_ts <- function(
@@ -81,7 +81,7 @@ filter_excluded_diseases_ts <- function(
   }
 }
 
-#' @name pres_test_filter_data
+#' @name filter_impact_data
 #'
 #' @param key_cols Key columns in `df` to check for duplicates.
 #'
@@ -111,7 +111,7 @@ filter_duplicates <- function(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) {
   dplyr::filter(df, .data$n_key > 1)
 }
 
-#' @name pres_test_filter_data
+#' @name filter_impact_data
 #'
 #' @param prev_data A `<data.frame>` holding data from a previous touchstone for
 #' the same scenarios as `df`.
@@ -471,7 +471,7 @@ flag_large_diffs <- function(
 #' @param prev_dat A data.frame of impact estimates corresponding to an earlier
 #' touchstone.
 #'
-#' @param df2 A data.frame of impact estimates corresponding to a more recent
+#' @param df_clean A data.frame of impact estimates corresponding to a more recent
 #' touchstone.
 #'
 #' @param interest_cols A character vector of columns of interest. Defaults to
@@ -480,7 +480,7 @@ flag_large_diffs <- function(
 #' @param key_cols A character vector of columns of interest. Defaults to
 #' [COLNAMES_KEY_PRESSURE_TEST].
 #'
-#' @return A data.frame which is a full join of `prev_dat` and `df2`. Columns
+#' @return A data.frame which is a full join of `prev_dat` and `df_clean`. Columns
 #' are disambiguated with the suffixes `"_old"` and `"_new"`.
 #'
 #' @keywords impact_diagnostics
@@ -488,7 +488,7 @@ flag_large_diffs <- function(
 #' @export
 gen_combined_df <- function(
   prev_dat,
-  df2,
+  df_clean,
   interest_cols = COLNAMES_INTEREST_PRESSURE_TEST,
   key_cols = COLNAMES_KEY_PRESSURE_TEST
 ) {
@@ -526,13 +526,12 @@ gen_combined_df <- function(
     must.include = c(interest_cols, key_cols)
   )
   checkmate::assert_names(
-    colnames(df2),
+    colnames(df_clean),
     must.include = c(interest_cols, key_cols)
   )
 
-  # TODO: df2 needs a better name
   prev_df <- dplyr::select(prev_dat, {{ interest_cols }})
-  cur_df <- dplyr::select(df2, {{ interest_cols }})
+  cur_df <- dplyr::select(df_clean, {{ interest_cols }})
 
   combined <- dplyr::full_join(
     prev_df,
diff --git a/man/compare_natl_subreg.Rd b/man/compare_natl_subreg.Rd
index 7f4c60e..395d116 100644
--- a/man/compare_natl_subreg.Rd
+++ b/man/compare_natl_subreg.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_pressure_testing.R
+% Please edit documentation in R/fn_impact_diagnostics.R
 \name{compare_natl_subreg}
 \alias{compare_natl_subreg}
 \title{Compare sub-regional and national estimates}
diff --git a/man/pres_test_filter_data.Rd b/man/filter_impact_data.Rd
similarity index 94%
rename from man/pres_test_filter_data.Rd
rename to man/filter_impact_data.Rd
index 27b1d43..da36793 100644
--- a/man/pres_test_filter_data.Rd
+++ b/man/filter_impact_data.Rd
@@ -1,7 +1,7 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_pressure_testing.R
-\name{pres_test_filter_data}
-\alias{pres_test_filter_data}
+% Please edit documentation in R/fn_impact_diagnostics.R
+\name{filter_impact_data}
+\alias{filter_impact_data}
 \alias{filter_recent_ts}
 \alias{filter_excluded_diseases_ts}
 \alias{filter_duplicates}
diff --git a/man/flag_large_diffs.Rd b/man/flag_large_diffs.Rd
index fcc5039..f54488d 100644
--- a/man/flag_large_diffs.Rd
+++ b/man/flag_large_diffs.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_pressure_testing.R
+% Please edit documentation in R/fn_impact_diagnostics.R
 \name{flag_large_diffs}
 \alias{flag_large_diffs}
 \title{Flag significant changes in impact estimates}
diff --git a/man/gen_combined_df.Rd b/man/gen_combined_df.Rd
index b3f53f0..13dd05d 100644
--- a/man/gen_combined_df.Rd
+++ b/man/gen_combined_df.Rd
@@ -1,12 +1,12 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_pressure_testing.R
+% Please edit documentation in R/fn_impact_diagnostics.R
 \name{gen_combined_df}
 \alias{gen_combined_df}
 \title{Combine and align data from two touchstones}
 \usage{
 gen_combined_df(
   prev_dat,
-  df2,
+  df_clean,
   interest_cols = COLNAMES_INTEREST_PRESSURE_TEST,
   key_cols = COLNAMES_KEY_PRESSURE_TEST
 )
@@ -15,7 +15,7 @@ gen_combined_df(
 \item{prev_dat}{A data.frame of impact estimates corresponding to an earlier
 touchstone.}
 
-\item{df2}{A data.frame of impact estimates corresponding to a more recent
+\item{df_clean}{A data.frame of impact estimates corresponding to a more recent
 touchstone.}
 
 \item{interest_cols}{A character vector of columns of interest. Defaults to
@@ -25,7 +25,7 @@ touchstone.}
 \link{COLNAMES_KEY_PRESSURE_TEST}.}
 }
 \value{
-A data.frame which is a full join of \code{prev_dat} and \code{df2}. Columns
+A data.frame which is a full join of \code{prev_dat} and \code{df_clean}. Columns
 are disambiguated with the suffixes \code{"_old"} and \code{"_new"}.
 }
 \description{
diff --git a/man/gen_national_iqr.Rd b/man/gen_national_iqr.Rd
index 4257953..853a8b0 100644
--- a/man/gen_national_iqr.Rd
+++ b/man/gen_national_iqr.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_pressure_testing.R
+% Please edit documentation in R/fn_impact_diagnostics.R
 \name{gen_national_iqr}
 \alias{gen_national_iqr}
 \title{Generate IQR for key outcomes}
diff --git a/man/generate_diffs.Rd b/man/generate_diffs.Rd
index 4e4607e..03a8500 100644
--- a/man/generate_diffs.Rd
+++ b/man/generate_diffs.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_pressure_testing.R
+% Please edit documentation in R/fn_impact_diagnostics.R
 \name{generate_diffs}
 \alias{generate_diffs}
 \title{Explore significant changes in deaths and DALYs}
diff --git a/man/save_outputs.Rd b/man/save_outputs.Rd
index 27e513d..0b2afbe 100644
--- a/man/save_outputs.Rd
+++ b/man/save_outputs.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fn_pressure_testing.R
+% Please edit documentation in R/fn_impact_diagnostics.R
 \name{save_outputs}
 \alias{save_outputs}
 \title{Save pressure-testing diagnostics to local file}