From c8b4bc15aaefd0a1225341068fc85079df595e26 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Thu, 12 Mar 2026 13:46:00 +0000 Subject: [PATCH 01/17] Add initial fns from pressure testing report --- R/helpers.R | 25 ++ R/pressure_testing.R | 580 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 605 insertions(+) create mode 100644 R/pressure_testing.R diff --git a/R/helpers.R b/R/helpers.R index 7557d02..4070345 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -35,3 +35,28 @@ make_novax_scenario <- function(disease) { names_from = "variable" ) } + +adaptive_round <- function( + x, + large_threshold = 1, + small_sigfig = 2, + large_digits = 1 +) { + ifelse( + abs(x) >= large_threshold, + round(x, large_digits), + signif(x, small_sigfig) + ) +} + +round_numeric <- function(df) { + df %>% + mutate(across( + where(is.numeric) & !matches("year", ignore.case = TRUE), + ~ adaptive_round(.x) + )) +} + +str_as_ts_year <- function(x) { + as.numeric(substr(x, 1, 6)) +} diff --git a/R/pressure_testing.R b/R/pressure_testing.R new file mode 100644 index 0000000..d4a15c6 --- /dev/null +++ b/R/pressure_testing.R @@ -0,0 +1,580 @@ +### All functions for pressure testing + +# Flexible rounding + +# Fix for scenario_type variable being included from 202310 onwards +filter_recent_ts <- function(df, threshold = 202310) { + touchstone_year <- unique(df$touchstone) + + # TODO: check that touchstone year is 6 digit - can there be more digits? + ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R + + if (ts_number >= threshold) { + df <- dplyr::filter( + df, + scenario_type == "default" + ) + } + + df +} + +# Helper for removing excluded diseases post-202110 +filter_excluded_diseases_ts <- function(df, threshold = 202110) { + exclude_dis <- c("Hib", "PCV", "Rota", "JE") + + touchstone_year <- unique(df$touchstone) + ts_number <- as.numeric(substr(touchstone_year, 1, 6)) + + if (ts_number <= threshold) { + df %>% filter(!disease %in% exclude_dis) + } else { + df + } +} + +# Identify duplicates +flag_duplicates <- function(df, key_cols) { + df %>% + add_count(across(all_of(key_cols)), name = "n_key") %>% + filter(n_key > 1) +} + +# Identify rows where deaths_averted went from non-NA to NA +comparison_prev <- function(df, prev_dat, outcome) { + prev_df <- prev_dat %>% + select(all_of(key_cols), all_of(outcome)) %>% + rename(outcome_prev = !!sym(outcome)) + + current_df <- df %>% + select(all_of(key_cols), all_of(outcome)) %>% + rename(outcome_cur = !!sym(outcome)) + + result <- prev_df %>% + inner_join(current_df, by = key_cols) %>% + filter(!is.na(outcome_prev) & is.na(outcome_cur)) + + return(result) +} + +# Explore significant changes in key outcomes (i.e. deaths/dalys) +generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) { + #fix for erroneous duplicated YF data in 201910 dataset + prev_df <- prev_df %>% + { + if (identical(pars$touchstone_old, "201910")) { + filter(., !(disease == "YF" & support_type == "other" & coverage == 0)) + } else { + . + } + } + + # Fix for multiple campaigns per year (i.e. not true duplicates) - only applicable for 2019 true non-duplicates. + add_campaign_id <- function(df, key_cols) { + df %>% + group_by(across(all_of(key_cols))) %>% + mutate(campaign_id = row_number()) %>% + ungroup() + } + + prev_df <- add_campaign_id(prev_df, key_cols) + curr_df <- add_campaign_id(curr_df, key_cols) + + diff_keys <- c(key_cols, "campaign_id") + cols_needed <- unique(c(diff_keys, interest_cols)) + + diff <- diffdf::diffdf( + prev_df[, cols_needed], + curr_df[, cols_needed], + keys = diff_keys + ) + + changes <- setNames( + lapply(interest_cols, function(v) { + nm <- paste0("VarDiff_", v) + if (nm %in% names(diff)) diff[[nm]] else NULL + }), + interest_cols + ) + + return(changes) +} + +# Generate IQR for key outcomes - for threshold of "significant" +gen_national_iqr <- function( + df, + group_cols, + value_cols, + prefix = "national_iqr_" +) { + df %>% + group_by(across(all_of(group_cols))) %>% + summarise( + across( + all_of(value_cols), + \(x) IQR(x, na.rm = TRUE), + .names = "{prefix}{.col}" + ), + .groups = "drop" + ) +} + +## Flag significant changes +flag_large_diffs <- function( + changes_list, + iqr_df, + variable, + group_cols, + threshold +) { + iqr_col <- paste0("national_iqr_", variable) + + changes_list[[variable]] %>% + mutate(diff = COMPARE - BASE) %>% + left_join( + iqr_df %>% select(all_of(group_cols), all_of(iqr_col)), + by = group_cols + ) %>% + mutate( + flag = abs(diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0 + ) %>% + filter(flag) %>% + select( + country, + country_name, + year, + vaccine, + modelling_group, + activity_type, + BASE, + COMPARE, + diff + ) %>% + rename(!!as.character(old) := BASE, !!as.character(new) := COMPARE) %>% + arrange(desc(diff)) +} + +## Plot significant changes +significant_diff_plot <- function(df, outcome) { + df$label <- paste( + df$country_name, + df$vaccine, + df$activity_type, + df$year, + sep = " | " + ) + + ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) + + geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) + + geom_point(size = 2) + + labs( + x = "Difference", + y = "", + title = glue( + "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year" + ) + ) + + theme_minimal() +} + +### Generate combined df +gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) { + prev_df <- prev_dat[, interest_cols] + cur_df <- df2[, interest_cols] + + combined <- full_join( + prev_df, + cur_df, + by = key_cols, + suffix = c("_old", "_new") + ) + + combined <- combined %>% + select( + country, + country_name, + disease, + vaccine, + activity_type, + year, + modelling_group, + deaths_averted_old, + deaths_averted_new, + dalys_averted_old, + dalys_averted_new + ) + return(combined) +} + +plot_diff <- function( + combined, + variable, + group_vars = c("activity_type", "vaccine") +) { + x_var <- paste0(variable, "_new") + y_var <- paste0(variable, "_old") + x_sym <- rlang::sym(x_var) + y_sym <- rlang::sym(y_var) + + combined <- combined %>% + filter(!is.na(!!x_sym) & !is.na(!!y_sym)) + + n_facets <- combined %>% + distinct(activity_type, vaccine) %>% + nrow() + + ncol_dynamic <- case_when( + n_facets <= 4 ~ 2, + n_facets <= 9 ~ 3, + n_facets <= 16 ~ 4, + n_facets <= 25 ~ 6, + TRUE ~ 8 + ) + + p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) + + geom_point(alpha = 0.5, colour = "#008080") + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + facet_wrap( + ~ activity_type + vaccine, + scales = "free", + ncol = ncol_dynamic + ) + + scale_x_log10() + + scale_y_log10() + + theme_bw() + + theme( + strip.text = element_text(size = 7), + panel.spacing = unit(0.05, "lines"), + axis.text = element_text(size = 6.5) + ) + + labs( + title = glue("{variable}: Current vs Previous Report"), + x = glue("{new} - {variable}"), + y = glue("{old} - {variable}") + ) + + return(p) +} + +### Subregional v national estimate comparison +compare_national_to_subregional <- function( + df, + outcome, + activity_filter, + threshold +) { + df <- df %>% + filter(activity_type == activity_filter) %>% + select(all_of(key_cols), subregion, !!outcome) + + results <- purrr::map_dfr(outcome, function(outcome) { + subregional_summary <- df %>% + group_by(subregion, disease, activity_type) %>% + summarise( + subregional_mean = mean(.data[[outcome]], na.rm = TRUE), + subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) + + national_summary <- df %>% + select(all_of(key_cols), subregion, !!outcome) %>% + rename(national_value = !!outcome) + + comparison <- national_summary %>% + left_join(subregional_summary, by = c("subregion", "disease")) %>% + mutate( + outcome = outcome, + difference = national_value - subregional_mean, + iqr_score = abs(difference) / subregional_iqr + ) + + dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE) + + comparison <- comparison %>% + mutate( + flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0 + ) %>% + filter(flag_iqr) %>% + select( + country_name, + vaccine, + year, + modelling_group, + national_value, + subregional_mean, + subregional_iqr, + difference, + iqr_score + ) %>% + arrange(desc(iqr_score)) + + comparison + }) + + return(results) +} + +### Modelling group variations +plot_modelling_group_variation <- function(df2, df3, outc = "deaths") { + offset <- 1e-6 + + df2 %>% + left_join(df3, by = join_by(modelling_group, vaccine)) %>% + mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>% + group_by(vaccine) %>% + mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>% + ggplot() + + aes( + fill = as.character(mod_num), + x = adj_outc, + y = reorder(vaccine, mean_outc) + ) + + geom_density_ridges( + alpha = 0.5, + stat = "binline", + bins = 200, + draw_baseline = FALSE + ) + + facet_grid(. ~ activity_type, scales = "fixed") + + theme_bw() + + theme( + legend.position = "none", + axis.text.x = element_text(angle = 90, hjust = 1) + ) + + scale_x_log10( + breaks = scales::trans_breaks("log10", function(x) 10^x), + labels = scales::trans_format("log10", math_format(10^.x)) + ) + + scale_fill_viridis_d() + + labs( + x = paste0( + "Burden averted (", + ifelse(outc == "dalys", "DALYs", outc), + ")" + ), + y = "Vaccine" + ) +} + + +# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended) +plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") { + df_cur <- df %>% + select(all_of(key_cols), !!outcome) %>% + filter(year >= 2021, year <= 2024, disease != "COVID") %>% + group_by(disease, year) %>% + summarise( + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) %>% + mutate(dataset = as.character(new)) + + df_prev <- prev_dat %>% + select(all_of(key_cols), !!outcome) %>% + filter(year >= 2021, year <= 2024, disease != "COVID") %>% + group_by(disease, year) %>% + summarise( + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) %>% + mutate(dataset = as.character(old)) + + df_combined <- bind_rows(df_cur, df_prev) + + df_diff <- df_cur %>% + left_join( + df_prev, + by = c("disease", "year"), + suffix = c("_curr", "_prev") + ) %>% + mutate( + yearly_outcome = yearly_outcome_curr - yearly_outcome_prev, + dataset = "Difference" + ) %>% + select(disease, year, yearly_outcome, dataset) + + df_combined <- bind_rows(df_combined, df_diff) + + df_combined$dataset <- factor( + df_combined$dataset, + levels = c(as.character(old), "Difference", as.character(new)) + ) + + ggplot( + df_combined, + aes( + x = reorder(disease, yearly_outcome), + y = yearly_outcome, + fill = factor(year) + ) + ) + + geom_col(position = "dodge") + + scale_fill_manual( + values = c( + "2021" = "#008080", + "2022" = "#E68424", + "2023" = "#9573B5", + "2024" = "#A1D15C" + ) + ) + + facet_wrap(~dataset, scales = "free_y") + + scale_y_continuous(labels = scales::scientific) + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year") +} + +### Gavi Cumulative Plot (modelling group + average) +plot_cumul <- function(df, outcome, disease_filter) { + outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))] + + outcome_sym <- sym(outcome) + cum_col <- paste0("cum_", outcome) + avg_col <- paste0("avg_", outcome) + + col_old <- paste0(outcome, "_old") + col_new <- paste0(outcome, "_new") + + combined2 <- df %>% + select( + country, + country_name, + disease, + vaccine, + activity_type, + year, + modelling_group, + all_of(outcome_cols) + ) %>% + pivot_longer( + cols = all_of(outcome_cols), + names_to = "touchstone", + values_to = "value" + ) %>% + mutate( + touchstone = str_remove(touchstone, paste0("^", outcome, "_")), + touchstone = recode( + touchstone, + "old" = as.character(old), + "new" = as.character(new), + .default = touchstone + ), + touchstone = factor( + touchstone, + levels = c(as.character(old), as.character(new)) + ) + ) + # Cumulative values by modelling group + df_cum <- combined2 %>% + filter(disease == disease_filter) %>% + group_by(modelling_group, touchstone) %>% + complete(year = full_seq(year, 1)) %>% + arrange(year) %>% + mutate( + first_valid = min(year[!is.na(value)]), + !!cum_col := ifelse( + year < first_valid, + NA, + cumsum(replace_na(value, 0)) + ) + ) %>% + select(-first_valid) %>% + ungroup() %>% + mutate(modelling_group = paste(modelling_group, touchstone, sep = "-")) + + # Model average + df_avg <- df_cum %>% + group_by(year, touchstone) %>% + summarise( + !!avg_col := mean(!!sym(cum_col), na.rm = TRUE), + n_models = sum(!is.na(!!sym(cum_col))), + .groups = "drop" + ) %>% + filter(n_models >= 1) %>% + mutate(modelling_group = paste("Model Average", touchstone, sep = "-")) + + # Combine for plot + df_plot <- bind_rows( + df_cum %>% + select(year, modelling_group, touchstone, value = !!sym(cum_col)), + df_avg %>% + select(year, modelling_group, touchstone, value = !!sym(avg_col)) + ) + + df_plot <- df_plot %>% + group_by(modelling_group) %>% + filter(sum(value, na.rm = TRUE) > 0) %>% + ungroup() %>% + mutate( + line_type = ifelse( + grepl("Model Average", modelling_group), + "dashed", + "solid" + ) + ) + + if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { + message("No non-zero data to plot for ", disease_filter, ". Skipping plot.") + return(NULL) + } + + p <- ggplot( + df_plot, + aes( + x = year, + y = value, + color = modelling_group, + linetype = line_type + ) + ) + + geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) + + scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) + + guides(linetype = "none") + + scale_y_continuous(labels = scales::scientific) + + theme_minimal() + + labs( + x = "Year", + y = paste("Cumulative", outcome), + color = "Modelling Group", + title = paste("Cumulative", outcome, "Over Time –", disease_filter) + ) + + theme(legend.position = "bottom") + + return(p) +} + +save_outputs <- function() { + saveRDS( + round_numeric( + missing_in_current %>% + select(all_of(c( + "country_name", + "vaccine", + "activity_type", + "year", + "modelling_group" + ))) + ), + "outputs/missing_in_current.rds" + ) + saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds") + saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds") + saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds") + saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds") + saveRDS( + round_numeric(subregional_flags_deaths_camp), + "outputs/subregional_flags_deaths_camp.rds" + ) + saveRDS( + round_numeric(subregional_flags_deaths_rout), + "outputs/subregional_flags_deaths_rout.rds" + ) + saveRDS( + round_numeric(subregional_flags_dalys_camp), + "outputs/subregional_flags_dalys_camp.rds" + ) + saveRDS( + round_numeric(subregional_flags_dalys_rout), + "outputs/subregional_flags_dalys_rout.rds" + ) +} From db03465b35fa129a858df32d58994f6c5e62c4fa Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Thu, 12 Mar 2026 14:56:46 +0000 Subject: [PATCH 02/17] Move pressure testing plotting --- R/pressure_testing.R | 483 +++++++++---------------------------------- 1 file changed, 97 insertions(+), 386 deletions(-) diff --git a/R/pressure_testing.R b/R/pressure_testing.R index d4a15c6..d73c08f 100644 --- a/R/pressure_testing.R +++ b/R/pressure_testing.R @@ -10,24 +10,22 @@ filter_recent_ts <- function(df, threshold = 202310) { ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R if (ts_number >= threshold) { - df <- dplyr::filter( + dplyr::filter( df, scenario_type == "default" ) + } else { + df } - - df } # Helper for removing excluded diseases post-202110 filter_excluded_diseases_ts <- function(df, threshold = 202110) { - exclude_dis <- c("Hib", "PCV", "Rota", "JE") - touchstone_year <- unique(df$touchstone) - ts_number <- as.numeric(substr(touchstone_year, 1, 6)) + ts_number <- str_as_ts_year(touchstone_year) if (ts_number <= threshold) { - df %>% filter(!disease %in% exclude_dis) + filter(df, !disease %in% exclude_dis) } else { df } @@ -35,48 +33,41 @@ filter_excluded_diseases_ts <- function(df, threshold = 202110) { # Identify duplicates flag_duplicates <- function(df, key_cols) { - df %>% - add_count(across(all_of(key_cols)), name = "n_key") %>% - filter(n_key > 1) + df <- dplyr::add_count( + df, + dplyr::across(dplyr::all_of(key_cols)), + name = "n_key" + ) + + filter(df, n_key > 1) } # Identify rows where deaths_averted went from non-NA to NA comparison_prev <- function(df, prev_dat, outcome) { - prev_df <- prev_dat %>% - select(all_of(key_cols), all_of(outcome)) %>% - rename(outcome_prev = !!sym(outcome)) + prev_df <- select(prev_data, all_of(key_cols), all_of(outcome)) + prev_df <- rename(prev_df, outcome_prev = !!sym(outcome)) - current_df <- df %>% - select(all_of(key_cols), all_of(outcome)) %>% - rename(outcome_cur = !!sym(outcome)) + current_df <- select(current_df, all_of(key_cols), all_of(outcome)) + current_df <- rename(current_df, outcome_cur = !!sym(outcome)) - result <- prev_df %>% - inner_join(current_df, by = key_cols) %>% - filter(!is.na(outcome_prev) & is.na(outcome_cur)) + result <- inner_join(prev_df, current_df, by = key_cols) + result <- filter(result, !is.na(outcome_prev) & is.na(outcome_cur)) - return(result) + result } # Explore significant changes in key outcomes (i.e. deaths/dalys) generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) { #fix for erroneous duplicated YF data in 201910 dataset - prev_df <- prev_df %>% - { - if (identical(pars$touchstone_old, "201910")) { - filter(., !(disease == "YF" & support_type == "other" & coverage == 0)) - } else { - . - } - } - - # Fix for multiple campaigns per year (i.e. not true duplicates) - only applicable for 2019 true non-duplicates. - add_campaign_id <- function(df, key_cols) { - df %>% - group_by(across(all_of(key_cols))) %>% - mutate(campaign_id = row_number()) %>% - ungroup() + if (identical(pars$touchstone_old, TOUCHSTONE_OLD)) { + prev_df <- filter( + prev_df, + !(disease == "YF" & support_type == "other" & coverage == 0) + ) } + # Fix for multiple campaigns per year (i.e. not true duplicates) + # only applicable for 2019 true non-duplicates. prev_df <- add_campaign_id(prev_df, key_cols) curr_df <- add_campaign_id(curr_df, key_cols) @@ -97,7 +88,7 @@ generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) { interest_cols ) - return(changes) + changes } # Generate IQR for key outcomes - for threshold of "significant" @@ -107,18 +98,20 @@ gen_national_iqr <- function( value_cols, prefix = "national_iqr_" ) { - df %>% - group_by(across(all_of(group_cols))) %>% - summarise( - across( - all_of(value_cols), - \(x) IQR(x, na.rm = TRUE), - .names = "{prefix}{.col}" - ), - .groups = "drop" - ) + df <- group_by(df, across(all_of(group_cols))) + df <- summarise( + df, + across( + all_of(value_cols), + \(x) IQR(x, na.rm = TRUE), + .names = "{prefix}{.col}" + ), + .groups = "drop" + ) } +# TODO: I don't like how this looks - this should probably be a simpler +# functional that maps over a list in a separate function ## Flag significant changes flag_large_diffs <- function( changes_list, @@ -129,8 +122,22 @@ flag_large_diffs <- function( ) { iqr_col <- paste0("national_iqr_", variable) - changes_list[[variable]] %>% - mutate(diff = COMPARE - BASE) %>% + # returns a list so that the function can accept multiple variables + lapply( + changes_list[[variable]], + temp_fn, + iqr_df, + variable, + group_cols, + threshold + ) +} + +temp_fn <- function(df, iqr_df, variable, group_cols, threshold) { + mutate( + df, + diff = COMPARE - BASE + ) %>% left_join( iqr_df %>% select(all_of(group_cols), all_of(iqr_col)), by = group_cols @@ -154,29 +161,6 @@ flag_large_diffs <- function( arrange(desc(diff)) } -## Plot significant changes -significant_diff_plot <- function(df, outcome) { - df$label <- paste( - df$country_name, - df$vaccine, - df$activity_type, - df$year, - sep = " | " - ) - - ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) + - geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) + - geom_point(size = 2) + - labs( - x = "Difference", - y = "", - title = glue( - "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year" - ) - ) + - theme_minimal() -} - ### Generate combined df gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) { prev_df <- prev_dat[, interest_cols] @@ -206,56 +190,6 @@ gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) { return(combined) } -plot_diff <- function( - combined, - variable, - group_vars = c("activity_type", "vaccine") -) { - x_var <- paste0(variable, "_new") - y_var <- paste0(variable, "_old") - x_sym <- rlang::sym(x_var) - y_sym <- rlang::sym(y_var) - - combined <- combined %>% - filter(!is.na(!!x_sym) & !is.na(!!y_sym)) - - n_facets <- combined %>% - distinct(activity_type, vaccine) %>% - nrow() - - ncol_dynamic <- case_when( - n_facets <= 4 ~ 2, - n_facets <= 9 ~ 3, - n_facets <= 16 ~ 4, - n_facets <= 25 ~ 6, - TRUE ~ 8 - ) - - p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) + - geom_point(alpha = 0.5, colour = "#008080") + - geom_abline(slope = 1, intercept = 0, linetype = "dashed") + - facet_wrap( - ~ activity_type + vaccine, - scales = "free", - ncol = ncol_dynamic - ) + - scale_x_log10() + - scale_y_log10() + - theme_bw() + - theme( - strip.text = element_text(size = 7), - panel.spacing = unit(0.05, "lines"), - axis.text = element_text(size = 6.5) - ) + - labs( - title = glue("{variable}: Current vs Previous Report"), - x = glue("{new} - {variable}"), - y = glue("{old} - {variable}") - ) - - return(p) -} - ### Subregional v national estimate comparison compare_national_to_subregional <- function( df, @@ -263,286 +197,63 @@ compare_national_to_subregional <- function( activity_filter, threshold ) { - df <- df %>% - filter(activity_type == activity_filter) %>% - select(all_of(key_cols), subregion, !!outcome) - - results <- purrr::map_dfr(outcome, function(outcome) { - subregional_summary <- df %>% - group_by(subregion, disease, activity_type) %>% - summarise( - subregional_mean = mean(.data[[outcome]], na.rm = TRUE), - subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE), - .groups = "drop" - ) - - national_summary <- df %>% - select(all_of(key_cols), subregion, !!outcome) %>% - rename(national_value = !!outcome) - - comparison <- national_summary %>% - left_join(subregional_summary, by = c("subregion", "disease")) %>% - mutate( - outcome = outcome, - difference = national_value - subregional_mean, - iqr_score = abs(difference) / subregional_iqr - ) - - dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE) - - comparison <- comparison %>% - mutate( - flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0 - ) %>% - filter(flag_iqr) %>% - select( - country_name, - vaccine, - year, - modelling_group, - national_value, - subregional_mean, - subregional_iqr, - difference, - iqr_score - ) %>% - arrange(desc(iqr_score)) - - comparison - }) - - return(results) -} - -### Modelling group variations -plot_modelling_group_variation <- function(df2, df3, outc = "deaths") { - offset <- 1e-6 - - df2 %>% - left_join(df3, by = join_by(modelling_group, vaccine)) %>% - mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>% - group_by(vaccine) %>% - mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>% - ggplot() + - aes( - fill = as.character(mod_num), - x = adj_outc, - y = reorder(vaccine, mean_outc) - ) + - geom_density_ridges( - alpha = 0.5, - stat = "binline", - bins = 200, - draw_baseline = FALSE - ) + - facet_grid(. ~ activity_type, scales = "fixed") + - theme_bw() + - theme( - legend.position = "none", - axis.text.x = element_text(angle = 90, hjust = 1) - ) + - scale_x_log10( - breaks = scales::trans_breaks("log10", function(x) 10^x), - labels = scales::trans_format("log10", math_format(10^.x)) - ) + - scale_fill_viridis_d() + - labs( - x = paste0( - "Burden averted (", - ifelse(outc == "dalys", "DALYs", outc), - ")" - ), - y = "Vaccine" - ) -} - - -# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended) -plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") { - df_cur <- df %>% - select(all_of(key_cols), !!outcome) %>% - filter(year >= 2021, year <= 2024, disease != "COVID") %>% - group_by(disease, year) %>% - summarise( - yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + df <- filter(df, activity_type == activity_filter) + df <- select(df, all_of(key_cols), subregion, !!outcome) + + results <- purrr::map_dfr(outcome, function(otc) { + subregional_summary <- + group_by(df, subregion, disease, activity_type) + subregional_summary <- summarise( + subregional_summary, + subregional_mean = mean(.data[[otc]], na.rm = TRUE), + subregional_iqr = IQR(.data[[otc]], na.rm = TRUE), .groups = "drop" - ) %>% - mutate(dataset = as.character(new)) - - df_prev <- prev_dat %>% - select(all_of(key_cols), !!outcome) %>% - filter(year >= 2021, year <= 2024, disease != "COVID") %>% - group_by(disease, year) %>% - summarise( - yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), - .groups = "drop" - ) %>% - mutate(dataset = as.character(old)) - - df_combined <- bind_rows(df_cur, df_prev) - - df_diff <- df_cur %>% - left_join( - df_prev, - by = c("disease", "year"), - suffix = c("_curr", "_prev") - ) %>% - mutate( - yearly_outcome = yearly_outcome_curr - yearly_outcome_prev, - dataset = "Difference" - ) %>% - select(disease, year, yearly_outcome, dataset) - - df_combined <- bind_rows(df_combined, df_diff) - - df_combined$dataset <- factor( - df_combined$dataset, - levels = c(as.character(old), "Difference", as.character(new)) - ) - - ggplot( - df_combined, - aes( - x = reorder(disease, yearly_outcome), - y = yearly_outcome, - fill = factor(year) ) - ) + - geom_col(position = "dodge") + - scale_fill_manual( - values = c( - "2021" = "#008080", - "2022" = "#E68424", - "2023" = "#9573B5", - "2024" = "#A1D15C" - ) - ) + - facet_wrap(~dataset, scales = "free_y") + - scale_y_continuous(labels = scales::scientific) + - theme_bw() + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) + - labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year") -} -### Gavi Cumulative Plot (modelling group + average) -plot_cumul <- function(df, outcome, disease_filter) { - outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))] + national_summary <- + select(df, all_of(key_cols), subregion, !!outcome) + national_summary <- rename(national_summary, national_value = !!outcome) - outcome_sym <- sym(outcome) - cum_col <- paste0("cum_", outcome) - avg_col <- paste0("avg_", outcome) + comparison <- left_join( + national_summary, + subregional_summary, + by = c("subregion", "disease") + ) + comparison <- mutate( + comparison, + outcome = outcome, + difference = national_value - subregional_mean, + iqr_score = abs(difference) / subregional_iqr + ) - col_old <- paste0(outcome, "_old") - col_new <- paste0(outcome, "_new") + dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE) - combined2 <- df %>% - select( - country, + comparison <- mutate( + comparison, + flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0 + ) + comparison <- filter(comparison, flag_iqr) + comparison <- select( + comparison, country_name, - disease, vaccine, - activity_type, year, modelling_group, - all_of(outcome_cols) - ) %>% - pivot_longer( - cols = all_of(outcome_cols), - names_to = "touchstone", - values_to = "value" - ) %>% - mutate( - touchstone = str_remove(touchstone, paste0("^", outcome, "_")), - touchstone = recode( - touchstone, - "old" = as.character(old), - "new" = as.character(new), - .default = touchstone - ), - touchstone = factor( - touchstone, - levels = c(as.character(old), as.character(new)) - ) - ) - # Cumulative values by modelling group - df_cum <- combined2 %>% - filter(disease == disease_filter) %>% - group_by(modelling_group, touchstone) %>% - complete(year = full_seq(year, 1)) %>% - arrange(year) %>% - mutate( - first_valid = min(year[!is.na(value)]), - !!cum_col := ifelse( - year < first_valid, - NA, - cumsum(replace_na(value, 0)) - ) - ) %>% - select(-first_valid) %>% - ungroup() %>% - mutate(modelling_group = paste(modelling_group, touchstone, sep = "-")) - - # Model average - df_avg <- df_cum %>% - group_by(year, touchstone) %>% - summarise( - !!avg_col := mean(!!sym(cum_col), na.rm = TRUE), - n_models = sum(!is.na(!!sym(cum_col))), - .groups = "drop" - ) %>% - filter(n_models >= 1) %>% - mutate(modelling_group = paste("Model Average", touchstone, sep = "-")) - - # Combine for plot - df_plot <- bind_rows( - df_cum %>% - select(year, modelling_group, touchstone, value = !!sym(cum_col)), - df_avg %>% - select(year, modelling_group, touchstone, value = !!sym(avg_col)) - ) - - df_plot <- df_plot %>% - group_by(modelling_group) %>% - filter(sum(value, na.rm = TRUE) > 0) %>% - ungroup() %>% - mutate( - line_type = ifelse( - grepl("Model Average", modelling_group), - "dashed", - "solid" - ) + national_value, + subregional_mean, + subregional_iqr, + difference, + iqr_score ) + comparison <- arrange(comparison, desc(iqr_score)) - if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { - message("No non-zero data to plot for ", disease_filter, ". Skipping plot.") - return(NULL) - } + comparison + }) - p <- ggplot( - df_plot, - aes( - x = year, - y = value, - color = modelling_group, - linetype = line_type - ) - ) + - geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) + - scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) + - guides(linetype = "none") + - scale_y_continuous(labels = scales::scientific) + - theme_minimal() + - labs( - x = "Year", - y = paste("Cumulative", outcome), - color = "Modelling Group", - title = paste("Cumulative", outcome, "Over Time –", disease_filter) - ) + - theme(legend.position = "bottom") - - return(p) + results } +### Modelling group variations save_outputs <- function() { saveRDS( round_numeric( From 50f6b0d4e35a51382c13cd57a1dae7f85d7d482c Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Thu, 12 Mar 2026 14:57:14 +0000 Subject: [PATCH 03/17] Clean up funs and add constants --- R/constants.R | 6 + R/helpers.R | 7 + R/plotting_pressure_testing.R | 300 ++++++++++++++++++++++++++++++++++ man/constants.Rd | 10 ++ 4 files changed, 323 insertions(+) create mode 100644 R/plotting_pressure_testing.R diff --git a/R/constants.R b/R/constants.R index 03cd537..e214aa2 100644 --- a/R/constants.R +++ b/R/constants.R @@ -46,3 +46,9 @@ colnames_plot_demog_compare <- c( "value", "value_millions" ) + +#' @name constants +EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE") + +#' @name constants +TOUCHSTONE_OLD <- "201910" diff --git a/R/helpers.R b/R/helpers.R index 4070345..d065a9e 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -60,3 +60,10 @@ round_numeric <- function(df) { str_as_ts_year <- function(x) { as.numeric(substr(x, 1, 6)) } + +add_campaign_id <- function(df, key_cols) { + df <- group_by(df, across(all_of(key_cols))) + df <- mutate(df, campaign_id = row_number()) + + ungroup(df) +} diff --git a/R/plotting_pressure_testing.R b/R/plotting_pressure_testing.R new file mode 100644 index 0000000..08a4329 --- /dev/null +++ b/R/plotting_pressure_testing.R @@ -0,0 +1,300 @@ +## Plot significant changes +significant_diff_plot <- function(df, outcome) { + df$label <- paste( + df$country_name, + df$vaccine, + df$activity_type, + df$year, + sep = " | " + ) + + ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) + + geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) + + geom_point(size = 2) + + labs( + x = "Difference", + y = "", + title = glue( + "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year" + ) + ) + + theme_minimal() +} + +plot_diff <- function( + combined, + variable, + group_vars = c("activity_type", "vaccine") +) { + x_var <- paste0(variable, "_new") + y_var <- paste0(variable, "_old") + x_sym <- rlang::sym(x_var) + y_sym <- rlang::sym(y_var) + + combined <- combined %>% + filter(!is.na(!!x_sym) & !is.na(!!y_sym)) + + n_facets <- combined %>% + distinct(activity_type, vaccine) %>% + nrow() + + ncol_dynamic <- case_when( + n_facets <= 4 ~ 2, + n_facets <= 9 ~ 3, + n_facets <= 16 ~ 4, + n_facets <= 25 ~ 6, + TRUE ~ 8 + ) + + p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) + + geom_point(alpha = 0.5, colour = "#008080") + + geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + facet_wrap( + ~ activity_type + vaccine, + scales = "free", + ncol = ncol_dynamic + ) + + scale_x_log10() + + scale_y_log10() + + theme_bw() + + theme( + strip.text = element_text(size = 7), + panel.spacing = unit(0.05, "lines"), + axis.text = element_text(size = 6.5) + ) + + labs( + title = glue("{variable}: Current vs Previous Report"), + x = glue("{new} - {variable}"), + y = glue("{old} - {variable}") + ) + + return(p) +} + +plot_modelling_group_variation <- function(df2, df3, outc = "deaths") { + offset <- 1e-6 + + df2 %>% + left_join(df3, by = join_by(modelling_group, vaccine)) %>% + mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>% + group_by(vaccine) %>% + mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>% + ggplot() + + aes( + fill = as.character(mod_num), + x = adj_outc, + y = reorder(vaccine, mean_outc) + ) + + geom_density_ridges( + alpha = 0.5, + stat = "binline", + bins = 200, + draw_baseline = FALSE + ) + + facet_grid(. ~ activity_type, scales = "fixed") + + theme_bw() + + theme( + legend.position = "none", + axis.text.x = element_text(angle = 90, hjust = 1) + ) + + scale_x_log10( + breaks = scales::trans_breaks("log10", function(x) 10^x), + labels = scales::trans_format("log10", math_format(10^.x)) + ) + + scale_fill_viridis_d() + + labs( + x = paste0( + "Burden averted (", + ifelse(outc == "dalys", "DALYs", outc), + ")" + ), + y = "Vaccine" + ) +} + + +# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended) +plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") { + df_cur <- df %>% + select(all_of(key_cols), !!outcome) %>% + filter(year >= 2021, year <= 2024, disease != "COVID") %>% + group_by(disease, year) %>% + summarise( + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) %>% + mutate(dataset = as.character(new)) + + df_prev <- prev_dat %>% + select(all_of(key_cols), !!outcome) %>% + filter(year >= 2021, year <= 2024, disease != "COVID") %>% + group_by(disease, year) %>% + summarise( + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) %>% + mutate(dataset = as.character(old)) + + df_combined <- bind_rows(df_cur, df_prev) + + df_diff <- df_cur %>% + left_join( + df_prev, + by = c("disease", "year"), + suffix = c("_curr", "_prev") + ) %>% + mutate( + yearly_outcome = yearly_outcome_curr - yearly_outcome_prev, + dataset = "Difference" + ) %>% + select(disease, year, yearly_outcome, dataset) + + df_combined <- bind_rows(df_combined, df_diff) + + df_combined$dataset <- factor( + df_combined$dataset, + levels = c(as.character(old), "Difference", as.character(new)) + ) + + ggplot( + df_combined, + aes( + x = reorder(disease, yearly_outcome), + y = yearly_outcome, + fill = factor(year) + ) + ) + + geom_col(position = "dodge") + + scale_fill_manual( + values = c( + "2021" = "#008080", + "2022" = "#E68424", + "2023" = "#9573B5", + "2024" = "#A1D15C" + ) + ) + + facet_wrap(~dataset, scales = "free_y") + + scale_y_continuous(labels = scales::scientific) + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year") +} + +### Gavi Cumulative Plot (modelling group + average) +plot_cumul <- function(df, outcome, disease_filter) { + outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))] + + outcome_sym <- sym(outcome) + cum_col <- paste0("cum_", outcome) + avg_col <- paste0("avg_", outcome) + + col_old <- paste0(outcome, "_old") + col_new <- paste0(outcome, "_new") + + combined2 <- df %>% + select( + country, + country_name, + disease, + vaccine, + activity_type, + year, + modelling_group, + all_of(outcome_cols) + ) %>% + pivot_longer( + cols = all_of(outcome_cols), + names_to = "touchstone", + values_to = "value" + ) %>% + mutate( + touchstone = str_remove(touchstone, paste0("^", outcome, "_")), + touchstone = recode( + touchstone, + "old" = as.character(old), + "new" = as.character(new), + .default = touchstone + ), + touchstone = factor( + touchstone, + levels = c(as.character(old), as.character(new)) + ) + ) + # Cumulative values by modelling group + df_cum <- combined2 %>% + filter(disease == disease_filter) %>% + group_by(modelling_group, touchstone) %>% + complete(year = full_seq(year, 1)) %>% + arrange(year) %>% + mutate( + first_valid = min(year[!is.na(value)]), + !!cum_col := ifelse( + year < first_valid, + NA, + cumsum(replace_na(value, 0)) + ) + ) %>% + select(-first_valid) %>% + ungroup() %>% + mutate(modelling_group = paste(modelling_group, touchstone, sep = "-")) + + # Model average + df_avg <- df_cum %>% + group_by(year, touchstone) %>% + summarise( + !!avg_col := mean(!!sym(cum_col), na.rm = TRUE), + n_models = sum(!is.na(!!sym(cum_col))), + .groups = "drop" + ) %>% + filter(n_models >= 1) %>% + mutate(modelling_group = paste("Model Average", touchstone, sep = "-")) + + # Combine for plot + df_plot <- bind_rows( + df_cum %>% + select(year, modelling_group, touchstone, value = !!sym(cum_col)), + df_avg %>% + select(year, modelling_group, touchstone, value = !!sym(avg_col)) + ) + + df_plot <- df_plot %>% + group_by(modelling_group) %>% + filter(sum(value, na.rm = TRUE) > 0) %>% + ungroup() %>% + mutate( + line_type = ifelse( + grepl("Model Average", modelling_group), + "dashed", + "solid" + ) + ) + + if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { + message("No non-zero data to plot for ", disease_filter, ". Skipping plot.") + return(NULL) + } + + p <- ggplot( + df_plot, + aes( + x = year, + y = value, + color = modelling_group, + linetype = line_type + ) + ) + + geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) + + scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) + + guides(linetype = "none") + + scale_y_continuous(labels = scales::scientific) + + theme_minimal() + + labs( + x = "Year", + y = paste("Cumulative", outcome), + color = "Modelling Group", + title = paste("Cumulative", outcome, "Over Time –", disease_filter) + ) + + theme(legend.position = "bottom") + + return(p) +} diff --git a/man/constants.Rd b/man/constants.Rd index 70b14da..7608951 100644 --- a/man/constants.Rd +++ b/man/constants.Rd @@ -7,6 +7,8 @@ \alias{scenario_data_colnames} \alias{burden_outcome_names} \alias{colnames_plot_demog_compare} +\alias{EXCLUDED_DISEASES} +\alias{TOUCHSTONE_OLD} \title{Package constants} \format{ An object of class \code{character} of length 5. @@ -16,6 +18,10 @@ An object of class \code{character} of length 4. An object of class \code{character} of length 10. An object of class \code{character} of length 7. + +An object of class \code{character} of length 4. + +An object of class \code{character} of length 1. } \usage{ file_dict_colnames @@ -25,6 +31,10 @@ scenario_data_colnames burden_outcome_names colnames_plot_demog_compare + +EXCLUDED_DISEASES + +TOUCHSTONE_OLD } \description{ Package constants From 13d624435632dedd107d1939cbafb389cffd2656 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Fri, 13 Mar 2026 12:14:18 +0000 Subject: [PATCH 04/17] Clean up fns and add constants --- R/constants.R | 6 + ...sting.R => fn_plotting_pressure_testing.R} | 26 +- R/fn_pressure_testing.R | 361 ++++++++++++++++++ 3 files changed, 382 insertions(+), 11 deletions(-) rename R/{plotting_pressure_testing.R => fn_plotting_pressure_testing.R} (94%) create mode 100644 R/fn_pressure_testing.R diff --git a/R/constants.R b/R/constants.R index e214aa2..20ae152 100644 --- a/R/constants.R +++ b/R/constants.R @@ -52,3 +52,9 @@ EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE") #' @name constants TOUCHSTONE_OLD <- "201910" + +#' @name constants +TOUCHSTONE_NEW <- "202310" + +#' @name constants +TOUCHSTONE_OLD_OLD <- "202110" diff --git a/R/plotting_pressure_testing.R b/R/fn_plotting_pressure_testing.R similarity index 94% rename from R/plotting_pressure_testing.R rename to R/fn_plotting_pressure_testing.R index 08a4329..8aaad49 100644 --- a/R/plotting_pressure_testing.R +++ b/R/fn_plotting_pressure_testing.R @@ -1,11 +1,14 @@ -## Plot significant changes +#' Plot significant changes +#' +#' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid +#' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars +#' labeller label_wrap_gen +#' +#' @export significant_diff_plot <- function(df, outcome) { - df$label <- paste( - df$country_name, - df$vaccine, - df$activity_type, - df$year, - sep = " | " + # retained here as this is a small df and a small operation + df$label <- glue::glue( + "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}" ) ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) + @@ -13,12 +16,13 @@ significant_diff_plot <- function(df, outcome) { geom_point(size = 2) + labs( x = "Difference", - y = "", - title = glue( - "Significant Differences in {outcome} by Country, Vaccine, Activity Type and Year" + y = NULL, + title = glue::glue( + "Significant Differences in {outcome} by Country, Vaccine, \\ + Activity Type and Year" ) ) + - theme_minimal() + theme_vimc(x_text_angle = 0) } plot_diff <- function( diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R new file mode 100644 index 0000000..140cc79 --- /dev/null +++ b/R/fn_pressure_testing.R @@ -0,0 +1,361 @@ +#' Fix for scenario_type variable being included from 202310 onwards +#' +#' @keywords pressure_testing +#' +#' @export +filter_recent_ts <- function(df, threshold = 202310) { + touchstone_year <- unique(df$touchstone) + + # TODO: check that touchstone year is 6 digit - can there be more digits? + ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R + + if (ts_number >= threshold) { + dplyr::filter( + df, + .data$scenario_type == "default" + ) + } else { + df + } +} + +#' Helper for removing excluded diseases post-202110 +#' +#' @export +filter_excluded_diseases_ts <- function(df, threshold = 202110) { + touchstone_year <- unique(df$touchstone) + ts_number <- str_as_ts_year(touchstone_year) + + if (ts_number <= threshold) { + dplyr::filter(df, !.data$disease %in% EXCLUDED_DISEASES) + } else { + df + } +} + +#' Identify duplicates +#' +#' @export +flag_duplicates <- function(df, key_cols) { + df <- dplyr::add_count( + df, + dplyr::across(dplyr::all_of(key_cols)), + name = "n_key" + ) + + dplyr::filter(df, .data$n_key > 1) +} + +#' Identify rows where deaths_averted went from non-NA to NA +#' +#' @export +comparison_prev <- function(df, prev_data, outcome) { + prev_df <- dplyr::select( + prev_data, + dplyr::all_of(key_cols), + dplyr::all_of(outcome) + ) + prev_df <- dplyr::rename(prev_df, outcome_prev = {{ outcome }}) + + current_df <- dplyr::select( + df, + dplyr::all_of(key_cols), + dplyr::all_of(outcome) + ) + current_df <- dplyr::rename(current_df, outcome_cur = {{ outcome }}) + + result <- dplyr::inner_join(prev_df, current_df, by = key_cols) + # `,` replaces `&` for dplyr syntax + result <- dplyr::filter(result, !is.na(outcome_prev), is.na(outcome_cur)) + + result +} + +#' Explore significant changes in deaths and DALYs +#' +#' @keywords pressure_testing +#' +#' @export +generate_diffs <- function( + prev_df, + curr_df, + interest_cols, + key_cols, + touchstone = TOUCHSTONE_OLD +) { + # TODO: replace use of `pars$touchstone_old` with arg `touchstone` + #fix for erroneous duplicated YF data in 201910 dataset + if (identical(touchstone, TOUCHSTONE_OLD)) { + prev_df <- dplyr::filter( + prev_df, + !(.data$disease == "YF" & + .data$support_type == "other" & + .data$coverage == 0) + ) + } + + # Fix for multiple campaigns per year (i.e. not true duplicates) + # only applicable for 2019 true non-duplicates. + prev_df <- add_campaign_id(prev_df, key_cols) + curr_df <- add_campaign_id(curr_df, key_cols) + + diff_keys <- c(key_cols, "campaign_id") + cols_needed <- unique(c(diff_keys, interest_cols)) + + diff <- diffdf::diffdf( + prev_df[, cols_needed], + curr_df[, cols_needed], + keys = diff_keys + ) + + changes <- stats::setNames( + lapply(interest_cols, function(v) { + nm <- glue::glue("VarDiff_{v}") + if (nm %in% names(diff)) diff[[nm]] else NULL + }), + interest_cols + ) + + changes +} + +#' Generate IQR for key outcomes +#' +#' @keywords pressure_testing +#' +#' @export +gen_national_iqr <- function( + df, + group_cols, + value_cols, + prefix = "national_iqr_" +) { + df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(group_cols))) + df <- dplyr::summarise( + df, + dplyr::across( + dplyr::all_of(value_cols), + function(x) { + IQR(x, na.rm = TRUE) + }, + .names = "{prefix}{.col}" + ), + .groups = "drop" + ) +} + +#' Flag significant changes +#' +#' @keywords pressure_testing +#' +#' @export +flag_large_diffs <- function( + changes_list, + iqr_df, + variable = c("deaths_averted", "dalys_averted"), + group_cols, + threshold +) { + # TODO: input checking + variable <- rlang::arg_match(variable) + + iqr_col <- glue::glue("national_iqr_{variable}") + + df <- dplyr::mutate( + changes_list[[variable]], + diff = .data$COMPARE - .data$BASE + ) + + iqr_df <- dplyr::select( + iqr_df, + dplyr::all_of(group_cols), + dplyr::all_of(iqr_col) + ) + + df <- left_join( + df, + iqr_df, + by = group_cols + ) + + df <- dplyr::mutate( + df, + flag = abs(.data$diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0 + ) + + df <- dplyr::filter(df, .data$flag) + + cols_to_select <- c( + "country", + "country_name", + "year", + "vaccine", + "modelling_group", + "activity_type", + "BASE", + "COMPARE", + "diff" + ) + + df <- dplyr::select( + df, + {{ cols_to_select }} + ) + + # TODO: replace `old` and `new` with defined objs --- see scratch.R + # unsure why this syntax was used + df <- dplyr::rename( + df, + !!as.character(old) := BASE, + !!as.character(new) := COMPARE + ) + + dplyr::arrange(df, dplyr::desc(diff)) +} + +#' Generate combined df +#' +#' @keywords pressure_testing +#' +#' @export +gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) { + # TODO: input checks + # TODO: df2 needs a better name + prev_df <- dplyr::select(prev_dat, {{ interest_cols }}) + cur_df <- dplyr::select(df2, {{ interest_cols }}) + + combined <- dplyr::full_join( + prev_df, + cur_df, + by = key_cols, + suffix = c("_old", "_new") + ) + + dplyr::select( + combined, + country, + country_name, + disease, + vaccine, + activity_type, + year, + modelling_group, + deaths_averted_old, + deaths_averted_new, + dalys_averted_old, + dalys_averted_new + ) +} + +### Subregional v national estimate comparison +compare_national_to_subregional <- function( + df, + outcome, + activity_filter, + threshold +) { + df <- dplyr::filter(df, activity_type == activity_filter) + df <- dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome) + + results <- purrr::map_dfr(outcome, function(otc) { + subregional_summary <- + dplyr::group_by(df, subregion, disease, activity_type) + + subregional_summary <- dplyr::summarise( + subregional_summary, + subregional_mean = mean(.data[[otc]], na.rm = TRUE), + subregional_iqr = IQR(.data[[otc]], na.rm = TRUE), + .groups = "drop" + ) + + national_summary <- + dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome) + national_summary <- dplyr::rename( + national_summary, + national_value = !!outcome + ) + + comparison <- dplyr::left_join( + national_summary, + subregional_summary, + by = c("subregion", "disease") + ) + comparison <- dplyr::mutate( + comparison, + outcome = outcome, + difference = .data$national_value - .data$subregional_mean, + iqr_score = abs(.data$difference) / .data$subregional_iqr + ) + + dynamic_threshold <- stats::quantile( + comparison$iqr_score, + 0.99, + na.rm = TRUE + ) + + comparison <- dplyr::mutate( + comparison, + flag_iqr = .data$iqr_score > dynamic_threshold & .data$subregional_iqr > 0 + ) + comparison <- dplyr::filter(comparison, .data$flag_iqr) + comparison <- dplyr::select( + comparison, + country_name, + vaccine, + year, + modelling_group, + national_value, + subregional_mean, + subregional_iqr, + difference, + iqr_score + ) + comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score)) + + comparison + }) + + results +} + +# TODO: reconsider function name, add explicit arguments +#' Modelling group variations +#' +#' @keywords pressure_testing +#' +#' @export +save_outputs <- function() { + saveRDS( + round_numeric( + missing_in_current %>% + dplyr::select(dplyr::all_of(c( + "country_name", + "vaccine", + "activity_type", + "year", + "modelling_group" + ))) + ), + "outputs/missing_in_current.rds" + ) + saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds") + saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds") + saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds") + saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds") + saveRDS( + round_numeric(subregional_flags_deaths_camp), + "outputs/subregional_flags_deaths_camp.rds" + ) + saveRDS( + round_numeric(subregional_flags_deaths_rout), + "outputs/subregional_flags_deaths_rout.rds" + ) + saveRDS( + round_numeric(subregional_flags_dalys_camp), + "outputs/subregional_flags_dalys_camp.rds" + ) + saveRDS( + round_numeric(subregional_flags_dalys_rout), + "outputs/subregional_flags_dalys_rout.rds" + ) +} From 8aa8494dfab033d9d4e70092f1b2237343ccf6af Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Fri, 13 Mar 2026 12:15:14 +0000 Subject: [PATCH 05/17] Rename files --- ..._diagnostics.R => fn_burden_diagnostics.R} | 0 R/{helpers.R => fn_helpers.R} | 0 ...ing.R => fn_plotting_burden_diagnostics.R} | 0 ...ing_prep.R => fn_plotting_prep_bur_diag.R} | 0 R/pressure_testing.R | 291 ------------------ 5 files changed, 291 deletions(-) rename R/{burden_diagnostics.R => fn_burden_diagnostics.R} (100%) rename R/{helpers.R => fn_helpers.R} (100%) rename R/{plotting.R => fn_plotting_burden_diagnostics.R} (100%) rename R/{plotting_prep.R => fn_plotting_prep_bur_diag.R} (100%) delete mode 100644 R/pressure_testing.R diff --git a/R/burden_diagnostics.R b/R/fn_burden_diagnostics.R similarity index 100% rename from R/burden_diagnostics.R rename to R/fn_burden_diagnostics.R diff --git a/R/helpers.R b/R/fn_helpers.R similarity index 100% rename from R/helpers.R rename to R/fn_helpers.R diff --git a/R/plotting.R b/R/fn_plotting_burden_diagnostics.R similarity index 100% rename from R/plotting.R rename to R/fn_plotting_burden_diagnostics.R diff --git a/R/plotting_prep.R b/R/fn_plotting_prep_bur_diag.R similarity index 100% rename from R/plotting_prep.R rename to R/fn_plotting_prep_bur_diag.R diff --git a/R/pressure_testing.R b/R/pressure_testing.R deleted file mode 100644 index d73c08f..0000000 --- a/R/pressure_testing.R +++ /dev/null @@ -1,291 +0,0 @@ -### All functions for pressure testing - -# Flexible rounding - -# Fix for scenario_type variable being included from 202310 onwards -filter_recent_ts <- function(df, threshold = 202310) { - touchstone_year <- unique(df$touchstone) - - # TODO: check that touchstone year is 6 digit - can there be more digits? - ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R - - if (ts_number >= threshold) { - dplyr::filter( - df, - scenario_type == "default" - ) - } else { - df - } -} - -# Helper for removing excluded diseases post-202110 -filter_excluded_diseases_ts <- function(df, threshold = 202110) { - touchstone_year <- unique(df$touchstone) - ts_number <- str_as_ts_year(touchstone_year) - - if (ts_number <= threshold) { - filter(df, !disease %in% exclude_dis) - } else { - df - } -} - -# Identify duplicates -flag_duplicates <- function(df, key_cols) { - df <- dplyr::add_count( - df, - dplyr::across(dplyr::all_of(key_cols)), - name = "n_key" - ) - - filter(df, n_key > 1) -} - -# Identify rows where deaths_averted went from non-NA to NA -comparison_prev <- function(df, prev_dat, outcome) { - prev_df <- select(prev_data, all_of(key_cols), all_of(outcome)) - prev_df <- rename(prev_df, outcome_prev = !!sym(outcome)) - - current_df <- select(current_df, all_of(key_cols), all_of(outcome)) - current_df <- rename(current_df, outcome_cur = !!sym(outcome)) - - result <- inner_join(prev_df, current_df, by = key_cols) - result <- filter(result, !is.na(outcome_prev) & is.na(outcome_cur)) - - result -} - -# Explore significant changes in key outcomes (i.e. deaths/dalys) -generate_diffs <- function(prev_df, curr_df, interest_cols, key_cols) { - #fix for erroneous duplicated YF data in 201910 dataset - if (identical(pars$touchstone_old, TOUCHSTONE_OLD)) { - prev_df <- filter( - prev_df, - !(disease == "YF" & support_type == "other" & coverage == 0) - ) - } - - # Fix for multiple campaigns per year (i.e. not true duplicates) - # only applicable for 2019 true non-duplicates. - prev_df <- add_campaign_id(prev_df, key_cols) - curr_df <- add_campaign_id(curr_df, key_cols) - - diff_keys <- c(key_cols, "campaign_id") - cols_needed <- unique(c(diff_keys, interest_cols)) - - diff <- diffdf::diffdf( - prev_df[, cols_needed], - curr_df[, cols_needed], - keys = diff_keys - ) - - changes <- setNames( - lapply(interest_cols, function(v) { - nm <- paste0("VarDiff_", v) - if (nm %in% names(diff)) diff[[nm]] else NULL - }), - interest_cols - ) - - changes -} - -# Generate IQR for key outcomes - for threshold of "significant" -gen_national_iqr <- function( - df, - group_cols, - value_cols, - prefix = "national_iqr_" -) { - df <- group_by(df, across(all_of(group_cols))) - df <- summarise( - df, - across( - all_of(value_cols), - \(x) IQR(x, na.rm = TRUE), - .names = "{prefix}{.col}" - ), - .groups = "drop" - ) -} - -# TODO: I don't like how this looks - this should probably be a simpler -# functional that maps over a list in a separate function -## Flag significant changes -flag_large_diffs <- function( - changes_list, - iqr_df, - variable, - group_cols, - threshold -) { - iqr_col <- paste0("national_iqr_", variable) - - # returns a list so that the function can accept multiple variables - lapply( - changes_list[[variable]], - temp_fn, - iqr_df, - variable, - group_cols, - threshold - ) -} - -temp_fn <- function(df, iqr_df, variable, group_cols, threshold) { - mutate( - df, - diff = COMPARE - BASE - ) %>% - left_join( - iqr_df %>% select(all_of(group_cols), all_of(iqr_col)), - by = group_cols - ) %>% - mutate( - flag = abs(diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0 - ) %>% - filter(flag) %>% - select( - country, - country_name, - year, - vaccine, - modelling_group, - activity_type, - BASE, - COMPARE, - diff - ) %>% - rename(!!as.character(old) := BASE, !!as.character(new) := COMPARE) %>% - arrange(desc(diff)) -} - -### Generate combined df -gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) { - prev_df <- prev_dat[, interest_cols] - cur_df <- df2[, interest_cols] - - combined <- full_join( - prev_df, - cur_df, - by = key_cols, - suffix = c("_old", "_new") - ) - - combined <- combined %>% - select( - country, - country_name, - disease, - vaccine, - activity_type, - year, - modelling_group, - deaths_averted_old, - deaths_averted_new, - dalys_averted_old, - dalys_averted_new - ) - return(combined) -} - -### Subregional v national estimate comparison -compare_national_to_subregional <- function( - df, - outcome, - activity_filter, - threshold -) { - df <- filter(df, activity_type == activity_filter) - df <- select(df, all_of(key_cols), subregion, !!outcome) - - results <- purrr::map_dfr(outcome, function(otc) { - subregional_summary <- - group_by(df, subregion, disease, activity_type) - subregional_summary <- summarise( - subregional_summary, - subregional_mean = mean(.data[[otc]], na.rm = TRUE), - subregional_iqr = IQR(.data[[otc]], na.rm = TRUE), - .groups = "drop" - ) - - national_summary <- - select(df, all_of(key_cols), subregion, !!outcome) - national_summary <- rename(national_summary, national_value = !!outcome) - - comparison <- left_join( - national_summary, - subregional_summary, - by = c("subregion", "disease") - ) - comparison <- mutate( - comparison, - outcome = outcome, - difference = national_value - subregional_mean, - iqr_score = abs(difference) / subregional_iqr - ) - - dynamic_threshold <- quantile(comparison$iqr_score, 0.99, na.rm = TRUE) - - comparison <- mutate( - comparison, - flag_iqr = iqr_score > dynamic_threshold & subregional_iqr > 0 - ) - comparison <- filter(comparison, flag_iqr) - comparison <- select( - comparison, - country_name, - vaccine, - year, - modelling_group, - national_value, - subregional_mean, - subregional_iqr, - difference, - iqr_score - ) - comparison <- arrange(comparison, desc(iqr_score)) - - comparison - }) - - results -} - -### Modelling group variations -save_outputs <- function() { - saveRDS( - round_numeric( - missing_in_current %>% - select(all_of(c( - "country_name", - "vaccine", - "activity_type", - "year", - "modelling_group" - ))) - ), - "outputs/missing_in_current.rds" - ) - saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds") - saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds") - saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds") - saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds") - saveRDS( - round_numeric(subregional_flags_deaths_camp), - "outputs/subregional_flags_deaths_camp.rds" - ) - saveRDS( - round_numeric(subregional_flags_deaths_rout), - "outputs/subregional_flags_deaths_rout.rds" - ) - saveRDS( - round_numeric(subregional_flags_dalys_camp), - "outputs/subregional_flags_dalys_camp.rds" - ) - saveRDS( - round_numeric(subregional_flags_dalys_rout), - "outputs/subregional_flags_dalys_rout.rds" - ) -} From de1eda658e8402d12a0624e716f797ecac0fdeed Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Mon, 20 Apr 2026 16:55:14 +0100 Subject: [PATCH 06/17] Add checks and docs for pressure testing fns --- R/fn_pressure_testing.R | 683 ++++++++++++++++++++++++++++++---------- 1 file changed, 523 insertions(+), 160 deletions(-) diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R index 140cc79..9f25309 100644 --- a/R/fn_pressure_testing.R +++ b/R/fn_pressure_testing.R @@ -1,14 +1,50 @@ -#' Fix for scenario_type variable being included from 202310 onwards +#' Filter data for touchstones or diseases +#' +#' @name pres_test_filter_data +#' @rdname pres_test_filter_data +#' +#' @description +#' A pair of helper functions allowing filtering out of recent touchstone values +#' and excluded diseases. +#' +#' @param df A `` holding impact data. This data.frame is not +#' checked for contents +#' +#' @param threshold A six-digit number that is checked as a valid touchstone +#' identifier (YYYYMM format) using [validate_ts_year()]. #' #' @keywords pressure_testing #' +#' @return A filtered ``. +#' +#' - `filter_recent_ts()` returns `df` with rows where the touchstone condition +#' is not met excluded. +#' +#' - `filter_excluded_diseases_ts()` returns `df` with rows where rows relating +#' to the [EXCLUDED_DISEASES], when the touchstone year in `df` is less than the +#' `threshold`, excluded. +#' +#' - `filter_duplicates()` returns `df` with duplicated combinations of +#' `key_cols` removed. +#' +#' - `filter_invalid_trajectories()` returns `df` with bad outcome trajectories +#' (`NA` to non-`NA`) removed. +#' #' @export filter_recent_ts <- function(df, threshold = 202310) { - touchstone_year <- unique(df$touchstone) + checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L) + checkmate::assert_names( + names(df), + must.include = "touchstone" + ) + threshold <- validate_ts_year(threshold) # apply same rule as data ts year - # TODO: check that touchstone year is 6 digit - can there be more digits? - ts_number <- str_as_ts_year(touchstone_year) # see R/helpers.R + touchstone_year <- unique(df[["touchstone"]]) + ts_number <- validate_ts_year(touchstone_year) # see R/helpers.R + + # NOTE: consider converting to Date and checking - numeric comparison + # works okay for now if (ts_number >= threshold) { dplyr::filter( df, @@ -19,12 +55,20 @@ filter_recent_ts <- function(df, threshold = 202310) { } } -#' Helper for removing excluded diseases post-202110 +#' @name pres_test_filter_data #' #' @export filter_excluded_diseases_ts <- function(df, threshold = 202110) { + checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L) + checkmate::assert_names( + names(df), + must.include = "touchstone" + ) + + threshold <- validate_ts_year(threshold) + touchstone_year <- unique(df$touchstone) - ts_number <- str_as_ts_year(touchstone_year) + ts_number <- validate_ts_year(touchstone_year) if (ts_number <= threshold) { dplyr::filter(df, !.data$disease %in% EXCLUDED_DISEASES) @@ -33,10 +77,27 @@ filter_excluded_diseases_ts <- function(df, threshold = 202110) { } } -#' Identify duplicates +#' @name pres_test_filter_data +#' +#' @param key_cols Key columns in `df` to check for duplicates. #' #' @export -flag_duplicates <- function(df, key_cols) { +filter_duplicates <- function(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) { + checkmate::assert_data_frame(df, min.cols = 1L, min.rows = 1L) + checkmate::assert_character(key_cols) + + has_cols <- checkmate::test_names( + colnames(df), + must.include = key_cols + ) + if (!has_cols) { + missing_cols <- setdiff(colnames(df), key_cols) # nolint used in cli + cli::cli_abort( + "Expected {.code df} to have columns {.str {key_cols}}, but columns \ + {.str {.strong {missing_cols}}} were missing!" + ) + } + df <- dplyr::add_count( df, dplyr::across(dplyr::all_of(key_cols)), @@ -46,46 +107,136 @@ flag_duplicates <- function(df, key_cols) { dplyr::filter(df, .data$n_key > 1) } -#' Identify rows where deaths_averted went from non-NA to NA +#' @name pres_test_filter_data +#' +#' @param prev_data A `` holding data from a previous touchstone for +#' the same scenarios as `df`. +#' +#' @param outcome A string giving the outcome of interest; may be one of +#' `"deaths_averted"` or `"dalys_averted"`. #' #' @export -comparison_prev <- function(df, prev_data, outcome) { +filter_invalid_trajectories <- function( + df, + prev_data, + outcome = c("deaths_averted", "dalys_averted") +) { + checkmate::assert_data_frame(df, min.cols = 1L, min.rows = 1L) + + # TODO: can we assume prev_data is at least the size of df? + checkmate::assert_data_frame( + prev_data, + min.cols = ncol(df), + min.rows = nrow(df) + ) + + outcome <- rlang::arg_match(outcome) + + has_cols <- checkmate::test_names( + colnames(df), + must.include = c(COLNAMES_KEY_PRESSURE_TEST, outcome) + ) + if (!has_cols) { + missing_cols <- setdiff(colnames(df), COLNAMES_KEY_PRESSURE_TEST) # nolint + cli::cli_abort( + "Expected {.code df} to have columns \ + {.str {COLNAMES_KEY_PRESSURE_TEST}}, but columns \ + {.str {.strong {missing_cols}}} were missing!" + ) + } + + has_cols <- checkmate::test_names( + colnames(prev_data), + must.include = c(COLNAMES_KEY_PRESSURE_TEST, outcome) + ) + if (!has_cols) { + missing_cols <- setdiff(colnames(prev_data), COLNAMES_KEY_PRESSURE_TEST) + cli::cli_abort( + "Expected {.code prev_data} to have columns \ + {.str {COLNAMES_KEY_PRESSURE_TEST}}, but columns \ + {.str {.strong {missing_cols}}} were missing!" + ) + } + prev_df <- dplyr::select( prev_data, - dplyr::all_of(key_cols), + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), dplyr::all_of(outcome) ) prev_df <- dplyr::rename(prev_df, outcome_prev = {{ outcome }}) current_df <- dplyr::select( df, - dplyr::all_of(key_cols), + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), dplyr::all_of(outcome) ) current_df <- dplyr::rename(current_df, outcome_cur = {{ outcome }}) - result <- dplyr::inner_join(prev_df, current_df, by = key_cols) - # `,` replaces `&` for dplyr syntax - result <- dplyr::filter(result, !is.na(outcome_prev), is.na(outcome_cur)) + result <- dplyr::inner_join( + prev_df, + current_df, + by = COLNAMES_KEY_PRESSURE_TEST + ) - result + # `,` replaces `&` for dplyr syntax + dplyr::filter( + result, + !is.na(.data$outcome_prev), + is.na(.data$outcome_cur) + ) } #' Explore significant changes in deaths and DALYs #' +#' @param prev_df A `` of impact estimates from the previous +#' touchstone. +#' +#' @param curr_df A `` of impact estimates for the current +#' touchstone. +#' +#' @param interest_cols A character vector of columns to check for differences. +#' Defaults to [COLNAMES_INTEREST_PRESSURE_TEST]. +#' +#' @param key_cols A character vector of columns to use when assigning campaign +#' identifiers. Passed to [add_campaign_id()], defaults to +#' [COLNAMES_KEY_PRESSURE_TEST]. +#' +#' @param touchstone A six character string that can be converted to a six digit +#' numeric giving a touchstone identifier in `YYYYMM` format. +#' +#' @return A list of data.frames of differences between `prev_df` and `curr_df`, +#' with one list element per element of `interest_cols`. +#' #' @keywords pressure_testing #' #' @export generate_diffs <- function( prev_df, curr_df, - interest_cols, - key_cols, - touchstone = TOUCHSTONE_OLD + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST, + touchstone = DEF_TOUCHSTONE_OLD ) { - # TODO: replace use of `pars$touchstone_old` with arg `touchstone` - #fix for erroneous duplicated YF data in 201910 dataset - if (identical(touchstone, TOUCHSTONE_OLD)) { + checkmate::assert_data_frame(prev_df, min.rows = 1L, min.cols = 1L) + checkmate::assert_data_frame(curr_df, min.rows = 1L, min.cols = 1L) + + checkmate::assert_character(interest_cols, min.len = 1L) + checkmate::assert_character(key_cols, min.len = 1L) + + # check interest cols in dfs. key cols are check in `add_campaign_id` + checkmate::assert_names( + colnames(prev_df), + interest_cols + ) + checkmate::assert_names( + colnames(curr_df), + interest_cols + ) + + touchstone <- validate_ts_year(touchstone) + + # fix for erroneous duplicated YF data in 201910 dataset + if (touchstone == DEF_TOUCHSTONE_OLD) { prev_df <- dplyr::filter( prev_df, !(.data$disease == "YF" & @@ -100,9 +251,9 @@ generate_diffs <- function( curr_df <- add_campaign_id(curr_df, key_cols) diff_keys <- c(key_cols, "campaign_id") - cols_needed <- unique(c(diff_keys, interest_cols)) + cols_needed <- union(diff_keys, interest_cols) - diff <- diffdf::diffdf( + df_diff <- diffdf::diffdf( prev_df[, cols_needed], curr_df[, cols_needed], keys = diff_keys @@ -111,7 +262,7 @@ generate_diffs <- function( changes <- stats::setNames( lapply(interest_cols, function(v) { nm <- glue::glue("VarDiff_{v}") - if (nm %in% names(diff)) diff[[nm]] else NULL + if (nm %in% names(df_diff)) df_diff[[nm]] else NULL }), interest_cols ) @@ -123,28 +274,89 @@ generate_diffs <- function( #' #' @keywords pressure_testing #' +#' @param df A data.frame of impact estimates. +#' +#' @param group_cols A character vector of grouping columns. Defaults to +#' "country", "vaccine", "activity_type". +#' +#' @param value_cols A character vector of value columns. Defaults to +#' "deaths_averted" and "dalys_averted". +#' +#' @param prefix A string for the prefix applied to every IQR summary column. +#' Defaults to "national_iqr". +#' +#' @return A `` with the inter-quartile range of the columns +#' in `value_cols`, with the column name constructed as `{prefix}_{value_col}` +#' using string interpolation. +#' #' @export gen_national_iqr <- function( df, - group_cols, - value_cols, - prefix = "national_iqr_" + group_cols = c("country", "vaccine", "activity_type"), + value_cols = c("deaths_averted", "dalys_averted"), + prefix = "national_iqr" ) { + checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L) + checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE) + + # NOTE: restricting value columns to deaths and dalys averted + value_cols <- rlang::arg_match( + value_cols, + c("deaths_averted", "dalys_averted"), + multiple = TRUE + ) + + checkmate::assert_string(prefix) + + checkmate::assert_names( + colnames(df), + must.include = union(group_cols, value_cols) + ) + df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(group_cols))) df <- dplyr::summarise( df, dplyr::across( dplyr::all_of(value_cols), function(x) { - IQR(x, na.rm = TRUE) + stats::IQR(x, na.rm = TRUE) }, - .names = "{prefix}{.col}" + .names = "{prefix}_{.col}" ), .groups = "drop" ) } -#' Flag significant changes +#' Flag significant changes in impact estimates +#' +#' @description Calculates and flags whether the difference in impact estimates +#' between touchstones is greater than expected. A row is flagged if the +#' difference is greater than `threshold` \eqn{\times} the inter-quartile range +#' for cases where the IQR is greater than zero. +#' +#' @param changes_list A list of data.frames with one element per variable of +#' interest (see `variable`). Usually generated using [generate_diffs()]. +#' +#' @param iqr_df A data.frame of inter-quartile differences generated using +#' [gen_national_iqr()]. +#' +#' @param variable A string specifying the variable of interest. Must be one of +#' "deaths_averted" and "dalys_averted", and must be present as a name and +#' element of `changes_list`. +#' +#' @inheritParams gen_national_iqr +#' +#' @param threshold A single numeric value for the IQR multiplier. Defaults to +#' 100. +#' +#' @param touchstone_old The previous touchstone identifier. Defaults to +#' [DEF_TOUCHSTONE_OLD_OLD]. +#' +#' @param touchstone_new The new touchstone identifier. Defaults to +#' [DEF_TOUCHSTONE_NEW]. +#' +#' @return A filtered data.frame of differences in impact estimates flagged +#' as too large. Rows with differences within tolerance are removed. #' #' @keywords pressure_testing #' @@ -153,16 +365,46 @@ flag_large_diffs <- function( changes_list, iqr_df, variable = c("deaths_averted", "dalys_averted"), - group_cols, - threshold + group_cols = c("country", "vaccine", "activity_type"), + threshold = 100, + touchstone_old = DEF_TOUCHSTONE_OLD_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW ) { - # TODO: input checking + checkmate::assert_list(changes_list, "data.frame") + checkmate::assert_data_frame(iqr_df, min.rows = 1L, min.cols = 1L) + variable <- rlang::arg_match(variable) + checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE) + + # TODO: check what a sensible upper limit might be + checkmate::assert_number(threshold, lower = 1.0, finite = TRUE) + + touchstone_old <- validate_ts_year(touchstone_old) + touchstone_new <- validate_ts_year(touchstone_new) + + # cross checking + has_var <- variable %in% names(changes_list) + if (!has_var) { + cli::cli_abort( + "Expected list {.code changes_list} to have an element with the name \ + {.str {variable}}, but it does not." + ) + } + df_compare <- changes_list[[variable]] + + checkmate::assert_names( + colnames(df_compare), + must.include = group_cols + ) + checkmate::assert_names( + colnames(iqr_df), + must.include = group_cols + ) iqr_col <- glue::glue("national_iqr_{variable}") - df <- dplyr::mutate( - changes_list[[variable]], + df_compare <- dplyr::mutate( + df_compare, diff = .data$COMPARE - .data$BASE ) @@ -172,18 +414,18 @@ flag_large_diffs <- function( dplyr::all_of(iqr_col) ) - df <- left_join( - df, + df_compare <- dplyr::left_join( + df_compare, iqr_df, by = group_cols ) - df <- dplyr::mutate( - df, + df_compare <- dplyr::mutate( + df_compare, flag = abs(.data$diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0 ) - df <- dplyr::filter(df, .data$flag) + df_compare <- dplyr::filter(df_compare, .data$flag) cols_to_select <- c( "country", @@ -197,29 +439,59 @@ flag_large_diffs <- function( "diff" ) - df <- dplyr::select( - df, + df_compare <- dplyr::select( + df_compare, {{ cols_to_select }} ) - # TODO: replace `old` and `new` with defined objs --- see scratch.R - # unsure why this syntax was used - df <- dplyr::rename( - df, - !!as.character(old) := BASE, - !!as.character(new) := COMPARE + rename_lookup <- c("BASE", "COMPARE") + names(rename_lookup) <- c( + as.character(touchstone_old), + as.character(touchstone_new) + ) + df_compare <- dplyr::rename( + df_compare, + rename_lookup ) - dplyr::arrange(df, dplyr::desc(diff)) + dplyr::arrange(df_compare, dplyr::desc(diff)) } -#' Generate combined df +#' Combine and align data from two touchstones +#' +#' @description +#' Generates a full join of two data.frames, selecting for columns of interest. +#' +#' @param prev_dat A data.frame of impact estimates corresponding to an earlier +#' touchstone. +#' +#' @param df2 A data.frame of impact estimates corresponding to a more recent +#' touchstone. +#' +#' @param interest_cols A character vector of columns of interest. Defaults to +#' [COLNAMES_INTEREST_PRESSURE_TEST]. +#' +#' @param key_cols A character vector of columns of interest. Defaults to +#' [COLNAMES_KEY_PRESSURE_TEST]. +#' +#' @return A data.frame which is a full join of `prev_dat` and `df2`. Columns +#' are disambiguated with the suffixes `"_old"` and `"_new"`. #' #' @keywords pressure_testing #' #' @export -gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) { - # TODO: input checks +gen_combined_df <- function( + prev_dat, + df2, + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST +) { + checkmate::assert_data_frame( + prev_dat, + min.cols = 1L, + min.rows = 1L + ) + # TODO: df2 needs a better name prev_df <- dplyr::select(prev_dat, {{ interest_cols }}) cur_df <- dplyr::select(df2, {{ interest_cols }}) @@ -231,131 +503,222 @@ gen_combined_df <- function(prev_dat, df2, interest_cols, key_cols) { suffix = c("_old", "_new") ) + cols_to_select <- c( + "country", + "country_name", + "disease", + "vaccine", + "activity_type", + "year", + "modelling_group", + "deaths_averted_old", + "deaths_averted_new", + "dalys_averted_old", + "dalys_averted_new" + ) + dplyr::select( combined, - country, - country_name, - disease, - vaccine, - activity_type, - year, - modelling_group, - deaths_averted_old, - deaths_averted_new, - dalys_averted_old, - dalys_averted_new + {{ cols_to_select }} ) } -### Subregional v national estimate comparison -compare_national_to_subregional <- function( +#' Compare sub-regional and national estimates +#' +#' @param df A data.frame with sub-region level data on vaccination impact +#' outcomes. +#' +#' @param outcome A string for the outcome of interest. May be one of +#' `"deaths_averted_rate"` or `"dalys_averted_rate"`. +#' +#' @param activity_filter A string for the type of vaccination activity. May be +#' one of `"campaign"` or `"routine"`. +#' +#' @return A data.frame of sub-regional vaccination impact estimates where the +#' impact is considered to be outside the tolerance limit. +#' +#' @keywords pressure_testing +#' +#' @export +compare_natl_subreg <- function( df, - outcome, - activity_filter, - threshold + outcome = c("deaths_averted_rate", "dalys_averted_rate"), + activity_filter = c("campaign", "routine") ) { - df <- dplyr::filter(df, activity_type == activity_filter) - df <- dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome) - - results <- purrr::map_dfr(outcome, function(otc) { - subregional_summary <- - dplyr::group_by(df, subregion, disease, activity_type) - - subregional_summary <- dplyr::summarise( - subregional_summary, - subregional_mean = mean(.data[[otc]], na.rm = TRUE), - subregional_iqr = IQR(.data[[otc]], na.rm = TRUE), - .groups = "drop" - ) + df <- dplyr::filter(df, .data$activity_type == activity_filter) + df <- dplyr::select( + df, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + "subregion", + !!outcome + ) - national_summary <- - dplyr::select(df, dplyr::all_of(key_cols), subregion, !!outcome) - national_summary <- dplyr::rename( - national_summary, - national_value = !!outcome - ) + # first get national summary + national_summary <- dplyr::select( + df, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + .data$subregion, + !!outcome + ) + national_summary <- dplyr::rename( + national_summary, + national_value = !!outcome + ) - comparison <- dplyr::left_join( - national_summary, - subregional_summary, - by = c("subregion", "disease") - ) - comparison <- dplyr::mutate( - comparison, - outcome = outcome, - difference = .data$national_value - .data$subregional_mean, - iqr_score = abs(.data$difference) / .data$subregional_iqr - ) + # next get sub-regional summary + subregional_summary <- + dplyr::group_by(df, .data$subregion, .data$disease, .data$activity_type) - dynamic_threshold <- stats::quantile( - comparison$iqr_score, - 0.99, - na.rm = TRUE - ) + subregional_summary <- dplyr::summarise( + subregional_summary, + subregional_mean = mean(.data[[outcome]], na.rm = TRUE), + subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) - comparison <- dplyr::mutate( - comparison, - flag_iqr = .data$iqr_score > dynamic_threshold & .data$subregional_iqr > 0 - ) - comparison <- dplyr::filter(comparison, .data$flag_iqr) - comparison <- dplyr::select( - comparison, - country_name, - vaccine, - year, - modelling_group, - national_value, - subregional_mean, - subregional_iqr, - difference, - iqr_score - ) - comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score)) + comparison <- dplyr::left_join( + national_summary, + subregional_summary, + by = c("subregion", "disease") + ) + comparison <- dplyr::mutate( + comparison, + outcome = outcome, + difference = .data$national_value - .data$subregional_mean, + iqr_score = abs(.data$difference) / .data$subregional_iqr + ) - comparison - }) + dynamic_threshold <- stats::quantile( + comparison$iqr_score, + 0.99, + na.rm = TRUE + ) - results + comparison <- dplyr::mutate( + comparison, + flag_iqr = .data$iqr_score > dynamic_threshold & .data$subregional_iqr > 0 + ) + comparison <- dplyr::filter(comparison, .data$flag_iqr) + + cols_to_select <- c( + "country_name", + "vaccine", + "year", + "modelling_group", + "national_value", + "subregional_mean", + "subregional_iqr", + "difference", + "iqr_score" + ) + comparison <- dplyr::select(comparison, {{ cols_to_select }}) + comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score)) + + comparison } -# TODO: reconsider function name, add explicit arguments -#' Modelling group variations +#' Save pressure-testing diagnostics to local file +#' +#' @description +#' Save pressure-testing diagnostics data.frames to local compressed files in +#' the `.Rds` format. Input data.frames are generated by other package functions +#' and are not checked here. +#' +#' @param missing_in_current A data.frame. +#' +#' @param missing_deaths A data.frame that is the output of +#' [filter_invalid_trajectories()] with the outcome `"deaths_averted"`. +#' +#' @param missing_dalys A data.frame that is the output of +#' [filter_invalid_trajectories()] with the outcome `"dalys_averted"`. +#' +#' @param changes_deaths A data.frame that is the output of [flag_large_diffs()] +#' with the outcome `"deaths_averted"`. +#' +#' @param changes_dalys A data.frame that is the output of [flag_large_diffs()] +#' with the outcome `"dalys_averted"`. +#' +#' @param subregional_flags_deaths_camp A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"deaths_averted_rate"` for the +#' `"campaign"` activity type. +#' +#' @param subregional_flags_deaths_rout A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"deaths_averted_rate"` for the +#' `"routine"` activity type. +#' +#' @param subregional_flags_dalys_camp A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the +#' `"campaign"` activity type. +#' +#' @param subregional_flags_dalys_rout A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the +#' `"campaign"` activity type. +#' +#' @return None. Called for the convenience side-effect of saving data.frames as +#' `.Rds` format. #' #' @keywords pressure_testing #' #' @export -save_outputs <- function() { - saveRDS( - round_numeric( - missing_in_current %>% - dplyr::select(dplyr::all_of(c( - "country_name", - "vaccine", - "activity_type", - "year", - "modelling_group" - ))) - ), - "outputs/missing_in_current.rds" - ) - saveRDS(round_numeric(missing_deaths), "outputs/missing_deaths.rds") - saveRDS(round_numeric(missing_dalys), "outputs/missing_dalys.rds") - saveRDS(round_numeric(changes_deaths), "outputs/changes_deaths.rds") - saveRDS(round_numeric(changes_dalys), "outputs/changes_dalys.rds") - saveRDS( - round_numeric(subregional_flags_deaths_camp), - "outputs/subregional_flags_deaths_camp.rds" - ) - saveRDS( - round_numeric(subregional_flags_deaths_rout), - "outputs/subregional_flags_deaths_rout.rds" - ) - saveRDS( - round_numeric(subregional_flags_dalys_camp), - "outputs/subregional_flags_dalys_camp.rds" - ) - saveRDS( - round_numeric(subregional_flags_dalys_rout), - "outputs/subregional_flags_dalys_rout.rds" +save_outputs <- function( + missing_in_current, + missing_deaths, + missing_dalys, + changes_deaths, + changes_dalys, + subregional_flags_deaths_camp, + subregional_flags_deaths_rout, + subregional_flags_dalys_camp, + subregional_flags_dalys_rout, + output_dir = here::here("outputs") +) { + # NOTE: not checking most input args as these are generated from other pkg fns + + output_dir_exists <- dir.exists(output_dir) + if (!output_dir_exists) { + cli::cli_abort( + "Expected output directory {.arg {output_dir}} but it does not exist!" + ) + } + + # NOTE: consider writing to agnostic format e.g. CSV + missing_in_current <- dplyr::select( + missing_in_current, + {{ colnames_df_missing_cols }} + ) + + filenames <- c( + "missing_in_current", + "missing_deaths", + "missing_dalys", + "changes_deaths", + "changes_dalys", + "subregional_flags_deaths_camp", + "subregional_flags_deaths_rout", + "subregional_flags_dalys_camp", + "subregional_flags_dalys_rout" + ) + + df_list <- list( + missing_in_current, + missing_deaths, + missing_dalys, + changes_deaths, + changes_dalys, + subregional_flags_deaths_camp, + subregional_flags_deaths_rout, + subregional_flags_dalys_camp, + subregional_flags_dalys_rout + ) + + Map( + df_list, + filenames, + f = function(df, name) { + saveRDS( + round_numeric(df), + file.path(output_dir, glue::glue("{name}.Rds")) + ) + } ) } From 46d3d974131d34d115a658d1cfc87e49f9cbdd63 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Mon, 20 Apr 2026 16:55:25 +0100 Subject: [PATCH 07/17] Add pkg constants --- R/constants.R | 58 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/R/constants.R b/R/constants.R index 20ae152..adb8175 100644 --- a/R/constants.R +++ b/R/constants.R @@ -47,14 +47,66 @@ colnames_plot_demog_compare <- c( "value_millions" ) +#' @name constants +colnames_df_missing_cols <- c( + "country_name", + "vaccine", + "activity_type", + "year", + "modelling_group" +) + +#' @name constants +COLNAMES_KEY_PRESSURE_TEST <- c( + "country", + "country_name", + "vaccine", + "activity_type", + "year", + "disease", + "modelling_group" +) + +#' @name constants +COLNAMES_INTEREST_PRESSURE_TEST <- union( + COLNAMES_KEY_PRESSURE_TEST, + c( + "fvps", + "target_population", + "coverage", + "deaths_averted", + "dalys_averted", + "deaths_averted_rate", + "dalys_averted_rate" + ) +) + #' @name constants EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE") #' @name constants -TOUCHSTONE_OLD <- "201910" +N_TS_MIN_CHARS <- 6L + +#' @name constants +N_TS_YEAR_CHARS <- 4L + +#' @name constants +MIN_TS_YEAR <- 2000 + +#' @name constants +MAX_TS_YEAR <- 2100 + +#' @name constants +MIN_TS_MONTH <- 1 + +#' @name constants +MAX_TS_MONTH <- 12 + +#' @name constants +DEF_TOUCHSTONE_OLD <- "201910" #' @name constants -TOUCHSTONE_NEW <- "202310" +DEF_TOUCHSTONE_NEW <- "202310" #' @name constants -TOUCHSTONE_OLD_OLD <- "202110" +DEF_TOUCHSTONE_OLD_OLD <- "202110" From 63e5871eb95be0d2e787e0a92f89dfc74f75977f Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Mon, 20 Apr 2026 16:55:48 +0100 Subject: [PATCH 08/17] Add pkg helpers --- R/fn_helpers.R | 135 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 125 insertions(+), 10 deletions(-) diff --git a/R/fn_helpers.R b/R/fn_helpers.R index d065a9e..5159361 100644 --- a/R/fn_helpers.R +++ b/R/fn_helpers.R @@ -36,12 +36,34 @@ make_novax_scenario <- function(disease) { ) } +#' Adaptively round numerics +#' +#' @param x A numeric vector. +#' +#' @param large_threshold A single number for the threshold over which numbers +#' are to be considered 'large'. +#' +#' @param small_sigfig A single number for the number of significant digits for +#' 'small' numbers. +#' +#' @param large_digits A single number for the number of places to which 'large' +#' numbers should be rounded. +#' +#' @return `x` rounded to either `large_digits` or to `small_sigfig`. +#' +#' @keywords internal adaptive_round <- function( x, large_threshold = 1, small_sigfig = 2, large_digits = 1 ) { + # basic checks for numeric + checkmate::assert_numeric(x, finite = TRUE, any.missing = FALSE) + checkmate::assert_number(large_threshold) + checkmate::assert_count(small_sigfig, positive = TRUE) + checkmate::assert_count(large_digits, positive = TRUE) + ifelse( abs(x) >= large_threshold, round(x, large_digits), @@ -49,21 +71,114 @@ adaptive_round <- function( ) } +#' Round numeric columns of a data.frame +#' +#' @param df A data.frame. +#' +#' @keywords internal round_numeric <- function(df) { - df %>% - mutate(across( - where(is.numeric) & !matches("year", ignore.case = TRUE), - ~ adaptive_round(.x) - )) + checkmate::assert_data_frame( + df, + min.rows = 1L, + min.cols = 1L + ) + + dplyr::mutate( + df, + dplyr::across( + .cols = dplyr::where(is.numeric) & !matches("year", ignore.case = TRUE), + .fns = adaptive_round + ) + ) } -str_as_ts_year <- function(x) { - as.numeric(substr(x, 1, 6)) +#' Check and return touchstone year-month +#' +#' @param x A string for the touchstone identifier. +#' +#' @return The first 6 characters of `x` converted to a numeric. Also has side +#' effects of erroring if conditions on `x` are not met. +#' +#' @keywords internal +validate_ts_year <- function(x) { + has_n_chars <- checkmate::test_string( + x, + min.chars = N_TS_MIN_CHARS + ) + if (!has_n_chars) { + n_chars <- nchars(x) + cli::cli_abort( + "Touchstone year string should have at least {N_TS_MIN_CHARS}, but got \ + {n_chars} characters." + ) + } + + inferred_year <- as.numeric(substr(x, 1, N_TS_YEAR_CHARS)) + is_good_year <- checkmate::test_number( + inferred_year, + lower = MIN_TS_YEAR, + upper = MAX_TS_YEAR, + finite = TRUE + ) + + if (!is_good_year) { + cli::cli_abort( + "Touchstone year string has an inferred year of \ + {.strong {inferred_year}} but expected an year in the range \ + [{MIN_TS_YEAR}, {MAX_TS_YEAR}]." + ) + } + + inferred_month <- as.numeric( + substr(x, N_TS_YEAR_CHARS + 1, N_TS_YEAR_CHARS + 2) + ) + is_good_month <- checkmate::test_number( + inferred_month, + lower = MIN_TS_MONTH, + upper = MAX_TS_MONTH, + finite = TRUE + ) + + if (!is_good_month) { + cli::cli_abort( + "Touchstone month string has an inferred month of \ + {.strong {inferred_month}} but expected an month in the range \ + [{MIN_TS_MONTH}, {MAX_TS_MONTH}]." + ) + } + + # return year-month as numeric + substr(x, 1, N_TS_MIN_CHARS) } +#' Add campaign id to dataframe +#' +#' @param df A data.frame. +#' +#' @param key_cols A character vector of columns in `df` by which the data are +#' to be grouped. +#' +#' @return `df` with a campaign identifier as a numeric. +#' +#' @keywords internal add_campaign_id <- function(df, key_cols) { - df <- group_by(df, across(all_of(key_cols))) - df <- mutate(df, campaign_id = row_number()) + checkmate::assert_data_frame(df) + checkmate::assert_character(key_cols, any.missing = FALSE) + + has_cols <- checkmate::test_names( + names(df), + must.include = key_cols + ) + if (!has_cols) { + missing_cols <- setdiff(colnames(df), key_cols) + cli::cli_abort( + "Expected {.code df} to have columns {.str {key_cols}} but columns \ + {.str {missing_cols}} are missing." + ) + } + + df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(key_cols))) + df <- dplyr::mutate(df, campaign_id = dplyr::row_number()) - ungroup(df) + dplyr::ungroup(df) } From ffff8400c9308d53d06ff610aa46a64ab6733266 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Mon, 20 Apr 2026 17:05:50 +0100 Subject: [PATCH 09/17] WIP separate plot prep from plot code --- R/fn_plotting_prep_pres_test.R | 154 ++++++++++++++++++++++++++++++ R/fn_plotting_pressure_testing.R | 159 +++---------------------------- 2 files changed, 165 insertions(+), 148 deletions(-) create mode 100644 R/fn_plotting_prep_pres_test.R diff --git a/R/fn_plotting_prep_pres_test.R b/R/fn_plotting_prep_pres_test.R new file mode 100644 index 0000000..d1959dd --- /dev/null +++ b/R/fn_plotting_prep_pres_test.R @@ -0,0 +1,154 @@ +prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") { + offset <- 1e-6 + + df2 %>% + left_join(df3, by = join_by(modelling_group, vaccine)) %>% + mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>% + group_by(vaccine) %>% + mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) +} + +prep_plot_vax_gavi <- function( + df, + prev_dat = NULL, + outcome = "deaths_averted" +) { + df_cur <- df %>% + select(all_of(key_cols), !!outcome) %>% + filter(year >= 2021, year <= 2024, disease != "COVID") %>% + group_by(disease, year) %>% + summarise( + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) %>% + mutate(dataset = as.character(new)) + + df_prev <- prev_dat %>% + select(all_of(key_cols), !!outcome) %>% + filter(year >= 2021, year <= 2024, disease != "COVID") %>% + group_by(disease, year) %>% + summarise( + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) %>% + mutate(dataset = as.character(old)) + + df_combined <- bind_rows(df_cur, df_prev) + + df_diff <- df_cur %>% + left_join( + df_prev, + by = c("disease", "year"), + suffix = c("_curr", "_prev") + ) %>% + mutate( + yearly_outcome = yearly_outcome_curr - yearly_outcome_prev, + dataset = "Difference" + ) %>% + select(disease, year, yearly_outcome, dataset) + + df_combined <- bind_rows(df_combined, df_diff) + + df_combined$dataset <- factor( + df_combined$dataset, + levels = c(as.character(old), "Difference", as.character(new)) + ) + + df_combined +} + +prep_plot_cumul <- function(df, outcome, disease_filter) { + outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))] + + outcome_sym <- sym(outcome) + cum_col <- paste0("cum_", outcome) + avg_col <- paste0("avg_", outcome) + + col_old <- paste0(outcome, "_old") + col_new <- paste0(outcome, "_new") + + combined2 <- df %>% + select( + country, + country_name, + disease, + vaccine, + activity_type, + year, + modelling_group, + all_of(outcome_cols) + ) %>% + pivot_longer( + cols = all_of(outcome_cols), + names_to = "touchstone", + values_to = "value" + ) %>% + mutate( + touchstone = str_remove(touchstone, paste0("^", outcome, "_")), + touchstone = recode( + touchstone, + "old" = as.character(old), + "new" = as.character(new), + .default = touchstone + ), + touchstone = factor( + touchstone, + levels = c(as.character(old), as.character(new)) + ) + ) + # Cumulative values by modelling group + df_cum <- combined2 %>% + filter(disease == disease_filter) %>% + group_by(modelling_group, touchstone) %>% + complete(year = full_seq(year, 1)) %>% + arrange(year) %>% + mutate( + first_valid = min(year[!is.na(value)]), + !!cum_col := ifelse( + year < first_valid, + NA, + cumsum(replace_na(value, 0)) + ) + ) %>% + select(-first_valid) %>% + ungroup() %>% + mutate(modelling_group = paste(modelling_group, touchstone, sep = "-")) + + # Model average + df_avg <- df_cum %>% + group_by(year, touchstone) %>% + summarise( + !!avg_col := mean(!!sym(cum_col), na.rm = TRUE), + n_models = sum(!is.na(!!sym(cum_col))), + .groups = "drop" + ) %>% + filter(n_models >= 1) %>% + mutate(modelling_group = paste("Model Average", touchstone, sep = "-")) + + # Combine for plot + df_plot <- bind_rows( + df_cum %>% + select(year, modelling_group, touchstone, value = !!sym(cum_col)), + df_avg %>% + select(year, modelling_group, touchstone, value = !!sym(avg_col)) + ) + + df_plot <- df_plot %>% + group_by(modelling_group) %>% + filter(sum(value, na.rm = TRUE) > 0) %>% + ungroup() %>% + mutate( + line_type = ifelse( + grepl("Model Average", modelling_group), + "dashed", + "solid" + ) + ) + + if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { + message("No non-zero data to plot for ", disease_filter, ". Skipping plot.") + return(NULL) + } + + df_plot +} diff --git a/R/fn_plotting_pressure_testing.R b/R/fn_plotting_pressure_testing.R index 8aaad49..1bb3da4 100644 --- a/R/fn_plotting_pressure_testing.R +++ b/R/fn_plotting_pressure_testing.R @@ -4,8 +4,10 @@ #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars #' labeller label_wrap_gen #' +#' @keywords internal +#' #' @export -significant_diff_plot <- function(df, outcome) { +plot_sig_diff <- function(df, outcome) { # retained here as this is a small df and a small operation df$label <- glue::glue( "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}" @@ -75,15 +77,8 @@ plot_diff <- function( return(p) } -plot_modelling_group_variation <- function(df2, df3, outc = "deaths") { - offset <- 1e-6 - - df2 %>% - left_join(df3, by = join_by(modelling_group, vaccine)) %>% - mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>% - group_by(vaccine) %>% - mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) %>% - ggplot() + +plot_modelling_group_variation <- function(df, outcome) { + ggplot(df) + aes( fill = as.character(mod_num), x = adj_outc, @@ -109,7 +104,7 @@ plot_modelling_group_variation <- function(df2, df3, outc = "deaths") { labs( x = paste0( "Burden averted (", - ifelse(outc == "dalys", "DALYs", outc), + ifelse(outcome == "dalys", "DALYs", outcome), ")" ), y = "Vaccine" @@ -117,51 +112,11 @@ plot_modelling_group_variation <- function(df2, df3, outc = "deaths") { } -# Gavi plot - future deaths and DALYS averted, 2021-2024 (current time window Gavi looking at, can be amended) -plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") { - df_cur <- df %>% - select(all_of(key_cols), !!outcome) %>% - filter(year >= 2021, year <= 2024, disease != "COVID") %>% - group_by(disease, year) %>% - summarise( - yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), - .groups = "drop" - ) %>% - mutate(dataset = as.character(new)) - - df_prev <- prev_dat %>% - select(all_of(key_cols), !!outcome) %>% - filter(year >= 2021, year <= 2024, disease != "COVID") %>% - group_by(disease, year) %>% - summarise( - yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), - .groups = "drop" - ) %>% - mutate(dataset = as.character(old)) - - df_combined <- bind_rows(df_cur, df_prev) - - df_diff <- df_cur %>% - left_join( - df_prev, - by = c("disease", "year"), - suffix = c("_curr", "_prev") - ) %>% - mutate( - yearly_outcome = yearly_outcome_curr - yearly_outcome_prev, - dataset = "Difference" - ) %>% - select(disease, year, yearly_outcome, dataset) - - df_combined <- bind_rows(df_combined, df_diff) - - df_combined$dataset <- factor( - df_combined$dataset, - levels = c(as.character(old), "Difference", as.character(new)) - ) - +#' Gavi plot - future deaths and DALYS averted, 2021-2024 +#' (current time window Gavi looking at, can be amended) +plot_vaccine_gavi <- function(df, outcome = "deaths_averted") { ggplot( - df_combined, + df, aes( x = reorder(disease, yearly_outcome), y = yearly_outcome, @@ -186,100 +141,8 @@ plot_vaccine_gavi <- function(df, prev_dat = NULL, outcome = "deaths_averted") { ### Gavi Cumulative Plot (modelling group + average) plot_cumul <- function(df, outcome, disease_filter) { - outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))] - - outcome_sym <- sym(outcome) - cum_col <- paste0("cum_", outcome) - avg_col <- paste0("avg_", outcome) - - col_old <- paste0(outcome, "_old") - col_new <- paste0(outcome, "_new") - - combined2 <- df %>% - select( - country, - country_name, - disease, - vaccine, - activity_type, - year, - modelling_group, - all_of(outcome_cols) - ) %>% - pivot_longer( - cols = all_of(outcome_cols), - names_to = "touchstone", - values_to = "value" - ) %>% - mutate( - touchstone = str_remove(touchstone, paste0("^", outcome, "_")), - touchstone = recode( - touchstone, - "old" = as.character(old), - "new" = as.character(new), - .default = touchstone - ), - touchstone = factor( - touchstone, - levels = c(as.character(old), as.character(new)) - ) - ) - # Cumulative values by modelling group - df_cum <- combined2 %>% - filter(disease == disease_filter) %>% - group_by(modelling_group, touchstone) %>% - complete(year = full_seq(year, 1)) %>% - arrange(year) %>% - mutate( - first_valid = min(year[!is.na(value)]), - !!cum_col := ifelse( - year < first_valid, - NA, - cumsum(replace_na(value, 0)) - ) - ) %>% - select(-first_valid) %>% - ungroup() %>% - mutate(modelling_group = paste(modelling_group, touchstone, sep = "-")) - - # Model average - df_avg <- df_cum %>% - group_by(year, touchstone) %>% - summarise( - !!avg_col := mean(!!sym(cum_col), na.rm = TRUE), - n_models = sum(!is.na(!!sym(cum_col))), - .groups = "drop" - ) %>% - filter(n_models >= 1) %>% - mutate(modelling_group = paste("Model Average", touchstone, sep = "-")) - - # Combine for plot - df_plot <- bind_rows( - df_cum %>% - select(year, modelling_group, touchstone, value = !!sym(cum_col)), - df_avg %>% - select(year, modelling_group, touchstone, value = !!sym(avg_col)) - ) - - df_plot <- df_plot %>% - group_by(modelling_group) %>% - filter(sum(value, na.rm = TRUE) > 0) %>% - ungroup() %>% - mutate( - line_type = ifelse( - grepl("Model Average", modelling_group), - "dashed", - "solid" - ) - ) - - if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { - message("No non-zero data to plot for ", disease_filter, ". Skipping plot.") - return(NULL) - } - p <- ggplot( - df_plot, + df, aes( x = year, y = value, From d0237fb1411aaf5244beffb0b6ee500e5814fe01 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Tue, 21 Apr 2026 09:08:02 +0100 Subject: [PATCH 10/17] WIP add docs and pkg infra --- .Rbuildignore | 1 + DESCRIPTION | 2 +- NAMESPACE | 11 +++++ _pkgdown.yml | 8 +++- jarl.toml | 2 + man/adaptive_round.Rd | 27 +++++++++++ man/add_campaign_id.Rd | 21 +++++++++ man/basic_burden_sanity.Rd | 2 +- man/check_demography_alignment.Rd | 2 +- man/compare_natl_subreg.Rd | 30 +++++++++++++ man/constants.Rd | 59 +++++++++++++++++++++++- man/flag_large_diffs.Rd | 50 +++++++++++++++++++++ man/gen_combined_df.Rd | 34 ++++++++++++++ man/gen_national_iqr.Rd | 34 ++++++++++++++ man/generate_diffs.Rd | 39 ++++++++++++++++ man/helpers.Rd | 2 +- man/plotting.Rd | 2 +- man/plotting_prep.Rd | 2 +- man/plotting_theme.Rd | 2 +- man/pres_test_filter_data.Rd | 56 +++++++++++++++++++++++ man/round_numeric.Rd | 15 +++++++ man/save_outputs.Rd | 60 +++++++++++++++++++++++++ man/significant_diff_plot.Rd | 12 +++++ man/validate_complete_incoming_files.Rd | 2 +- man/validate_file_dict_template.Rd | 2 +- man/validate_template_alignment.Rd | 2 +- man/validate_ts_year.Rd | 19 ++++++++ tests/spelling.R | 3 +- 28 files changed, 486 insertions(+), 15 deletions(-) create mode 100644 jarl.toml create mode 100644 man/adaptive_round.Rd create mode 100644 man/add_campaign_id.Rd create mode 100644 man/compare_natl_subreg.Rd create mode 100644 man/flag_large_diffs.Rd create mode 100644 man/gen_combined_df.Rd create mode 100644 man/gen_national_iqr.Rd create mode 100644 man/generate_diffs.Rd create mode 100644 man/pres_test_filter_data.Rd create mode 100644 man/round_numeric.Rd create mode 100644 man/save_outputs.Rd create mode 100644 man/significant_diff_plot.Rd create mode 100644 man/validate_ts_year.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 73513b2..fa4bfd8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,4 @@ ^scratch\.R$ ^scratch$ ^data-raw$ +^jarl\.toml$ diff --git a/DESCRIPTION b/DESCRIPTION index 1dee4e4..93969a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,4 +48,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/NAMESPACE b/NAMESPACE index 9bcbd19..6a9f7c8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,16 @@ export(basic_burden_sanity) export(check_demography_alignment) +export(compare_natl_subreg) export(file_dict_colnames) +export(filter_duplicates) +export(filter_excluded_diseases_ts) +export(filter_invalid_trajectories) +export(filter_recent_ts) +export(flag_large_diffs) +export(gen_combined_df) +export(gen_national_iqr) +export(generate_diffs) export(plot_age_patterns) export(plot_compare_demography) export(plot_coverage_set) @@ -15,6 +24,8 @@ export(prep_plot_coverage_set) export(prep_plot_demography) export(prep_plot_fvp) export(prep_plot_global_burden) +export(save_outputs) +export(significant_diff_plot) export(theme_vimc) export(theme_vimc_noxaxis) export(validate_complete_incoming_files) diff --git a/_pkgdown.yml b/_pkgdown.yml index bbc0826..ae53f13 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,10 +7,14 @@ reference: - title: Package-level documentation contents: - has_keyword("package_doc") - - title: Diagnostic functions - desc: Package diagnostic functions. + - title: Burden estimate diagnostics + desc: Functions to pressure-test burden estimates. contents: - has_keyword("diagnostics") + - title: Impact estimate diagnostics + desc: Functions to pressure-test impact estimates. + contents: + - has_keyword("pressure_testing") - title: Plotting prepartion desc: Prepare validated data for plotting. contents: diff --git a/jarl.toml b/jarl.toml new file mode 100644 index 0000000..a2e2af7 --- /dev/null +++ b/jarl.toml @@ -0,0 +1,2 @@ +[lint.assignment] +operator = "<-" diff --git a/man/adaptive_round.Rd b/man/adaptive_round.Rd new file mode 100644 index 0000000..ab1ae9c --- /dev/null +++ b/man/adaptive_round.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{adaptive_round} +\alias{adaptive_round} +\title{Adaptively round numerics} +\usage{ +adaptive_round(x, large_threshold = 1, small_sigfig = 2, large_digits = 1) +} +\arguments{ +\item{x}{A numeric vector.} + +\item{large_threshold}{A single number for the threshold over which numbers +are to be considered 'large'.} + +\item{small_sigfig}{A single number for the number of significant digits for +'small' numbers.} + +\item{large_digits}{A single number for the number of places to which 'large' +numbers should be rounded.} +} +\value{ +\code{x} rounded to either \code{large_digits} or to \code{small_sigfig}. +} +\description{ +Adaptively round numerics +} +\keyword{internal} diff --git a/man/add_campaign_id.Rd b/man/add_campaign_id.Rd new file mode 100644 index 0000000..7e007ed --- /dev/null +++ b/man/add_campaign_id.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{add_campaign_id} +\alias{add_campaign_id} +\title{Add campaign id to dataframe} +\usage{ +add_campaign_id(df, key_cols) +} +\arguments{ +\item{df}{A data.frame.} + +\item{key_cols}{A character vector of columns in \code{df} by which the data are +to be grouped.} +} +\value{ +\code{df} with a campaign identifier as a numeric. +} +\description{ +Add campaign id to dataframe +} +\keyword{internal} diff --git a/man/basic_burden_sanity.Rd b/man/basic_burden_sanity.Rd index 7217a7d..004cffd 100644 --- a/man/basic_burden_sanity.Rd +++ b/man/basic_burden_sanity.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{basic_burden_sanity} \alias{basic_burden_sanity} \title{Sanity checks on burden estimates} diff --git a/man/check_demography_alignment.Rd b/man/check_demography_alignment.Rd index 1b5ca1f..5afb1f0 100644 --- a/man/check_demography_alignment.Rd +++ b/man/check_demography_alignment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{check_demography_alignment} \alias{check_demography_alignment} \title{Check incoming burden cohort size against interpolated population} diff --git a/man/compare_natl_subreg.Rd b/man/compare_natl_subreg.Rd new file mode 100644 index 0000000..ecf5cea --- /dev/null +++ b/man/compare_natl_subreg.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_pressure_testing.R +\name{compare_natl_subreg} +\alias{compare_natl_subreg} +\title{Compare sub-regional and national estimates} +\usage{ +compare_natl_subreg( + df, + outcome = c("deaths_averted_rate", "dalys_averted_rate"), + activity_filter = c("campaign", "routine") +) +} +\arguments{ +\item{df}{A data.frame with sub-region level data on vaccination impact +outcomes.} + +\item{outcome}{A string for the outcome of interest. May be one of +\code{"deaths_averted_rate"} or \code{"dalys_averted_rate"}.} + +\item{activity_filter}{A string for the type of vaccination activity. May be +one of \code{"campaign"} or \code{"routine"}.} +} +\value{ +A data.frame of sub-regional vaccination impact estimates where the +impact is considered to be outside the tolerance limit. +} +\description{ +Compare sub-regional and national estimates +} +\keyword{pressure_testing} diff --git a/man/constants.Rd b/man/constants.Rd index 7608951..8faa19a 100644 --- a/man/constants.Rd +++ b/man/constants.Rd @@ -7,8 +7,19 @@ \alias{scenario_data_colnames} \alias{burden_outcome_names} \alias{colnames_plot_demog_compare} +\alias{colnames_df_missing_cols} +\alias{COLNAMES_KEY_PRESSURE_TEST} +\alias{COLNAMES_INTEREST_PRESSURE_TEST} \alias{EXCLUDED_DISEASES} -\alias{TOUCHSTONE_OLD} +\alias{N_TS_MIN_CHARS} +\alias{N_TS_YEAR_CHARS} +\alias{MIN_TS_YEAR} +\alias{MAX_TS_YEAR} +\alias{MIN_TS_MONTH} +\alias{MAX_TS_MONTH} +\alias{DEF_TOUCHSTONE_OLD} +\alias{DEF_TOUCHSTONE_NEW} +\alias{DEF_TOUCHSTONE_OLD_OLD} \title{Package constants} \format{ An object of class \code{character} of length 5. @@ -19,8 +30,30 @@ An object of class \code{character} of length 10. An object of class \code{character} of length 7. +An object of class \code{character} of length 5. + +An object of class \code{character} of length 7. + +An object of class \code{character} of length 14. + An object of class \code{character} of length 4. +An object of class \code{integer} of length 1. + +An object of class \code{integer} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{character} of length 1. + +An object of class \code{character} of length 1. + An object of class \code{character} of length 1. } \usage{ @@ -32,9 +65,31 @@ burden_outcome_names colnames_plot_demog_compare +colnames_df_missing_cols + +COLNAMES_KEY_PRESSURE_TEST + +COLNAMES_INTEREST_PRESSURE_TEST + EXCLUDED_DISEASES -TOUCHSTONE_OLD +N_TS_MIN_CHARS + +N_TS_YEAR_CHARS + +MIN_TS_YEAR + +MAX_TS_YEAR + +MIN_TS_MONTH + +MAX_TS_MONTH + +DEF_TOUCHSTONE_OLD + +DEF_TOUCHSTONE_NEW + +DEF_TOUCHSTONE_OLD_OLD } \description{ Package constants diff --git a/man/flag_large_diffs.Rd b/man/flag_large_diffs.Rd new file mode 100644 index 0000000..e0379b1 --- /dev/null +++ b/man/flag_large_diffs.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_pressure_testing.R +\name{flag_large_diffs} +\alias{flag_large_diffs} +\title{Flag significant changes in impact estimates} +\usage{ +flag_large_diffs( + changes_list, + iqr_df, + variable = c("deaths_averted", "dalys_averted"), + group_cols = c("country", "vaccine", "activity_type"), + threshold = 100, + touchstone_old = DEF_TOUCHSTONE_OLD_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) +} +\arguments{ +\item{changes_list}{A list of data.frames with one element per variable of +interest (see \code{variable}). Usually generated using \code{\link[=generate_diffs]{generate_diffs()}}.} + +\item{iqr_df}{A data.frame of inter-quartile differences generated using +\code{\link[=gen_national_iqr]{gen_national_iqr()}}.} + +\item{variable}{A string specifying the variable of interest. Must be one of +"deaths_averted" and "dalys_averted", and must be present as a name and +element of \code{changes_list}.} + +\item{group_cols}{A character vector of grouping columns. Defaults to +"country", "vaccine", "activity_type".} + +\item{threshold}{A single numeric value for the IQR multiplier. Defaults to +100.} + +\item{touchstone_old}{The previous touchstone identifier. Defaults to +\link{DEF_TOUCHSTONE_OLD_OLD}.} + +\item{touchstone_new}{The new touchstone identifier. Defaults to +\link{DEF_TOUCHSTONE_NEW}.} +} +\value{ +A filtered data.frame of differences in impact estimates flagged +as too large. Rows with differences within tolerance are removed. +} +\description{ +Calculates and flags whether the difference in impact estimates +between touchstones is greater than expected. A row is flagged if the +difference is greater than \code{threshold} \eqn{\times} the inter-quartile range +for cases where the IQR is greater than zero. +} +\keyword{pressure_testing} diff --git a/man/gen_combined_df.Rd b/man/gen_combined_df.Rd new file mode 100644 index 0000000..9a329d3 --- /dev/null +++ b/man/gen_combined_df.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_pressure_testing.R +\name{gen_combined_df} +\alias{gen_combined_df} +\title{Combine and align data from two touchstones} +\usage{ +gen_combined_df( + prev_dat, + df2, + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST +) +} +\arguments{ +\item{prev_dat}{A data.frame of impact estimates corresponding to an earlier +touchstone.} + +\item{df2}{A data.frame of impact estimates corresponding to a more recent +touchstone.} + +\item{interest_cols}{A character vector of columns of interest. Defaults to +\link{COLNAMES_INTEREST_PRESSURE_TEST}.} + +\item{key_cols}{A character vector of columns of interest. Defaults to +\link{COLNAMES_KEY_PRESSURE_TEST}.} +} +\value{ +A data.frame which is a full join of \code{prev_dat} and \code{df2}. Columns +are disambiguated with the suffixes \code{"_old"} and \code{"_new"}. +} +\description{ +Generates a full join of two data.frames, selecting for columns of interest. +} +\keyword{pressure_testing} diff --git a/man/gen_national_iqr.Rd b/man/gen_national_iqr.Rd new file mode 100644 index 0000000..c722e66 --- /dev/null +++ b/man/gen_national_iqr.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_pressure_testing.R +\name{gen_national_iqr} +\alias{gen_national_iqr} +\title{Generate IQR for key outcomes} +\usage{ +gen_national_iqr( + df, + group_cols = c("country", "vaccine", "activity_type"), + value_cols = c("deaths_averted", "dalys_averted"), + prefix = "national_iqr" +) +} +\arguments{ +\item{df}{A data.frame of impact estimates.} + +\item{group_cols}{A character vector of grouping columns. Defaults to +"country", "vaccine", "activity_type".} + +\item{value_cols}{A character vector of value columns. Defaults to +"deaths_averted" and "dalys_averted".} + +\item{prefix}{A string for the prefix applied to every IQR summary column. +Defaults to "national_iqr".} +} +\value{ +A \verb{} with the inter-quartile range of the columns +in \code{value_cols}, with the column name constructed as \verb{\{prefix\}_\{value_col\}} +using string interpolation. +} +\description{ +Generate IQR for key outcomes +} +\keyword{pressure_testing} diff --git a/man/generate_diffs.Rd b/man/generate_diffs.Rd new file mode 100644 index 0000000..17fcf32 --- /dev/null +++ b/man/generate_diffs.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_pressure_testing.R +\name{generate_diffs} +\alias{generate_diffs} +\title{Explore significant changes in deaths and DALYs} +\usage{ +generate_diffs( + prev_df, + curr_df, + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST, + touchstone = DEF_TOUCHSTONE_OLD +) +} +\arguments{ +\item{prev_df}{A \verb{} of impact estimates from the previous +touchstone.} + +\item{curr_df}{A \verb{} of impact estimates for the current +touchstone.} + +\item{interest_cols}{A character vector of columns to check for differences. +Defaults to \link{COLNAMES_INTEREST_PRESSURE_TEST}.} + +\item{key_cols}{A character vector of columns to use when assigning campaign +identifiers. Passed to \code{\link[=add_campaign_id]{add_campaign_id()}}, defaults to +\link{COLNAMES_KEY_PRESSURE_TEST}.} + +\item{touchstone}{A six character string that can be converted to a six digit +numeric giving a touchstone identifier in \code{YYYYMM} format.} +} +\value{ +A list of data.frames of differences between \code{prev_df} and \code{curr_df}, +with one list element per element of \code{interest_cols}. +} +\description{ +Explore significant changes in deaths and DALYs +} +\keyword{pressure_testing} diff --git a/man/helpers.Rd b/man/helpers.Rd index fb9c4df..af26b2d 100644 --- a/man/helpers.Rd +++ b/man/helpers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpers.R +% Please edit documentation in R/fn_helpers.R \name{helpers} \alias{helpers} \alias{make_novax_scenario} diff --git a/man/plotting.Rd b/man/plotting.Rd index 8dd565e..7f915a3 100644 --- a/man/plotting.Rd +++ b/man/plotting.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R +% Please edit documentation in R/fn_plotting_burden_diagnostics.R \name{plotting} \alias{plotting} \alias{plot_compare_demography} diff --git a/man/plotting_prep.Rd b/man/plotting_prep.Rd index 881a9f3..f8124bc 100644 --- a/man/plotting_prep.Rd +++ b/man/plotting_prep.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting_prep.R +% Please edit documentation in R/fn_plotting_prep_bur_diag.R \name{plotting_prep} \alias{plotting_prep} \alias{prep_plot_demography} diff --git a/man/plotting_theme.Rd b/man/plotting_theme.Rd index c2c49e0..fe46d78 100644 --- a/man/plotting_theme.Rd +++ b/man/plotting_theme.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R +% Please edit documentation in R/fn_plotting_burden_diagnostics.R \name{plotting_theme} \alias{plotting_theme} \alias{theme_vimc} diff --git a/man/pres_test_filter_data.Rd b/man/pres_test_filter_data.Rd new file mode 100644 index 0000000..003617a --- /dev/null +++ b/man/pres_test_filter_data.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_pressure_testing.R +\name{pres_test_filter_data} +\alias{pres_test_filter_data} +\alias{filter_recent_ts} +\alias{filter_excluded_diseases_ts} +\alias{filter_duplicates} +\alias{filter_invalid_trajectories} +\title{Filter data for touchstones or diseases} +\usage{ +filter_recent_ts(df, threshold = 202310) + +filter_excluded_diseases_ts(df, threshold = 202110) + +filter_duplicates(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) + +filter_invalid_trajectories( + df, + prev_data, + outcome = c("deaths_averted", "dalys_averted") +) +} +\arguments{ +\item{df}{A \verb{} holding impact data. This data.frame is not +checked for contents} + +\item{threshold}{A six-digit number that is checked as a valid touchstone +identifier (YYYYMM format) using \code{\link[=validate_ts_year]{validate_ts_year()}}.} + +\item{key_cols}{Key columns in \code{df} to check for duplicates.} + +\item{prev_data}{A \verb{} holding data from a previous touchstone for +the same scenarios as \code{df}.} + +\item{outcome}{A string giving the outcome of interest; may be one of +\code{"deaths_averted"} or \code{"dalys_averted"}.} +} +\value{ +A filtered \verb{}. +\itemize{ +\item \code{filter_recent_ts()} returns \code{df} with rows where the touchstone condition +is not met excluded. +\item \code{filter_excluded_diseases_ts()} returns \code{df} with rows where rows relating +to the \link{EXCLUDED_DISEASES}, when the touchstone year in \code{df} is less than the +\code{threshold}, excluded. +\item \code{filter_duplicates()} returns \code{df} with duplicated combinations of +\code{key_cols} removed. +\item \code{filter_invalid_trajectories()} returns \code{df} with bad outcome trajectories +(\code{NA} to non-\code{NA}) removed. +} +} +\description{ +A pair of helper functions allowing filtering out of recent touchstone values +and excluded diseases. +} +\keyword{pressure_testing} diff --git a/man/round_numeric.Rd b/man/round_numeric.Rd new file mode 100644 index 0000000..3c95234 --- /dev/null +++ b/man/round_numeric.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{round_numeric} +\alias{round_numeric} +\title{Round numeric columns of a data.frame} +\usage{ +round_numeric(df) +} +\arguments{ +\item{df}{A data.frame.} +} +\description{ +Round numeric columns of a data.frame +} +\keyword{internal} diff --git a/man/save_outputs.Rd b/man/save_outputs.Rd new file mode 100644 index 0000000..db41a66 --- /dev/null +++ b/man/save_outputs.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_pressure_testing.R +\name{save_outputs} +\alias{save_outputs} +\title{Save pressure-testing diagnostics to local file} +\usage{ +save_outputs( + missing_in_current, + missing_deaths, + missing_dalys, + changes_deaths, + changes_dalys, + subregional_flags_deaths_camp, + subregional_flags_deaths_rout, + subregional_flags_dalys_camp, + subregional_flags_dalys_rout, + output_dir = here::here("outputs") +) +} +\arguments{ +\item{missing_in_current}{A data.frame.} + +\item{missing_deaths}{A data.frame that is the output of +\code{\link[=filter_invalid_trajectories]{filter_invalid_trajectories()}} with the outcome \code{"deaths_averted"}.} + +\item{missing_dalys}{A data.frame that is the output of +\code{\link[=filter_invalid_trajectories]{filter_invalid_trajectories()}} with the outcome \code{"dalys_averted"}.} + +\item{changes_deaths}{A data.frame that is the output of \code{\link[=flag_large_diffs]{flag_large_diffs()}} +with the outcome \code{"deaths_averted"}.} + +\item{changes_dalys}{A data.frame that is the output of \code{\link[=flag_large_diffs]{flag_large_diffs()}} +with the outcome \code{"dalys_averted"}.} + +\item{subregional_flags_deaths_camp}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"deaths_averted_rate"} for the +\code{"campaign"} activity type.} + +\item{subregional_flags_deaths_rout}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"deaths_averted_rate"} for the +\code{"routine"} activity type.} + +\item{subregional_flags_dalys_camp}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the +\code{"campaign"} activity type.} + +\item{subregional_flags_dalys_rout}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the +\code{"campaign"} activity type.} +} +\value{ +None. Called for the convenience side-effect of saving data.frames as +\code{.Rds} format. +} +\description{ +Save pressure-testing diagnostics data.frames to local compressed files in +the \code{.Rds} format. Input data.frames are generated by other package functions +and are not checked here. +} +\keyword{pressure_testing} diff --git a/man/significant_diff_plot.Rd b/man/significant_diff_plot.Rd new file mode 100644 index 0000000..11b7cb4 --- /dev/null +++ b/man/significant_diff_plot.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_plotting_pressure_testing.R +\name{significant_diff_plot} +\alias{significant_diff_plot} +\title{Plot significant changes} +\usage{ +significant_diff_plot(df, outcome) +} +\description{ +Plot significant changes +} +\keyword{internal} diff --git a/man/validate_complete_incoming_files.Rd b/man/validate_complete_incoming_files.Rd index 148311b..75d3cd9 100644 --- a/man/validate_complete_incoming_files.Rd +++ b/man/validate_complete_incoming_files.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{validate_complete_incoming_files} \alias{validate_complete_incoming_files} \title{Validate files in a burden estimate} diff --git a/man/validate_file_dict_template.Rd b/man/validate_file_dict_template.Rd index 293a605..af2d770 100644 --- a/man/validate_file_dict_template.Rd +++ b/man/validate_file_dict_template.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{validate_file_dict_template} \alias{validate_file_dict_template} \title{Validate file dictionary template} diff --git a/man/validate_template_alignment.Rd b/man/validate_template_alignment.Rd index 7ff801a..7bbccc6 100644 --- a/man/validate_template_alignment.Rd +++ b/man/validate_template_alignment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{validate_template_alignment} \alias{validate_template_alignment} \title{Check incoming burden set against template} diff --git a/man/validate_ts_year.Rd b/man/validate_ts_year.Rd new file mode 100644 index 0000000..6812fcf --- /dev/null +++ b/man/validate_ts_year.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{validate_ts_year} +\alias{validate_ts_year} +\title{Check and return touchstone year-month} +\usage{ +validate_ts_year(x) +} +\arguments{ +\item{x}{A string for the touchstone identifier.} +} +\value{ +The first 6 characters of \code{x} converted to a numeric. Also has side +effects of erroring if conditions on \code{x} are not met. +} +\description{ +Check and return touchstone year-month +} +\keyword{internal} diff --git a/tests/spelling.R b/tests/spelling.R index a8cf85b..d60e024 100644 --- a/tests/spelling.R +++ b/tests/spelling.R @@ -1,6 +1,7 @@ -if (requireNamespace('spelling', quietly = TRUE)) +if (requireNamespace("spelling", quietly = TRUE)) { spelling::spell_check_test( vignettes = TRUE, error = FALSE, skip_on_cran = TRUE ) +} From 4a52fbb619331bd717bc7604aba97634f237ee4b Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Tue, 21 Apr 2026 10:27:22 +0100 Subject: [PATCH 11/17] Reorganise docs --- R/fn_burden_diagnostics.R | 10 ++-- R/fn_plotting_burden_diagnostics.R | 81 +++--------------------------- R/fn_plotting_helpers.R | 64 +++++++++++++++++++++++ R/fn_plotting_prep_bur_diag.R | 31 ++++++++---- R/fn_pressure_testing.R | 14 +++--- _pkgdown.yml | 42 ++++++++++------ 6 files changed, 133 insertions(+), 109 deletions(-) create mode 100644 R/fn_plotting_helpers.R diff --git a/R/fn_burden_diagnostics.R b/R/fn_burden_diagnostics.R index 0986acf..f77f09c 100644 --- a/R/fn_burden_diagnostics.R +++ b/R/fn_burden_diagnostics.R @@ -17,7 +17,7 @@ #' Prints a message to screen informing the user whether any action has been #' taken. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export validate_file_dict_template <- function( @@ -121,7 +121,7 @@ validate_file_dict_template <- function( #' @return A `` of the scenario file dictionary in `path_burden` if all #' checks pass. Otherwise, exits with informative errors on failed checks. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export validate_complete_incoming_files <- function( @@ -212,7 +212,7 @@ validate_complete_incoming_files <- function( #' @return A named list of checks carried out on `burden_set` to compare it #' against `template`, with information on missing and extra data. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export validate_template_alignment <- function(burden_set, template) { @@ -276,7 +276,7 @@ validate_template_alignment <- function(burden_set, template) { #' @return A `` giving the alignment, i.e., percentage difference of #' modelled population size from the WPP-derived population estimates. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export check_demography_alignment <- function( @@ -344,7 +344,7 @@ check_demography_alignment <- function( #' @return A character vector of messages generated by checks on burden #' estimates, with the length of the vector depending on how many checks fail. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export basic_burden_sanity <- function(burden) { diff --git a/R/fn_plotting_burden_diagnostics.R b/R/fn_plotting_burden_diagnostics.R index 9d137ac..71b7d54 100644 --- a/R/fn_plotting_burden_diagnostics.R +++ b/R/fn_plotting_burden_diagnostics.R @@ -1,72 +1,7 @@ -#' Plotting theme for vimcheck -#' -#' @description -#' A simple plotting theme building on [ggplot2::theme_bw()]. -#' -#' @name plotting_theme -#' @rdname plotting_theme -#' -#' @param x_text_angle The angle for X-axis labels. Defaults to 45 degrees. -#' -#' @param y_text_angle The angle for Y-axis labels. Defaults to 0 degrees. -#' -#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to -#' [ggplot2::theme()]. These will be applied in addition to, or in place of, -#' pre-existing elements defined by this theme. See the examples for this -#' theme's appearance. -#' -#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects. -#' -#' @keywords plotting -#' -#' @examples -#' # using an inbuilt dataset -#' data(mtcars) -#' -#' # standard theme -#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + -#' ggplot2::geom_point() + -#' theme_vimc() -#' -#' # with X-axis suppression -#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + -#' ggplot2::geom_point() + -#' theme_vimc_noxaxis() -#' -#' @export -theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { - ggplot2::theme_bw() + - ggplot2::theme( - axis.text.x = ggplot2::element_text( - size = 10, - angle = x_text_angle - ), - strip.text.y = ggplot2::element_text( - angle = y_text_angle - ), - plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"), - ... - ) -} - -#' @name plotting_theme -#' -#' @importFrom ggplot2 '%+replace%' -#' -#' @export -theme_vimc_noxaxis <- function() { - theme_vimc() %+replace% - ggplot2::theme( - axis.title.x = ggplot2::element_blank(), - axis.text.x = ggplot2::element_blank(), - axis.ticks.x = ggplot2::element_blank() - ) -} - #' Plot burden and impact diagnostics #' -#' @name plotting -#' @rdname plotting +#' @name plot_burden_diagnostics +#' @rdname plot_burden_diagnostics #' #' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars @@ -77,7 +12,7 @@ theme_vimc_noxaxis <- function() { #' @description #' Plotting functions for burden and impact diagnostics. All functions operate #' on data prepared for plotting by a corresponding -#' [plotting-preparation function][plotting_prep]. +#' [plotting-preparation function][plot_prep_burden_diagnostics]. #' #' @param fig_number The figure number displayed in the plot title. #' @@ -121,7 +56,7 @@ plot_compare_demography <- function(data, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param burden_age A `` with the minimum column names #' "age", "value_millions", "burden_outcome", and "scenario"; expected to be the @@ -154,7 +89,7 @@ plot_age_patterns <- function(burden_age, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param burden_decades A `` giving the burden by decade, up to #' `year_max`; expected to be the output of [prep_plot_burden_decades()]. @@ -182,7 +117,7 @@ plot_global_burden_decades <- function(burden_decades, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param burden_data This is expected to be a `` from a #' nested-`` constructed using [prep_plot_global_burden()]. @@ -226,7 +161,7 @@ plot_global_burden <- function(burden_data, outcome_name, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param coverage_set A `` that is the output of #' [prep_plot_coverage_set()]. @@ -271,7 +206,7 @@ plot_coverage_set <- function(coverage_set, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param fvp_data A `` of estimates of fully-vaccinated persons (FVPs) #' per scenario, with scenarios as factors in order of the number of diff --git a/R/fn_plotting_helpers.R b/R/fn_plotting_helpers.R new file mode 100644 index 0000000..4d14cce --- /dev/null +++ b/R/fn_plotting_helpers.R @@ -0,0 +1,64 @@ +#' Plotting theme for vimcheck +#' +#' @description +#' A simple plotting theme building on [ggplot2::theme_bw()]. +#' +#' @name plotting_theme +#' @rdname plotting_theme +#' +#' @param x_text_angle The angle for X-axis labels. Defaults to 45 degrees. +#' +#' @param y_text_angle The angle for Y-axis labels. Defaults to 0 degrees. +#' +#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to +#' [ggplot2::theme()]. These will be applied in addition to, or in place of, +#' pre-existing elements defined by this theme. See the examples for this +#' theme's appearance. +#' +#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects. +#' +#' @keywords plotting +#' +#' @examples +#' # using an inbuilt dataset +#' data(mtcars) +#' +#' # standard theme +#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + +#' ggplot2::geom_point() + +#' theme_vimc() +#' +#' # with X-axis suppression +#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + +#' ggplot2::geom_point() + +#' theme_vimc_noxaxis() +#' +#' @export +theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { + ggplot2::theme_bw() + + ggplot2::theme( + axis.text.x = ggplot2::element_text( + size = 10, + angle = x_text_angle + ), + strip.text.y = ggplot2::element_text( + angle = y_text_angle + ), + plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"), + ... + ) +} + +#' @name plotting_theme +#' +#' @importFrom ggplot2 '%+replace%' +#' +#' @export +theme_vimc_noxaxis <- function() { + theme_vimc() %+replace% + ggplot2::theme( + axis.title.x = ggplot2::element_blank(), + axis.text.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank() + ) +} diff --git a/R/fn_plotting_prep_bur_diag.R b/R/fn_plotting_prep_bur_diag.R index 277c083..b76b29f 100644 --- a/R/fn_plotting_prep_bur_diag.R +++ b/R/fn_plotting_prep_bur_diag.R @@ -1,12 +1,13 @@ #' Prepare data for plotting #' -#' @name plotting_prep -#' @rdname plotting_prep +#' @name plot_prep_burden_diagnostics +#' @rdname plot_prep_burden_diagnostics #' #' @description #' Transform burden estimate data from modelling groups to make them suitable -#' for plotting using an appropriate [plotting function][plotting]. Each -#' preparation function corresponds to a plotting function. +#' for plotting using an appropriate +#' [plotting function][plot_prep_burden_diagnostics]. Each preparation function +#' corresponds to a plotting function. #' #' @param burden For `prep_plot_demography()`, a `` output from #' [check_demography_alignment()]. @@ -31,6 +32,8 @@ #' #' - For `prep_plot_fvp()`: WIP. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_demography <- function(burden) { checkmate::assert_tibble(burden) @@ -88,7 +91,9 @@ prep_plot_demography <- function(burden) { burden_long } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics +#' +#' @keywords plot_prep_burden_diagnostics #' #' @export prep_plot_age <- function(burden) { @@ -109,11 +114,13 @@ prep_plot_age <- function(burden) { burden_summary } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics #' #' @param year_max The maximum year to be represented in a subsequent figure. #' For `prep_plot_burden_decades()`, must be a decade, i.e., multiple of 10. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_burden_decades <- function(burden, year_max) { checkmate::assert_tibble(burden) @@ -158,7 +165,9 @@ prep_plot_burden_decades <- function(burden, year_max) { burden_data } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics +#' +#' @keywords plot_prep_burden_diagnostics #' #' @export prep_plot_global_burden <- function(burden) { @@ -184,10 +193,12 @@ prep_plot_global_burden <- function(burden) { burden_nested } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics #' #' @param coverage WIP. Coverage data. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_coverage_set <- function(coverage) { checkmate::assert_tibble(coverage) @@ -245,12 +256,14 @@ prep_plot_coverage_set <- function(coverage) { coverage_set } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics #' #' @param fvp WIP. Data on counts of fully vaccinated persons. #' #' @param year_min Minimum year. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_fvp <- function(fvp, year_min, year_max) { checkmate::assert_tibble(fvp) diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R index 9f25309..fbcc3fb 100644 --- a/R/fn_pressure_testing.R +++ b/R/fn_pressure_testing.R @@ -13,7 +13,7 @@ #' @param threshold A six-digit number that is checked as a valid touchstone #' identifier (YYYYMM format) using [validate_ts_year()]. #' -#' @keywords pressure_testing +#' @keywords impact_diagnostics #' #' @return A filtered ``. #' @@ -207,7 +207,7 @@ filter_invalid_trajectories <- function( #' @return A list of data.frames of differences between `prev_df` and `curr_df`, #' with one list element per element of `interest_cols`. #' -#' @keywords pressure_testing +#' @keywords impact_diagnostics #' #' @export generate_diffs <- function( @@ -272,7 +272,7 @@ generate_diffs <- function( #' Generate IQR for key outcomes #' -#' @keywords pressure_testing +#' @keywords impact_diagnostics #' #' @param df A data.frame of impact estimates. #' @@ -358,7 +358,7 @@ gen_national_iqr <- function( #' @return A filtered data.frame of differences in impact estimates flagged #' as too large. Rows with differences within tolerance are removed. #' -#' @keywords pressure_testing +#' @keywords impact_diagnostics #' #' @export flag_large_diffs <- function( @@ -477,7 +477,7 @@ flag_large_diffs <- function( #' @return A data.frame which is a full join of `prev_dat` and `df2`. Columns #' are disambiguated with the suffixes `"_old"` and `"_new"`. #' -#' @keywords pressure_testing +#' @keywords impact_diagnostics #' #' @export gen_combined_df <- function( @@ -537,7 +537,7 @@ gen_combined_df <- function( #' @return A data.frame of sub-regional vaccination impact estimates where the #' impact is considered to be outside the tolerance limit. #' -#' @keywords pressure_testing +#' @keywords impact_diagnostics #' #' @export compare_natl_subreg <- function( @@ -657,7 +657,7 @@ compare_natl_subreg <- function( #' @return None. Called for the convenience side-effect of saving data.frames as #' `.Rds` format. #' -#' @keywords pressure_testing +#' @keywords impact_diagnostics #' #' @export save_outputs <- function( diff --git a/_pkgdown.yml b/_pkgdown.yml index ae53f13..40a29ee 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,32 +7,44 @@ reference: - title: Package-level documentation contents: - has_keyword("package_doc") + - title: Burden estimate diagnostics - desc: Functions to pressure-test burden estimates. + + - subtitle: Check burden estimates + contents: + - has_keyword("burden_diagnostics") + - subtitle: Prepare burden estimates for plotting + contents: + - plot_prep_burden_diagnostics + - subtitle: Plot burden estimates contents: - - has_keyword("diagnostics") + - plot_burden_diagnostics + + - title: Impact estimate diagnostics - desc: Functions to pressure-test impact estimates. + + - subtitle: Check impact estimates + contents: + - has_keyword("impact_diagnostics") + - subtitle: Prepare impact estimate checks for plotting contents: - - has_keyword("pressure_testing") - - title: Plotting prepartion - desc: Prepare validated data for plotting. + - plot_prep_impact_diagnostics + - subtitle: Plot impact estimates contents: - - plotting_prep - - title: Plotting functions - desc: Package plotting functions. + - plot_impact_diagnostics + + - title: Plotting helper functions contents: - - plotting - plotting_theme + - title: Internal functions - desc: Internal helper functions. contents: - has_keyword("internal") - - title: Data - desc: Package data. + + - title: Package data contents: - has_keyword("data") - - title: Constants - desc: Package constants. + + - title: Package constants contents: - has_keyword("constants") From bba30c5b5f66f90613a585b607eefbc384a66bd4 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Tue, 21 Apr 2026 10:27:57 +0100 Subject: [PATCH 12/17] WIP plotting and prep fns impact diagnostics --- ...ing.R => fn_plotting_impact_diagnostics.R} | 24 +++++++++++++----- ... => fn_plotting_prep_impact_diagnostics.R} | 25 +++++++++++++++++++ 2 files changed, 43 insertions(+), 6 deletions(-) rename R/{fn_plotting_pressure_testing.R => fn_plotting_impact_diagnostics.R} (90%) rename R/{fn_plotting_prep_pres_test.R => fn_plotting_prep_impact_diagnostics.R} (85%) diff --git a/R/fn_plotting_pressure_testing.R b/R/fn_plotting_impact_diagnostics.R similarity index 90% rename from R/fn_plotting_pressure_testing.R rename to R/fn_plotting_impact_diagnostics.R index 1bb3da4..66477a8 100644 --- a/R/fn_plotting_pressure_testing.R +++ b/R/fn_plotting_impact_diagnostics.R @@ -1,11 +1,12 @@ #' Plot significant changes #' +#' @name plot_impact_diagnostics +#' @rdname plot_impact_diagnostics +#' #' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars #' labeller label_wrap_gen #' -#' @keywords internal -#' #' @export plot_sig_diff <- function(df, outcome) { # retained here as this is a small df and a small operation @@ -27,6 +28,9 @@ plot_sig_diff <- function(df, outcome) { theme_vimc(x_text_angle = 0) } +#' @name plot_impact_diagnostics +#' +#' @export plot_diff <- function( combined, variable, @@ -77,6 +81,9 @@ plot_diff <- function( return(p) } +#' @name plot_impact_diagnostics +#' +#' @export plot_modelling_group_variation <- function(df, outcome) { ggplot(df) + aes( @@ -111,9 +118,11 @@ plot_modelling_group_variation <- function(df, outcome) { ) } - -#' Gavi plot - future deaths and DALYS averted, 2021-2024 -#' (current time window Gavi looking at, can be amended) +# Gavi plot - future deaths and DALYS averted, 2021-2024 +# (current time window Gavi looking at, can be amended) +#' @name plot_impact_diagnostics +#' +#' @export plot_vaccine_gavi <- function(df, outcome = "deaths_averted") { ggplot( df, @@ -139,7 +148,10 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") { labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year") } -### Gavi Cumulative Plot (modelling group + average) +# Gavi Cumulative Plot (modelling group + average) +#' @name plot_impact_diagnostics +#' +#' @export plot_cumul <- function(df, outcome, disease_filter) { p <- ggplot( df, diff --git a/R/fn_plotting_prep_pres_test.R b/R/fn_plotting_prep_impact_diagnostics.R similarity index 85% rename from R/fn_plotting_prep_pres_test.R rename to R/fn_plotting_prep_impact_diagnostics.R index d1959dd..ca5328b 100644 --- a/R/fn_plotting_prep_pres_test.R +++ b/R/fn_plotting_prep_impact_diagnostics.R @@ -1,3 +1,18 @@ +#' Prepare impact diagnostics for plotting +#' +#' @name plot_prep_impact_diagnostics +#' @rdname plot_prep_impact_diagnostics +#' +#' @description +#' A suite of helper functions that sit between impact diagnostics functions and +#' plotting functions. These functions have some basic checks on the input data, +#' but otherwise assume that users will not modify inputs. +#' +#' @param name description +#' +#' @return [prep_plot_mod_grp_varn()] returns a ... TODO add +#' +#' @export prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") { offset <- 1e-6 @@ -8,6 +23,11 @@ prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") { mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) } +#' @name plot_prep_impact_diagnostics +#' +#' @return [prep_plot_vax_gavi()] returns a ... TODO add +#' +#' @export prep_plot_vax_gavi <- function( df, prev_dat = NULL, @@ -57,6 +77,11 @@ prep_plot_vax_gavi <- function( df_combined } +#' @name plot_prep_impact_diagnostics +#' +#' @param df description +#' +#' @export prep_plot_cumul <- function(df, outcome, disease_filter) { outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))] From a455c5ae37ced424d171c57bb5a8ae298425edc6 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Tue, 21 Apr 2026 16:31:42 +0100 Subject: [PATCH 13/17] Add constants --- R/constants.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/constants.R b/R/constants.R index adb8175..0dc3a3f 100644 --- a/R/constants.R +++ b/R/constants.R @@ -81,6 +81,11 @@ COLNAMES_INTEREST_PRESSURE_TEST <- union( ) ) +#' @name constants +IMPACT_OUTCOMES <- c("deaths_averted", "dalys_averted") + +IMPACT_GROUP_VARS <- c("activity_type", "vaccine") + #' @name constants EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE") @@ -110,3 +115,6 @@ DEF_TOUCHSTONE_NEW <- "202310" #' @name constants DEF_TOUCHSTONE_OLD_OLD <- "202110" + +#' @name constants +COLOUR_VIMC <- "#008080" From beb2a0721f50069904f1f6e346132328b704bfff Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Tue, 21 Apr 2026 16:32:30 +0100 Subject: [PATCH 14/17] Update plot prep and plot fns --- R/fn_plotting_impact_diagnostics.R | 178 ++++++++---- R/fn_plotting_prep_impact_diagnostics.R | 372 +++++++++++++++--------- 2 files changed, 359 insertions(+), 191 deletions(-) diff --git a/R/fn_plotting_impact_diagnostics.R b/R/fn_plotting_impact_diagnostics.R index 66477a8..bc07b25 100644 --- a/R/fn_plotting_impact_diagnostics.R +++ b/R/fn_plotting_impact_diagnostics.R @@ -1,21 +1,45 @@ -#' Plot significant changes +#' Create impact diagnostics plots #' #' @name plot_impact_diagnostics #' @rdname plot_impact_diagnostics #' #' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars -#' labeller label_wrap_gen +#' labeller label_wrap_gen theme geom_segment geom_point +#' +#' @importFrom rlang .data +#' +#' @description +#' Plotting functions for impact diagnostics. See +#' [plotting-preparation functions][plot_prep_impact_diagnostics] for a set of +#' helper functions that prepare impact diagnostics for plotting. +#' +#' @param data A data.frame that gives the +#' +#' @param outcome #' #' @export -plot_sig_diff <- function(df, outcome) { +plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) { + checkmate::assert_tibble(data) + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) + # retained here as this is a small df and a small operation - df$label <- glue::glue( + data$label <- glue::glue( "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}" ) - ggplot(df, aes(x = diff, y = reorder(label, diff), color = modelling_group)) + - geom_segment(aes(x = 0, xend = diff, y = label, yend = label), size = 1) + + ggplot( + df, + aes( + .data$diff, + reorder(.data$label, .data$diff), + color = .data$modelling_group + ) + ) + + geom_segment( + aes(x = 0, xend = .data$diff, y = .data$label, yend = .data$label), + size = 1 + ) + geom_point(size = 2) + labs( x = "Difference", @@ -30,25 +54,53 @@ plot_sig_diff <- function(df, outcome) { #' @name plot_impact_diagnostics #' +#' @param group_vars A single string for the grouping variables. May be any of +#' `"activity_type"` and `"vaccine"`. +#' +#' @param touchstone_old A string for the previous touchstone in +#' format `"YYYYMM"`. +#' +#' @param touchstone_new A string for the current or new touchstone in +#' format `"YYYYMM"`. +#' #' @export plot_diff <- function( - combined, - variable, - group_vars = c("activity_type", "vaccine") + data, + outcome = IMPACT_OUTCOMES, + group_vars = IMPACT_GROUP_VARS, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW ) { - x_var <- paste0(variable, "_new") - y_var <- paste0(variable, "_old") - x_sym <- rlang::sym(x_var) - y_sym <- rlang::sym(y_var) + checkmate::assert_tibble(data) + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) + group_vars <- rlang::arg_match(group_vars, IMPACT_GROUP_VARS, multiple = TRUE) + + touchstone_old <- validate_ts_year(touchstone_old) + touchstone_new <- validate_ts_year(touchstone_new) - combined <- combined %>% - filter(!is.na(!!x_sym) & !is.na(!!y_sym)) + x_var <- glue::glue("{outcome}_new") + y_var <- glue::glue("{outcome}_old") - n_facets <- combined %>% - distinct(activity_type, vaccine) %>% - nrow() + # small operations retained + combined <- dplyr::filter( + combined, + dplyr::filter( + !is.na({{ x_var }}), + !is.na({{ y_var }}) + ) + ) - ncol_dynamic <- case_when( + # nolint start + n_facets <- nrow( + dplyr::distinct( + combined, + .data$activity_type, + .data$vaccine + ) + ) + # nolint end + + ncol_dynamic <- dplyr::case_when( n_facets <= 4 ~ 2, n_facets <= 9 ~ 3, n_facets <= 16 ~ 4, @@ -56,58 +108,62 @@ plot_diff <- function( TRUE ~ 8 ) - p <- ggplot(combined, aes(x = !!x_sym, y = !!y_sym)) + - geom_point(alpha = 0.5, colour = "#008080") + - geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + p <- ggplot( + combined, + aes({{ x_var }}, {{ y_var }}) + ) + + ggplot2::geom_point(alpha = 0.5, colour = COLOUR_VIMC) + + ggplot2::geom_abline(slope = 1, intercept = 0, linetype = "dashed") + facet_wrap( - ~ activity_type + vaccine, + facets = c("activity_type", "vaccine,"), scales = "free", ncol = ncol_dynamic ) + - scale_x_log10() + - scale_y_log10() + - theme_bw() + + ggplot2::scale_x_log10() + + ggplot2::scale_y_log10() + + # TODO: check if VIMC theme okay here + theme_vimc() + theme( - strip.text = element_text(size = 7), - panel.spacing = unit(0.05, "lines"), - axis.text = element_text(size = 6.5) + strip.text = ggplot2::element_text(size = 7), + panel.spacing = ggplot2::unit(0.05, "lines"), + axis.text = ggplot2::element_text(size = 6.5) ) + labs( - title = glue("{variable}: Current vs Previous Report"), - x = glue("{new} - {variable}"), - y = glue("{old} - {variable}") + title = glue::glue("{variable}: Current vs Previous Report"), + x = glue::glue("{new} - {variable}"), + y = glue::glue("{old} - {variable}") ) - return(p) + p } #' @name plot_impact_diagnostics #' #' @export -plot_modelling_group_variation <- function(df, outcome) { - ggplot(df) + +plot_modelling_group_variation <- function(data, outcome) { + ggplot(data) + aes( - fill = as.character(mod_num), - x = adj_outc, - y = reorder(vaccine, mean_outc) + fill = as.character(.data$mod_num), + x = .data$adj_outc, + y = reorder(.data$vaccine, .data$mean_outc) ) + - geom_density_ridges( + ggridges::geom_density_ridges( alpha = 0.5, stat = "binline", bins = 200, draw_baseline = FALSE ) + - facet_grid(. ~ activity_type, scales = "fixed") + - theme_bw() + + facet_grid(cols = ggplot2::vars("activity_type"), scales = "fixed") + + theme_vimc + theme( legend.position = "none", - axis.text.x = element_text(angle = 90, hjust = 1) + axis.text.x = ggplot2::element_text(angle = 90, hjust = 1) ) + - scale_x_log10( + ggplot2::scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), - labels = scales::trans_format("log10", math_format(10^.x)) + labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + - scale_fill_viridis_d() + + ggplot2::scale_fill_viridis_d() + labs( x = paste0( "Burden averted (", @@ -127,13 +183,13 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") { ggplot( df, aes( - x = reorder(disease, yearly_outcome), - y = yearly_outcome, - fill = factor(year) + x = reorder(.data$disease, .data$yearly_outcome), + y = .data$yearly_outcome, + fill = factor(.data$year) ) ) + geom_col(position = "dodge") + - scale_fill_manual( + ggplot2::scale_fill_manual( values = c( "2021" = "#008080", "2022" = "#E68424", @@ -143,8 +199,8 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") { ) + facet_wrap(~dataset, scales = "free_y") + scale_y_continuous(labels = scales::scientific) + - theme_bw() + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + theme_vimc() + + theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year") } @@ -156,17 +212,19 @@ plot_cumul <- function(df, outcome, disease_filter) { p <- ggplot( df, aes( - x = year, - y = value, - color = modelling_group, - linetype = line_type + x = .data$year, + y = .data$value, + color = .data$modelling_group, + linetype = .data$line_type ) ) + - geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) + - scale_linetype_manual(values = c("solid" = "solid", "dashed" = "dashed")) + - guides(linetype = "none") + + ggplot2::geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) + + ggplot2::scale_linetype_manual( + values = c(solid = "solid", dashed = "dashed") + ) + + ggplot2::guides(linetype = "none") + scale_y_continuous(labels = scales::scientific) + - theme_minimal() + + theme_vimc() + labs( x = "Year", y = paste("Cumulative", outcome), @@ -175,5 +233,5 @@ plot_cumul <- function(df, outcome, disease_filter) { ) + theme(legend.position = "bottom") - return(p) + p } diff --git a/R/fn_plotting_prep_impact_diagnostics.R b/R/fn_plotting_prep_impact_diagnostics.R index ca5328b..b999d33 100644 --- a/R/fn_plotting_prep_impact_diagnostics.R +++ b/R/fn_plotting_prep_impact_diagnostics.R @@ -8,70 +8,137 @@ #' plotting functions. These functions have some basic checks on the input data, #' but otherwise assume that users will not modify inputs. #' -#' @param name description +#' @param data A data.frame of impact estimates. #' -#' @return [prep_plot_mod_grp_varn()] returns a ... TODO add +#' @param comparison A data.frame of impact estimates used as a comparator for +#' `comparison`. +#' +#' @param outcome A string for the impact outcome; may be one of +#' "deaths_averted" or "dalys_averted". +#' +#' @return +#' +#' - [prep_plot_mod_grp_varn()] returns a `` TODO add #' #' @export -prep_plot_mod_grp_varn <- function(df2, df3, outc = "deaths") { - offset <- 1e-6 - - df2 %>% - left_join(df3, by = join_by(modelling_group, vaccine)) %>% - mutate(adj_outc = !!as.name(paste0(outc, "_averted")) + offset) %>% - group_by(vaccine) %>% - mutate(mean_outc = weighted.mean(adj_outc, fvps, na.rm = TRUE)) +prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) { + # TODO: df2 and df3 need informative names + + offset_manual <- 1e-6 + df_combined <- dplyr::left_join( + df2, + df3, + by = c("modelling_group", "vaccine") + ) + + df_combined <- dplyr::mutate( + df_combined, + adj_outc = {{ outcome }} + offset_manual + ) + + df_combined <- dplyr::group_by( + df_combined, + .data$vaccine + ) + + df_combined <- dplyr::mutate( + df_combined, + mean_outc = stats::weighted.mean(.data$adj_outc, .data$fvps, na.rm = TRUE) + ) + + df_combined } #' @name plot_prep_impact_diagnostics #' +#' @param data description +#' +#' @param outcome +#' +#' @param disease +#' +#' @param touchstone_old +#' +#' @param touchstone_new +#' #' @return [prep_plot_vax_gavi()] returns a ... TODO add #' #' @export prep_plot_vax_gavi <- function( - df, - prev_dat = NULL, - outcome = "deaths_averted" + data, + prev_data, + outcome = IMPACT_OUTCOMES, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW ) { - df_cur <- df %>% - select(all_of(key_cols), !!outcome) %>% - filter(year >= 2021, year <= 2024, disease != "COVID") %>% - group_by(disease, year) %>% - summarise( - yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), - .groups = "drop" - ) %>% - mutate(dataset = as.character(new)) - - df_prev <- prev_dat %>% - select(all_of(key_cols), !!outcome) %>% - filter(year >= 2021, year <= 2024, disease != "COVID") %>% - group_by(disease, year) %>% - summarise( - yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), - .groups = "drop" - ) %>% - mutate(dataset = as.character(old)) - - df_combined <- bind_rows(df_cur, df_prev) - - df_diff <- df_cur %>% - left_join( - df_prev, - by = c("disease", "year"), - suffix = c("_curr", "_prev") - ) %>% - mutate( - yearly_outcome = yearly_outcome_curr - yearly_outcome_prev, - dataset = "Difference" - ) %>% - select(disease, year, yearly_outcome, dataset) - - df_combined <- bind_rows(df_combined, df_diff) + checkmate::assert_tibble(data) + checkmate::assert_tibble(prev_data) + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) + touchstone_old <- validate_ts_year(touchstone_old) + touchstone_new <- validate_ts_year(touchstone_new) + + df_list <- Map( + list(data, prev_data), + list(touchstone_new, touchstone_old), + f = function(df, ts_id) { + df <- dplyr::select( + df, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + {{ outcome }} + ) + + df <- dplyr::filter( + df, + dplyr::between(.data$year, 2021, 2024), + Negate(grepl("COVID", .data$disease, ignore.case = TRUE)) + ) + + df <- dplyr::group_by(df, .data$disease, .data$year) + + df <- dplyr::summarise( + df, + yearly_outcome = sum({{ outcome }}, na.rm = TRUE), + .groups = "drop" + ) + + df <- dplyr::mutate( + df, + dataset = as.character(ts_id) + ) + } + ) + + df_combined <- dplyr::bind_rows(df_list) + + df_diff <- Reduce( + df_list, + f = function(x, y) { + dplyr::left_join( + x, + y, + by = c("disease", "year"), + suffix = c("_curr", "_prev") + ) + } + ) + + df_diff <- dplyr::mutate( + df_diff, + yearly_outcome = .data$yearly_outcome_curr - .data$yearly_outcome_prev, + dataset = "Difference" + ) + cols_to_select <- c("disease", "year", "yearly_outcome", "dataset") + df_diff <- dplyr::select(df_diff, {{ cols_to_select }}) + + df_combined <- dplyr::bind_rows(df_combined, df_diff) df_combined$dataset <- factor( df_combined$dataset, - levels = c(as.character(old), "Difference", as.character(new)) + levels = c( + as.character(touchstone_old), + "Difference", + as.character(touchstone_new) + ) ) df_combined @@ -79,99 +146,142 @@ prep_plot_vax_gavi <- function( #' @name plot_prep_impact_diagnostics #' -#' @param df description +#' @param data description +#' +#' @param outcome +#' +#' @param disease +#' +#' @param touchstone_old +#' +#' @param touchstone_new #' #' @export -prep_plot_cumul <- function(df, outcome, disease_filter) { - outcome_cols <- names(df)[str_detect(names(df), paste0("^", outcome, "_"))] - - outcome_sym <- sym(outcome) - cum_col <- paste0("cum_", outcome) - avg_col <- paste0("avg_", outcome) - - col_old <- paste0(outcome, "_old") - col_new <- paste0(outcome, "_new") - - combined2 <- df %>% - select( - country, - country_name, - disease, - vaccine, - activity_type, - year, - modelling_group, - all_of(outcome_cols) - ) %>% - pivot_longer( - cols = all_of(outcome_cols), - names_to = "touchstone", - values_to = "value" - ) %>% - mutate( - touchstone = str_remove(touchstone, paste0("^", outcome, "_")), - touchstone = recode( - touchstone, - "old" = as.character(old), - "new" = as.character(new), - .default = touchstone - ), - touchstone = factor( - touchstone, - levels = c(as.character(old), as.character(new)) - ) +prep_plot_cumul <- function( + data, + outcome, + disease, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) { + outcome_cols <- colnames(data)[stringr::str_detect( + colnames(data), + glue::glue("^{outcome}_") + )] + + cum_col <- glue::glue("cum_{outcome}") + avg_col <- glue::glue("avg_{outcome}") + + combined2 <- dplyr::select( + data, + {{ COLNAMES_KEY_PRESSURE_TEST }}, + {{ outcome_cols }} + ) + + combined2 <- tidyr::pivot_longer( + combined2, + cols = dplyr::all_of(outcome_cols), + names_to = "touchstone", + values_to = "value" + ) + + combined2 <- dplyr::mutate( + combined2, + touchstone = stringr::str_remove( + .data$touchstone, + glue::glue("^{outcome}_") + ), + touchstone = dplyr::replace_values( + .data$touchstone, + c("old", "new"), + as.character(c(touchstone_old, touchstone_new)), + .default = .data$touchstone + ), + touchstone = factor( + .data$touchstone, + levels = as.character(c(touchstone_old, touchstone_new)) ) + ) + # Cumulative values by modelling group - df_cum <- combined2 %>% - filter(disease == disease_filter) %>% - group_by(modelling_group, touchstone) %>% - complete(year = full_seq(year, 1)) %>% - arrange(year) %>% - mutate( - first_valid = min(year[!is.na(value)]), - !!cum_col := ifelse( - year < first_valid, - NA, - cumsum(replace_na(value, 0)) - ) - ) %>% - select(-first_valid) %>% - ungroup() %>% - mutate(modelling_group = paste(modelling_group, touchstone, sep = "-")) + df_cum <- dplyr::filter(combined2, .data$disease == disease) + df_cum <- dplyr::group_by( + df_cum, + .data$modelling_group, + .data$touchstone + ) + df_cum <- tidyr::complete( + df_cum, + year = tidyr::full_seq(.data$year, 1) + ) + df_cum <- dplyr::arrange(df_cum, .data$year) + df_cum <- dplyr::mutate( + df_cum, + first_valid = min(.data$year[!is.na(data$value)]), + {{ cum_col }} := dplyr::if_else( + .data$year < .data$first_valid, + NA_real_, + cumsum(tidyr::replace_na(.data$value, 0.0)) + ) + ) + + df_cum$first_valid <- NULL + df_cum <- dplyr::ungroup(df_cum) + df_cum <- dplyr::mutate( + df_cum, + modelling_group = glue::glue("{.data$modelling_group}-{.data$touchstone}") + ) # Model average - df_avg <- df_cum %>% - group_by(year, touchstone) %>% - summarise( - !!avg_col := mean(!!sym(cum_col), na.rm = TRUE), - n_models = sum(!is.na(!!sym(cum_col))), - .groups = "drop" - ) %>% - filter(n_models >= 1) %>% - mutate(modelling_group = paste("Model Average", touchstone, sep = "-")) + df_avg <- dplyr::summarise( + df_cum, + {{ avg_col }} := mean({{ cum_col }}, na.rm = TRUE), + n_models = sum(!is.na({{ cum_col }})), + .groups = c("year", "touchstone") + ) + df_avg <- dplyr::filter( + df_avg, + .data$n_models >= 1 + ) + df_avg <- dplyr::mutate( + df_avg, + modelling_group = glue::glue( + "Model Average-{.data$touchstone}" + ) + ) # Combine for plot - df_plot <- bind_rows( - df_cum %>% - select(year, modelling_group, touchstone, value = !!sym(cum_col)), - df_avg %>% - select(year, modelling_group, touchstone, value = !!sym(avg_col)) - ) - - df_plot <- df_plot %>% - group_by(modelling_group) %>% - filter(sum(value, na.rm = TRUE) > 0) %>% - ungroup() %>% - mutate( - line_type = ifelse( - grepl("Model Average", modelling_group), - "dashed", - "solid" - ) + cols_to_select <- c("year", "modelling_group", "touchstone") + df_plot <- dplyr::bind_rows( + dplyr::select( + df_cum, + {{ cols_to_select }}, + value = {{ cum_col }} + ), + dplyr::select( + df_avg, + {{ cols_to_select }}, + value = {{ avg_col }} + ) + ) + + df_plot <- dplyr::group_by(df_plot, .data$modelling_group) + df_plot <- dplyr::filter( + df_plot, + sum(.data$value, na.rm = TRUE) > 0 + ) + df_plot <- dplyr::ungroup(df_plot) + df_plot <- dplyr::mutate( + df_plot, + line_type = dplyr::if_else( + grepl("Model Average", .data$modelling_group, fixed = TRUE), + "dashed", + "solid" ) + ) if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { - message("No non-zero data to plot for ", disease_filter, ". Skipping plot.") + message("No non-zero data to plot for ", disease, ". Skipping plot.") return(NULL) } From 0424f171c438e63ae256283c71e455e004259b56 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Wed, 22 Apr 2026 15:55:43 +0100 Subject: [PATCH 15/17] Fixes from initial test run --- R/constants.R | 97 +++++++++++++ R/fn_helpers.R | 11 +- R/fn_plotting_burden_diagnostics.R | 2 +- R/fn_plotting_impact_diagnostics.R | 133 ++++++++++++------ R/fn_plotting_prep_impact_diagnostics.R | 128 +++++++++++------ R/fn_pressure_testing.R | 87 ++++++++---- inst/WORDLIST | 5 +- man/basic_burden_sanity.Rd | 2 +- man/check_demography_alignment.Rd | 2 +- man/compare_natl_subreg.Rd | 2 +- man/constants.Rd | 53 ++++++- man/flag_large_diffs.Rd | 4 +- man/gen_combined_df.Rd | 2 +- man/gen_national_iqr.Rd | 2 +- man/generate_diffs.Rd | 2 +- ...plotting.Rd => plot_burden_diagnostics.Rd} | 8 +- man/plot_impact_diagnostics.Rd | 66 +++++++++ ...rep.Rd => plot_prep_burden_diagnostics.Rd} | 10 +- man/plot_prep_impact_diagnostics.Rd | 82 +++++++++++ man/plotting_theme.Rd | 2 +- man/pres_test_filter_data.Rd | 9 +- man/save_outputs.Rd | 4 +- man/significant_diff_plot.Rd | 12 -- man/validate_complete_incoming_files.Rd | 2 +- man/validate_file_dict_template.Rd | 2 +- man/validate_template_alignment.Rd | 2 +- 26 files changed, 578 insertions(+), 153 deletions(-) rename man/{plotting.Rd => plot_burden_diagnostics.Rd} (91%) create mode 100644 man/plot_impact_diagnostics.Rd rename man/{plotting_prep.Rd => plot_prep_burden_diagnostics.Rd} (87%) create mode 100644 man/plot_prep_impact_diagnostics.Rd delete mode 100644 man/significant_diff_plot.Rd diff --git a/R/constants.R b/R/constants.R index 0dc3a3f..e4c3532 100644 --- a/R/constants.R +++ b/R/constants.R @@ -1,10 +1,17 @@ #' Package constants #' +#' @description +#' Constant values used in _vimcheck_. See the **Examples** section for the +#' constant values. +#' #' @name constants #' @rdname constants #' #' @keywords constants #' +#' @examples +#' file_dict_colnames +#' #' @export file_dict_colnames <- c( "scenario_type", @@ -15,6 +22,11 @@ file_dict_colnames <- c( ) #' @name constants +#' +#' @examples +#' scenario_data_colnames +#' +#' @export scenario_data_colnames <- c( "scenario_type", "scenario_type_description", @@ -23,6 +35,11 @@ scenario_data_colnames <- c( ) #' @name constants +#' +#' @examples +#' burden_outcome_names +#' +#' @export burden_outcome_names <- c( "cases", "deaths", @@ -37,6 +54,11 @@ burden_outcome_names <- c( ) #' @name constants +#' +#' @examples +#' colnames_plot_demog_compare +#' +#' @export colnames_plot_demog_compare <- c( "variable", "scenario", @@ -48,6 +70,11 @@ colnames_plot_demog_compare <- c( ) #' @name constants +#' +#' @examples +#' colnames_df_missing_cols +#' +#' @export colnames_df_missing_cols <- c( "country_name", "vaccine", @@ -57,6 +84,11 @@ colnames_df_missing_cols <- c( ) #' @name constants +#' +#' @examples +#' COLNAMES_KEY_PRESSURE_TEST +#' +#' @export COLNAMES_KEY_PRESSURE_TEST <- c( "country", "country_name", @@ -68,6 +100,11 @@ COLNAMES_KEY_PRESSURE_TEST <- c( ) #' @name constants +#' +#' @examples +#' COLNAMES_INTEREST_PRESSURE_TEST +#' +#' @export COLNAMES_INTEREST_PRESSURE_TEST <- union( COLNAMES_KEY_PRESSURE_TEST, c( @@ -82,39 +119,99 @@ COLNAMES_INTEREST_PRESSURE_TEST <- union( ) #' @name constants +#' +#' @examples +#' IMPACT_OUTCOMES +#' +#' @export IMPACT_OUTCOMES <- c("deaths_averted", "dalys_averted") IMPACT_GROUP_VARS <- c("activity_type", "vaccine") #' @name constants +#' +#' @examples +#' EXCLUDED_DISEASES +#' +#' @export EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE") #' @name constants +#' +#' @examples +#' N_TS_MIN_CHARS +#' +#' @export N_TS_MIN_CHARS <- 6L #' @name constants +#' +#' @examples +#' N_TS_YEAR_CHARS +#' +#' @export N_TS_YEAR_CHARS <- 4L #' @name constants +#' +#' @examples +#' MIN_TS_YEAR +#' +#' @export MIN_TS_YEAR <- 2000 #' @name constants +#' +#' @examples +#' MAX_TS_YEAR +#' +#' @export MAX_TS_YEAR <- 2100 #' @name constants +#' +#' @examples +#' MIN_TS_MONTH +#' +#' @export MIN_TS_MONTH <- 1 #' @name constants +#' +#' @examples +#' MAX_TS_MONTH +#' +#' @export MAX_TS_MONTH <- 12 #' @name constants +#' +#' @examples +#' DEF_TOUCHSTONE_OLD +#' +#' @export DEF_TOUCHSTONE_OLD <- "201910" #' @name constants +#' +#' @examples +#' DEF_TOUCHSTONE_NEW +#' +#' @export DEF_TOUCHSTONE_NEW <- "202310" #' @name constants +#' +#' @examples +#' DEF_TOUCHSTONE_OLD_OLD +#' +#' @export DEF_TOUCHSTONE_OLD_OLD <- "202110" #' @name constants +#' +#' @examples +#' COLOUR_VIMC +#' +#' @export COLOUR_VIMC <- "#008080" diff --git a/R/fn_helpers.R b/R/fn_helpers.R index 5159361..d3997f8 100644 --- a/R/fn_helpers.R +++ b/R/fn_helpers.R @@ -86,7 +86,8 @@ round_numeric <- function(df) { dplyr::mutate( df, dplyr::across( - .cols = dplyr::where(is.numeric) & !matches("year", ignore.case = TRUE), + .cols = dplyr::where(is.numeric) & + !dplyr::matches("year", ignore.case = TRUE), .fns = adaptive_round ) ) @@ -106,10 +107,10 @@ validate_ts_year <- function(x) { min.chars = N_TS_MIN_CHARS ) if (!has_n_chars) { - n_chars <- nchars(x) + n_chars <- nchar(x) # nolint used in cli cli::cli_abort( - "Touchstone year string should have at least {N_TS_MIN_CHARS}, but got \ - {n_chars} characters." + "Touchstone year should be a string with at least {N_TS_MIN_CHARS} \ + characters, but got class {.cls {class(x)}} with {n_chars} characters." ) } @@ -170,7 +171,7 @@ add_campaign_id <- function(df, key_cols) { must.include = key_cols ) if (!has_cols) { - missing_cols <- setdiff(colnames(df), key_cols) + missing_cols <- setdiff(colnames(df), key_cols) # nolint used in cli cli::cli_abort( "Expected {.code df} to have columns {.str {key_cols}} but columns \ {.str {missing_cols}} are missing." diff --git a/R/fn_plotting_burden_diagnostics.R b/R/fn_plotting_burden_diagnostics.R index 71b7d54..b9a30a6 100644 --- a/R/fn_plotting_burden_diagnostics.R +++ b/R/fn_plotting_burden_diagnostics.R @@ -10,7 +10,7 @@ #' @importFrom rlang .data #' #' @description -#' Plotting functions for burden and impact diagnostics. All functions operate +#' Plotting functions for burden diagnostics. All functions operate #' on data prepared for plotting by a corresponding #' [plotting-preparation function][plot_prep_burden_diagnostics]. #' diff --git a/R/fn_plotting_impact_diagnostics.R b/R/fn_plotting_impact_diagnostics.R index bc07b25..dfb1e8f 100644 --- a/R/fn_plotting_impact_diagnostics.R +++ b/R/fn_plotting_impact_diagnostics.R @@ -1,5 +1,11 @@ #' Create impact diagnostics plots #' +#' @description +#' Functions that create impact diagnostics plots (or plotting objects). All +#' functions are associated with one other upstream data processing function, +#' and can be used in a pipe with that function. Where appropriate, outcome +#' selection and label preparation is automated to reduce function arguments. +#' #' @name plot_impact_diagnostics #' @rdname plot_impact_diagnostics #' @@ -14,9 +20,26 @@ #' [plotting-preparation functions][plot_prep_impact_diagnostics] for a set of #' helper functions that prepare impact diagnostics for plotting. #' -#' @param data A data.frame that gives the +#' @param data A data.frame suitable for plotting. +#' +#' - `plot_sig_diff()`: Output of +#' [`flag_large_diff()`][plot_prep_impact_diagnostics]. +#' +#' - `plot_diff()`: Output of +#' [`gen_combined_df()`][plot_prep_impact_diagnostics]. +#' +#' - `plot_modelling_group_variation()`: Output of +#' [`plot_prep_mod_grp_varn()`][plot_prep_impact_diagnostics]. +#' +#' - `plot_vaccine_gavi()`: Output of +#' [`plot_prep_vax_gavi()`][plot_prep_impact_diagnostics] +#' +#' - `plot_cumul()`: Output of +#' [`plot_prep_cumul()`][plot_prep_impact_diagnostics] #' -#' @param outcome +#' @param outcome A string for the impact outcome. One of [IMPACT_OUTCOMES]. +#' +#' @return A `` object that can be viewed or saved. #' #' @export plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) { @@ -25,14 +48,14 @@ plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) { # retained here as this is a small df and a small operation data$label <- glue::glue( - "{df$country_name} | {df$vaccine} | {df$activity_type} | {df$year}" + "{data$country_name} | {data$vaccine} | {data$activity_type} | {data$year}" ) ggplot( - df, + data, aes( .data$diff, - reorder(.data$label, .data$diff), + stats::reorder(.data$label, .data$diff), color = .data$modelling_group ) ) + @@ -55,13 +78,13 @@ plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) { #' @name plot_impact_diagnostics #' #' @param group_vars A single string for the grouping variables. May be any of -#' `"activity_type"` and `"vaccine"`. +#' [IMPACT_OUTCOMES], which are `"activity_type"` and `"vaccine"`. #' #' @param touchstone_old A string for the previous touchstone in -#' format `"YYYYMM"`. +#' format `"YYYYMM"`. Defaults to [DEF_TOUCHSTONE_OLD]. #' #' @param touchstone_new A string for the current or new touchstone in -#' format `"YYYYMM"`. +#' format `"YYYYMM"`. Defaults to [DEF_TOUCHSTONE_NEW]. #' #' @export plot_diff <- function( @@ -73,7 +96,10 @@ plot_diff <- function( ) { checkmate::assert_tibble(data) outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) - group_vars <- rlang::arg_match(group_vars, IMPACT_GROUP_VARS, multiple = TRUE) + checkmate::assert_subset( + group_vars, + IMPACT_GROUP_VARS + ) touchstone_old <- validate_ts_year(touchstone_old) touchstone_new <- validate_ts_year(touchstone_new) @@ -82,18 +108,24 @@ plot_diff <- function( y_var <- glue::glue("{outcome}_old") # small operations retained - combined <- dplyr::filter( - combined, - dplyr::filter( - !is.na({{ x_var }}), - !is.na({{ y_var }}) + # NOTE: data masking using `{{` does not appear to work + # see last example in https://dplyr.tidyverse.org/reference/filter.html + # + # NOTE: exclude values < 1 to prevent log transform errors + data <- dplyr::filter_out( + data, + dplyr::when_any( + is.na(.data[[x_var]]), + is.na(.data[[y_var]]), + .data[[x_var]] < 1, + .data[[y_var]] < 1 ) ) # nolint start n_facets <- nrow( dplyr::distinct( - combined, + data, .data$activity_type, .data$vaccine ) @@ -109,29 +141,28 @@ plot_diff <- function( ) p <- ggplot( - combined, - aes({{ x_var }}, {{ y_var }}) + data, + aes(.data[[x_var]], .data[[y_var]]) ) + ggplot2::geom_point(alpha = 0.5, colour = COLOUR_VIMC) + ggplot2::geom_abline(slope = 1, intercept = 0, linetype = "dashed") + facet_wrap( - facets = c("activity_type", "vaccine,"), + facets = c("activity_type", "vaccine"), scales = "free", ncol = ncol_dynamic ) + ggplot2::scale_x_log10() + ggplot2::scale_y_log10() + - # TODO: check if VIMC theme okay here - theme_vimc() + + theme_vimc(0) + theme( strip.text = ggplot2::element_text(size = 7), panel.spacing = ggplot2::unit(0.05, "lines"), axis.text = ggplot2::element_text(size = 6.5) ) + labs( - title = glue::glue("{variable}: Current vs Previous Report"), - x = glue::glue("{new} - {variable}"), - y = glue::glue("{old} - {variable}") + title = glue::glue("{outcome}: Current vs Previous Report"), + x = glue::glue("{touchstone_new} - {outcome}"), + y = glue::glue("{touchstone_old} - {outcome}") ) p @@ -140,12 +171,29 @@ plot_diff <- function( #' @name plot_impact_diagnostics #' #' @export -plot_modelling_group_variation <- function(data, outcome) { +plot_modelling_group_variation <- function(data) { + checkmate::assert_tibble(data, min.rows = 1L, min.cols = 1L) + + outcome <- unique(data[["outcome_name"]]) + checkmate::assert_string(outcome) + + outcome_short <- stringr::word(outcome, sep = "_") + outcome_short <- dplyr::if_else( + outcome_short == "dalys", + stringr::str_to_upper(outcome_short), + outcome_short + ) + x_lab <- glue::glue("Burden averted ({outcome_short})") + + # for scales formatting + .x <- NULL + + # TODO: should NA-producing values (< 1) be removed? ggplot(data) + aes( fill = as.character(.data$mod_num), x = .data$adj_outc, - y = reorder(.data$vaccine, .data$mean_outc) + y = stats::reorder(.data$vaccine, .data$mean_outc) ) + ggridges::geom_density_ridges( alpha = 0.5, @@ -154,22 +202,18 @@ plot_modelling_group_variation <- function(data, outcome) { draw_baseline = FALSE ) + facet_grid(cols = ggplot2::vars("activity_type"), scales = "fixed") + - theme_vimc + - theme( - legend.position = "none", - axis.text.x = ggplot2::element_text(angle = 90, hjust = 1) - ) + ggplot2::scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x)) ) + ggplot2::scale_fill_viridis_d() + + theme_vimc() + + theme( + legend.position = "none", + axis.text.x = ggplot2::element_text(angle = 90, hjust = 1) + ) + labs( - x = paste0( - "Burden averted (", - ifelse(outcome == "dalys", "DALYs", outcome), - ")" - ), + x = x_lab, y = "Vaccine" ) } @@ -179,11 +223,14 @@ plot_modelling_group_variation <- function(data, outcome) { #' @name plot_impact_diagnostics #' #' @export -plot_vaccine_gavi <- function(df, outcome = "deaths_averted") { +plot_vaccine_gavi <- function(data) { + checkmate::assert_tibble(data) + outcome <- unique(data[["outcome_name"]]) + ggplot( - df, + data, aes( - x = reorder(.data$disease, .data$yearly_outcome), + x = stats::reorder(.data$disease, .data$yearly_outcome), y = .data$yearly_outcome, fill = factor(.data$year) ) @@ -208,9 +255,13 @@ plot_vaccine_gavi <- function(df, outcome = "deaths_averted") { #' @name plot_impact_diagnostics #' #' @export -plot_cumul <- function(df, outcome, disease_filter) { +plot_cumul <- function(data) { + checkmate::assert_tibble(data) + outcome <- unique(data[["outcome_name"]]) + disease <- unique(data[["disease"]]) + p <- ggplot( - df, + data, aes( x = .data$year, y = .data$value, @@ -229,7 +280,7 @@ plot_cumul <- function(df, outcome, disease_filter) { x = "Year", y = paste("Cumulative", outcome), color = "Modelling Group", - title = paste("Cumulative", outcome, "Over Time –", disease_filter) + title = paste("Cumulative", outcome, "Over Time -", disease) ) + theme(legend.position = "bottom") diff --git a/R/fn_plotting_prep_impact_diagnostics.R b/R/fn_plotting_prep_impact_diagnostics.R index b999d33..d529d7d 100644 --- a/R/fn_plotting_prep_impact_diagnostics.R +++ b/R/fn_plotting_prep_impact_diagnostics.R @@ -5,24 +5,69 @@ #' #' @description #' A suite of helper functions that sit between impact diagnostics functions and -#' plotting functions. These functions have some basic checks on the input data, -#' but otherwise assume that users will not modify inputs. +#' plotting functions. These functions transform and aggregate impact estimates +#' to prepare them for visualisation. Functions have basic checks on input data +#' but otherwise assume users will not modify inputs. #' -#' @param data A data.frame of impact estimates. +#' @param df2 A `` of impact estimates with at least columns +#' `modelling_group`, `vaccine`, outcome variable, and `fvps` (doses +#' delivered). Used as the primary data source for calculations in +#' [prep_plot_mod_grp_varn()]. #' -#' @param comparison A data.frame of impact estimates used as a comparator for -#' `comparison`. +#' @param df3 A `` of modelling group and vaccine combinations, +#' typically with one row per modelling group per vaccine. Joined with `df2` +#' to ensure complete group coverage in [prep_plot_mod_grp_varn()]. #' -#' @param outcome A string for the impact outcome; may be one of -#' "deaths_averted" or "dalys_averted". +#' @param data A `` of impact estimates with columns including at least +#' those in [COLNAMES_KEY_PRESSURE_TEST], the outcome variable, and +#' potentially outcome-specific columns (for [prep_plot_cumul()]). Used in +#' [prep_plot_vax_gavi()] and [prep_plot_cumul()]. +#' +#' @param prev_data A `` of impact estimates from a previous touchstone, +#' used as a comparison baseline in [prep_plot_vax_gavi()]. Should have the +#' same structure as `data`. +#' +#' @param outcome A character string for the impact outcome. Must be one of +#' `"deaths_averted"` or `"dalys_averted"`. For [prep_plot_cumul()], +#' `data` must include columns named `{outcome}_old` and `{outcome}_new`. +#' +#' @param disease A character string specifying a single disease for filtering +#' in [prep_plot_cumul()]. +#' +#' @param touchstone_old A six-character touchstone identifier (YYYYMM format) +#' for the previous dataset. Defaults to [DEF_TOUCHSTONE_OLD]. Used in +#' [prep_plot_vax_gavi()] and [prep_plot_cumul()]. +#' +#' @param touchstone_new A six-character touchstone identifier (YYYYMM format) +#' for the current dataset. Defaults to [DEF_TOUCHSTONE_NEW]. Used in +#' [prep_plot_vax_gavi()] and [prep_plot_cumul()]. +#' +#' @importFrom rlang := #' #' @return #' -#' - [prep_plot_mod_grp_varn()] returns a `` TODO add +#' - [prep_plot_mod_grp_varn()] returns a grouped `` (grouped by +#' `vaccine`) with all columns from `df2` and `df3` plus derived columns: +#' `adj_outc` (adjusted outcome with small offset), `outcome_name` (input +#' outcome), and `mean_outc` (vaccine-level weighted mean outcome). +#' +#' - [prep_plot_vax_gavi()] returns a `` with columns `disease`, +#' `year`, `yearly_outcome`, `dataset` (factor with levels for old touchstone, +#' "Difference", and new touchstone), and `outcome_name`. Summarizes outcomes +#' by disease and year across two touchstones. +#' +#' - [prep_plot_cumul()] returns a `` with columns `year`, +#' `modelling_group`, `touchstone`, `value` (cumulative or average outcome), +#' `line_type` ("solid" for individual models, "dashed" for model average), +#' and `outcome_name`. Returns `NULL` if the specified disease has no non-zero +#' data to plot. #' #' @export prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) { - # TODO: df2 and df3 need informative names + checkmate::assert_tibble(df2, min.rows = 1L, min.cols = 1L) + checkmate::assert_tibble(df3, min.rows = 1L, min.cols = 1L) + + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) offset_manual <- 1e-6 df_combined <- dplyr::left_join( @@ -33,7 +78,8 @@ prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) { df_combined <- dplyr::mutate( df_combined, - adj_outc = {{ outcome }} + offset_manual + adj_outc = .data[[outcome]] + offset_manual, + outcome_name = outcome ) df_combined <- dplyr::group_by( @@ -51,17 +97,12 @@ prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) { #' @name plot_prep_impact_diagnostics #' -#' @param data description -#' -#' @param outcome -#' -#' @param disease -#' -#' @param touchstone_old -#' -#' @param touchstone_new +#' @param data A `` of impact estimates with columns including at least +#' those in [COLNAMES_KEY_PRESSURE_TEST], the outcome variable, and +#' potentially other columns for analysis. #' -#' @return [prep_plot_vax_gavi()] returns a ... TODO add +#' @param prev_data A `` of impact estimates from a previous touchstone, +#' used as a comparison baseline. Should have the same structure as `data`. #' #' @export prep_plot_vax_gavi <- function( @@ -89,15 +130,19 @@ prep_plot_vax_gavi <- function( df <- dplyr::filter( df, - dplyr::between(.data$year, 2021, 2024), - Negate(grepl("COVID", .data$disease, ignore.case = TRUE)) + dplyr::between(.data$year, 2021, 2024) + ) + + df <- dplyr::filter_out( + df, + grepl("COVID", .data$disease, ignore.case = TRUE) ) df <- dplyr::group_by(df, .data$disease, .data$year) df <- dplyr::summarise( df, - yearly_outcome = sum({{ outcome }}, na.rm = TRUE), + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), .groups = "drop" ) @@ -141,20 +186,15 @@ prep_plot_vax_gavi <- function( ) ) + df_combined$outcome_name <- outcome + df_combined } #' @name plot_prep_impact_diagnostics #' -#' @param data description -#' -#' @param outcome -#' -#' @param disease -#' -#' @param touchstone_old -#' -#' @param touchstone_new +#' @param disease A character string specifying a single disease for filtering +#' and analysis. #' #' @export prep_plot_cumul <- function( @@ -164,6 +204,12 @@ prep_plot_cumul <- function( touchstone_old = DEF_TOUCHSTONE_OLD, touchstone_new = DEF_TOUCHSTONE_NEW ) { + checkmate::assert_tibble(data) + checkmate::assert_subset( + outcome, + IMPACT_OUTCOMES + ) + outcome_cols <- colnames(data)[stringr::str_detect( colnames(data), glue::glue("^{outcome}_") @@ -177,6 +223,7 @@ prep_plot_cumul <- function( {{ COLNAMES_KEY_PRESSURE_TEST }}, {{ outcome_cols }} ) + combined2 <- combined2[combined2$disease == disease, ] combined2 <- tidyr::pivot_longer( combined2, @@ -193,9 +240,8 @@ prep_plot_cumul <- function( ), touchstone = dplyr::replace_values( .data$touchstone, - c("old", "new"), - as.character(c(touchstone_old, touchstone_new)), - .default = .data$touchstone + from = c("old", "new"), + to = as.character(c(touchstone_old, touchstone_new)) ), touchstone = factor( .data$touchstone, @@ -204,9 +250,8 @@ prep_plot_cumul <- function( ) # Cumulative values by modelling group - df_cum <- dplyr::filter(combined2, .data$disease == disease) df_cum <- dplyr::group_by( - df_cum, + combined2, .data$modelling_group, .data$touchstone ) @@ -217,7 +262,7 @@ prep_plot_cumul <- function( df_cum <- dplyr::arrange(df_cum, .data$year) df_cum <- dplyr::mutate( df_cum, - first_valid = min(.data$year[!is.na(data$value)]), + first_valid = min(.data$year[!is.na(.data$value)]), {{ cum_col }} := dplyr::if_else( .data$year < .data$first_valid, NA_real_, @@ -237,7 +282,7 @@ prep_plot_cumul <- function( df_cum, {{ avg_col }} := mean({{ cum_col }}, na.rm = TRUE), n_models = sum(!is.na({{ cum_col }})), - .groups = c("year", "touchstone") + .by = c("year", "touchstone") ) df_avg <- dplyr::filter( df_avg, @@ -280,10 +325,13 @@ prep_plot_cumul <- function( ) ) + # add outcome name + df_plot$outcome_name <- outcome + if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { message("No non-zero data to plot for ", disease, ". Skipping plot.") return(NULL) } - df_plot + tibble::as_tibble(df_plot) } diff --git a/R/fn_pressure_testing.R b/R/fn_pressure_testing.R index fbcc3fb..c5c9a1f 100644 --- a/R/fn_pressure_testing.R +++ b/R/fn_pressure_testing.R @@ -11,7 +11,8 @@ #' checked for contents #' #' @param threshold A six-digit number that is checked as a valid touchstone -#' identifier (YYYYMM format) using [validate_ts_year()]. +#' identifier (YYYYMM format) using [validate_ts_year()]. Defaults to +#' [DEF_TOUCHSTONE_NEW] (`"202310"`). #' #' @keywords impact_diagnostics #' @@ -31,7 +32,7 @@ #' (`NA` to non-`NA`) removed. #' #' @export -filter_recent_ts <- function(df, threshold = 202310) { +filter_recent_ts <- function(df, threshold = DEF_TOUCHSTONE_NEW) { checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L) checkmate::assert_names( names(df), @@ -58,7 +59,10 @@ filter_recent_ts <- function(df, threshold = 202310) { #' @name pres_test_filter_data #' #' @export -filter_excluded_diseases_ts <- function(df, threshold = 202110) { +filter_excluded_diseases_ts <- function( + df, + threshold = DEF_TOUCHSTONE_OLD_OLD +) { checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L) checkmate::assert_names( names(df), @@ -123,10 +127,9 @@ filter_invalid_trajectories <- function( ) { checkmate::assert_data_frame(df, min.cols = 1L, min.rows = 1L) - # TODO: can we assume prev_data is at least the size of df? + # TODO: can we find checks for prev_data size in reln to df? rows? cols? checkmate::assert_data_frame( prev_data, - min.cols = ncol(df), min.rows = nrow(df) ) @@ -226,11 +229,11 @@ generate_diffs <- function( # check interest cols in dfs. key cols are check in `add_campaign_id` checkmate::assert_names( colnames(prev_df), - interest_cols + must.include = interest_cols ) checkmate::assert_names( colnames(curr_df), - interest_cols + must.include = interest_cols ) touchstone <- validate_ts_year(touchstone) @@ -267,7 +270,7 @@ generate_diffs <- function( interest_cols ) - changes + tibble::as_tibble(changes) } #' Generate IQR for key outcomes @@ -300,10 +303,9 @@ gen_national_iqr <- function( checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE) # NOTE: restricting value columns to deaths and dalys averted - value_cols <- rlang::arg_match( + checkmate::assert_subset( value_cols, - c("deaths_averted", "dalys_averted"), - multiple = TRUE + c("deaths_averted", "dalys_averted") ) checkmate::assert_string(prefix) @@ -325,6 +327,8 @@ gen_national_iqr <- function( ), .groups = "drop" ) + + tibble::as_tibble(df) } #' Flag significant changes in impact estimates @@ -341,7 +345,7 @@ gen_national_iqr <- function( #' [gen_national_iqr()]. #' #' @param variable A string specifying the variable of interest. Must be one of -#' "deaths_averted" and "dalys_averted", and must be present as a name and +#' "deaths_averted" or "dalys_averted", and must be present as a name and #' element of `changes_list`. #' #' @inheritParams gen_national_iqr @@ -370,7 +374,7 @@ flag_large_diffs <- function( touchstone_old = DEF_TOUCHSTONE_OLD_OLD, touchstone_new = DEF_TOUCHSTONE_NEW ) { - checkmate::assert_list(changes_list, "data.frame") + checkmate::assert_list(changes_list, c("data.frame", "NULL")) checkmate::assert_data_frame(iqr_df, min.rows = 1L, min.cols = 1L) variable <- rlang::arg_match(variable) @@ -454,7 +458,9 @@ flag_large_diffs <- function( rename_lookup ) - dplyr::arrange(df_compare, dplyr::desc(diff)) + df_compare <- dplyr::arrange(df_compare, dplyr::desc(diff)) + + tibble::as_tibble(df_compare) } #' Combine and align data from two touchstones @@ -492,15 +498,13 @@ gen_combined_df <- function( min.rows = 1L ) - # TODO: df2 needs a better name - prev_df <- dplyr::select(prev_dat, {{ interest_cols }}) - cur_df <- dplyr::select(df2, {{ interest_cols }}) - - combined <- dplyr::full_join( - prev_df, - cur_df, - by = key_cols, - suffix = c("_old", "_new") + checkmate::assert_subset( + interest_cols, + COLNAMES_INTEREST_PRESSURE_TEST + ) + checkmate::assert_subset( + key_cols, + COLNAMES_KEY_PRESSURE_TEST ) cols_to_select <- c( @@ -517,10 +521,37 @@ gen_combined_df <- function( "dalys_averted_new" ) - dplyr::select( + checkmate::assert_names( + colnames(prev_dat), + must.include = c(interest_cols, key_cols) + ) + checkmate::assert_names( + colnames(df2), + must.include = c(interest_cols, key_cols) + ) + + # TODO: df2 needs a better name + prev_df <- dplyr::select(prev_dat, {{ interest_cols }}) + cur_df <- dplyr::select(df2, {{ interest_cols }}) + + combined <- dplyr::full_join( + prev_df, + cur_df, + by = key_cols, + suffix = c("_old", "_new") + ) + + checkmate::assert_names( + colnames(combined), + must.include = cols_to_select + ) + + combined <- dplyr::select( combined, - {{ cols_to_select }} + dplyr::all_of(cols_to_select) ) + + tibble::as_tibble(combined) } #' Compare sub-regional and national estimates @@ -572,7 +603,7 @@ compare_natl_subreg <- function( subregional_summary <- dplyr::summarise( subregional_summary, subregional_mean = mean(.data[[outcome]], na.rm = TRUE), - subregional_iqr = IQR(.data[[outcome]], na.rm = TRUE), + subregional_iqr = stats::IQR(.data[[outcome]], na.rm = TRUE), .groups = "drop" ) @@ -614,7 +645,7 @@ compare_natl_subreg <- function( comparison <- dplyr::select(comparison, {{ cols_to_select }}) comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score)) - comparison + tibble::as_tibble(comparison) } #' Save pressure-testing diagnostics to local file @@ -654,6 +685,8 @@ compare_natl_subreg <- function( #' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the #' `"campaign"` activity type. #' +#' @param output_dir A writeable directory. Defaults to "./outputs". +#' #' @return None. Called for the convenience side-effect of saving data.frames as #' `.Rds` format. #' diff --git a/inst/WORDLIST b/inst/WORDLIST index 69da54f..e9a14dc 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -19,9 +19,12 @@ VIMC WIP WPP YLLs +YYYYMM autogenerated +dalys +erroring facetted ggplot +iqr tibble tibbles -timeseries diff --git a/man/basic_burden_sanity.Rd b/man/basic_burden_sanity.Rd index 004cffd..035018f 100644 --- a/man/basic_burden_sanity.Rd +++ b/man/basic_burden_sanity.Rd @@ -18,4 +18,4 @@ estimates, with the length of the vector depending on how many checks fail. Helper function for sanity checks on burden estimate values. Checks whether any burden estimates are non-numeric, missing, or negative. } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/check_demography_alignment.Rd b/man/check_demography_alignment.Rd index 5afb1f0..d5fecd9 100644 --- a/man/check_demography_alignment.Rd +++ b/man/check_demography_alignment.Rd @@ -27,4 +27,4 @@ modelled population size from the WPP-derived population estimates. Check the modelled disease burden data has similar population sizes as the provided population data. } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/compare_natl_subreg.Rd b/man/compare_natl_subreg.Rd index ecf5cea..7f4c60e 100644 --- a/man/compare_natl_subreg.Rd +++ b/man/compare_natl_subreg.Rd @@ -27,4 +27,4 @@ impact is considered to be outside the tolerance limit. \description{ Compare sub-regional and national estimates } -\keyword{pressure_testing} +\keyword{impact_diagnostics} diff --git a/man/constants.Rd b/man/constants.Rd index 8faa19a..1d93a1f 100644 --- a/man/constants.Rd +++ b/man/constants.Rd @@ -10,6 +10,7 @@ \alias{colnames_df_missing_cols} \alias{COLNAMES_KEY_PRESSURE_TEST} \alias{COLNAMES_INTEREST_PRESSURE_TEST} +\alias{IMPACT_OUTCOMES} \alias{EXCLUDED_DISEASES} \alias{N_TS_MIN_CHARS} \alias{N_TS_YEAR_CHARS} @@ -20,6 +21,7 @@ \alias{DEF_TOUCHSTONE_OLD} \alias{DEF_TOUCHSTONE_NEW} \alias{DEF_TOUCHSTONE_OLD_OLD} +\alias{COLOUR_VIMC} \title{Package constants} \format{ An object of class \code{character} of length 5. @@ -36,6 +38,8 @@ An object of class \code{character} of length 7. An object of class \code{character} of length 14. +An object of class \code{character} of length 2. + An object of class \code{character} of length 4. An object of class \code{integer} of length 1. @@ -54,6 +58,8 @@ An object of class \code{character} of length 1. An object of class \code{character} of length 1. +An object of class \code{character} of length 1. + An object of class \code{character} of length 1. } \usage{ @@ -71,6 +77,8 @@ COLNAMES_KEY_PRESSURE_TEST COLNAMES_INTEREST_PRESSURE_TEST +IMPACT_OUTCOMES + EXCLUDED_DISEASES N_TS_MIN_CHARS @@ -90,9 +98,52 @@ DEF_TOUCHSTONE_OLD DEF_TOUCHSTONE_NEW DEF_TOUCHSTONE_OLD_OLD + +COLOUR_VIMC } \description{ -Package constants +Constant values used in \emph{vimcheck}. See the \strong{Examples} section for the +constant values. +} +\examples{ +file_dict_colnames + +scenario_data_colnames + +burden_outcome_names + +colnames_plot_demog_compare + +colnames_df_missing_cols + +COLNAMES_KEY_PRESSURE_TEST + +COLNAMES_INTEREST_PRESSURE_TEST + +IMPACT_OUTCOMES + +EXCLUDED_DISEASES + +N_TS_MIN_CHARS + +N_TS_YEAR_CHARS + +MIN_TS_YEAR + +MAX_TS_YEAR + +MIN_TS_MONTH + +MAX_TS_MONTH + +DEF_TOUCHSTONE_OLD + +DEF_TOUCHSTONE_NEW + +DEF_TOUCHSTONE_OLD_OLD + +COLOUR_VIMC + } \keyword{constants} \keyword{datasets} diff --git a/man/flag_large_diffs.Rd b/man/flag_large_diffs.Rd index e0379b1..fcc5039 100644 --- a/man/flag_large_diffs.Rd +++ b/man/flag_large_diffs.Rd @@ -22,7 +22,7 @@ interest (see \code{variable}). Usually generated using \code{\link[=generate_di \code{\link[=gen_national_iqr]{gen_national_iqr()}}.} \item{variable}{A string specifying the variable of interest. Must be one of -"deaths_averted" and "dalys_averted", and must be present as a name and +"deaths_averted" or "dalys_averted", and must be present as a name and element of \code{changes_list}.} \item{group_cols}{A character vector of grouping columns. Defaults to @@ -47,4 +47,4 @@ between touchstones is greater than expected. A row is flagged if the difference is greater than \code{threshold} \eqn{\times} the inter-quartile range for cases where the IQR is greater than zero. } -\keyword{pressure_testing} +\keyword{impact_diagnostics} diff --git a/man/gen_combined_df.Rd b/man/gen_combined_df.Rd index 9a329d3..b3f53f0 100644 --- a/man/gen_combined_df.Rd +++ b/man/gen_combined_df.Rd @@ -31,4 +31,4 @@ are disambiguated with the suffixes \code{"_old"} and \code{"_new"}. \description{ Generates a full join of two data.frames, selecting for columns of interest. } -\keyword{pressure_testing} +\keyword{impact_diagnostics} diff --git a/man/gen_national_iqr.Rd b/man/gen_national_iqr.Rd index c722e66..4257953 100644 --- a/man/gen_national_iqr.Rd +++ b/man/gen_national_iqr.Rd @@ -31,4 +31,4 @@ using string interpolation. \description{ Generate IQR for key outcomes } -\keyword{pressure_testing} +\keyword{impact_diagnostics} diff --git a/man/generate_diffs.Rd b/man/generate_diffs.Rd index 17fcf32..4e4607e 100644 --- a/man/generate_diffs.Rd +++ b/man/generate_diffs.Rd @@ -36,4 +36,4 @@ with one list element per element of \code{interest_cols}. \description{ Explore significant changes in deaths and DALYs } -\keyword{pressure_testing} +\keyword{impact_diagnostics} diff --git a/man/plotting.Rd b/man/plot_burden_diagnostics.Rd similarity index 91% rename from man/plotting.Rd rename to man/plot_burden_diagnostics.Rd index 7f915a3..8431edd 100644 --- a/man/plotting.Rd +++ b/man/plot_burden_diagnostics.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fn_plotting_burden_diagnostics.R -\name{plotting} -\alias{plotting} +\name{plot_burden_diagnostics} +\alias{plot_burden_diagnostics} \alias{plot_compare_demography} \alias{plot_age_patterns} \alias{plot_global_burden_decades} @@ -55,8 +55,8 @@ A \verb{} object that can be printed to screen in the plot frame or saved to an output device (i.e., saved as an image file). } \description{ -Plotting functions for burden and impact diagnostics. All functions operate +Plotting functions for burden diagnostics. All functions operate on data prepared for plotting by a corresponding -\link[=plotting_prep]{plotting-preparation function}. +\link[=plot_prep_burden_diagnostics]{plotting-preparation function}. } \keyword{plotting} diff --git a/man/plot_impact_diagnostics.Rd b/man/plot_impact_diagnostics.Rd new file mode 100644 index 0000000..c195f97 --- /dev/null +++ b/man/plot_impact_diagnostics.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_plotting_impact_diagnostics.R +\name{plot_impact_diagnostics} +\alias{plot_impact_diagnostics} +\alias{plot_sig_diff} +\alias{plot_diff} +\alias{plot_modelling_group_variation} +\alias{plot_vaccine_gavi} +\alias{plot_cumul} +\title{Create impact diagnostics plots} +\usage{ +plot_sig_diff(data, outcome = IMPACT_OUTCOMES) + +plot_diff( + data, + outcome = IMPACT_OUTCOMES, + group_vars = IMPACT_GROUP_VARS, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) + +plot_modelling_group_variation(data) + +plot_vaccine_gavi(data) + +plot_cumul(data) +} +\arguments{ +\item{data}{A data.frame suitable for plotting. +\itemize{ +\item \code{plot_sig_diff()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{flag_large_diff()}}. +\item \code{plot_diff()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{gen_combined_df()}}. +\item \code{plot_modelling_group_variation()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{plot_prep_mod_grp_varn()}}. +\item \code{plot_vaccine_gavi()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{plot_prep_vax_gavi()}} +\item \code{plot_cumul()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{plot_prep_cumul()}} +}} + +\item{outcome}{A string for the impact outcome. One of \link{IMPACT_OUTCOMES}.} + +\item{group_vars}{A single string for the grouping variables. May be any of +\link{IMPACT_OUTCOMES}, which are \code{"activity_type"} and \code{"vaccine"}.} + +\item{touchstone_old}{A string for the previous touchstone in +format \code{"YYYYMM"}. Defaults to \link{DEF_TOUCHSTONE_OLD}.} + +\item{touchstone_new}{A string for the current or new touchstone in +format \code{"YYYYMM"}. Defaults to \link{DEF_TOUCHSTONE_NEW}.} +} +\value{ +A \verb{} object that can be viewed or saved. +} +\description{ +Functions that create impact diagnostics plots (or plotting objects). All +functions are associated with one other upstream data processing function, +and can be used in a pipe with that function. Where appropriate, outcome +selection and label preparation is automated to reduce function arguments. + +Plotting functions for impact diagnostics. See +\link[=plot_prep_impact_diagnostics]{plotting-preparation functions} for a set of +helper functions that prepare impact diagnostics for plotting. +} diff --git a/man/plotting_prep.Rd b/man/plot_prep_burden_diagnostics.Rd similarity index 87% rename from man/plotting_prep.Rd rename to man/plot_prep_burden_diagnostics.Rd index f8124bc..3afa707 100644 --- a/man/plotting_prep.Rd +++ b/man/plot_prep_burden_diagnostics.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fn_plotting_prep_bur_diag.R -\name{plotting_prep} -\alias{plotting_prep} +\name{plot_prep_burden_diagnostics} +\alias{plot_prep_burden_diagnostics} \alias{prep_plot_demography} \alias{prep_plot_age} \alias{prep_plot_burden_decades} @@ -53,6 +53,8 @@ column "burden_outcome", and a list column of tibbles "burden_data". } \description{ Transform burden estimate data from modelling groups to make them suitable -for plotting using an appropriate \link[=plotting]{plotting function}. Each -preparation function corresponds to a plotting function. +for plotting using an appropriate +\link[=plot_prep_burden_diagnostics]{plotting function}. Each preparation function +corresponds to a plotting function. } +\keyword{plot_prep_burden_diagnostics} diff --git a/man/plot_prep_impact_diagnostics.Rd b/man/plot_prep_impact_diagnostics.Rd new file mode 100644 index 0000000..7afc4f1 --- /dev/null +++ b/man/plot_prep_impact_diagnostics.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_plotting_prep_impact_diagnostics.R +\name{plot_prep_impact_diagnostics} +\alias{plot_prep_impact_diagnostics} +\alias{prep_plot_mod_grp_varn} +\alias{prep_plot_vax_gavi} +\alias{prep_plot_cumul} +\title{Prepare impact diagnostics for plotting} +\usage{ +prep_plot_mod_grp_varn(df2, df3, outcome = IMPACT_OUTCOMES) + +prep_plot_vax_gavi( + data, + prev_data, + outcome = IMPACT_OUTCOMES, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) + +prep_plot_cumul( + data, + outcome, + disease, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) +} +\arguments{ +\item{df2}{A \verb{} of impact estimates with at least columns +\code{modelling_group}, \code{vaccine}, outcome variable, and \code{fvps} (doses +delivered). Used as the primary data source for calculations in +\code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}}.} + +\item{df3}{A \verb{} of modelling group and vaccine combinations, +typically with one row per modelling group per vaccine. Joined with \code{df2} +to ensure complete group coverage in \code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}}.} + +\item{outcome}{A character string for the impact outcome. Must be one of +\code{"deaths_averted"} or \code{"dalys_averted"}. For \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}, +\code{data} must include columns named \verb{\{outcome\}_old} and \verb{\{outcome\}_new}.} + +\item{data}{A \verb{} of impact estimates with columns including at least +those in \link{COLNAMES_KEY_PRESSURE_TEST}, the outcome variable, and +potentially other columns for analysis.} + +\item{prev_data}{A \verb{} of impact estimates from a previous touchstone, +used as a comparison baseline. Should have the same structure as \code{data}.} + +\item{touchstone_old}{A six-character touchstone identifier (YYYYMM format) +for the previous dataset. Defaults to \link{DEF_TOUCHSTONE_OLD}. Used in +\code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} and \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}.} + +\item{touchstone_new}{A six-character touchstone identifier (YYYYMM format) +for the current dataset. Defaults to \link{DEF_TOUCHSTONE_NEW}. Used in +\code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} and \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}.} + +\item{disease}{A character string specifying a single disease for filtering +and analysis.} +} +\value{ +\itemize{ +\item \code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}} returns a grouped \verb{} (grouped by +\code{vaccine}) with all columns from \code{df2} and \code{df3} plus derived columns: +\code{adj_outc} (adjusted outcome with small offset), \code{outcome_name} (input +outcome), and \code{mean_outc} (vaccine-level weighted mean outcome). +\item \code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} returns a \verb{} with columns \code{disease}, +\code{year}, \code{yearly_outcome}, \code{dataset} (factor with levels for old touchstone, +"Difference", and new touchstone), and \code{outcome_name}. Summarizes outcomes +by disease and year across two touchstones. +\item \code{\link[=prep_plot_cumul]{prep_plot_cumul()}} returns a \verb{} with columns \code{year}, +\code{modelling_group}, \code{touchstone}, \code{value} (cumulative or average outcome), +\code{line_type} ("solid" for individual models, "dashed" for model average), +and \code{outcome_name}. Returns \code{NULL} if the specified disease has no non-zero +data to plot. +} +} +\description{ +A suite of helper functions that sit between impact diagnostics functions and +plotting functions. These functions transform and aggregate impact estimates +to prepare them for visualisation. Functions have basic checks on input data +but otherwise assume users will not modify inputs. +} diff --git a/man/plotting_theme.Rd b/man/plotting_theme.Rd index fe46d78..0e95add 100644 --- a/man/plotting_theme.Rd +++ b/man/plotting_theme.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_plotting_burden_diagnostics.R +% Please edit documentation in R/fn_plotting_helpers.R \name{plotting_theme} \alias{plotting_theme} \alias{theme_vimc} diff --git a/man/pres_test_filter_data.Rd b/man/pres_test_filter_data.Rd index 003617a..27b1d43 100644 --- a/man/pres_test_filter_data.Rd +++ b/man/pres_test_filter_data.Rd @@ -8,9 +8,9 @@ \alias{filter_invalid_trajectories} \title{Filter data for touchstones or diseases} \usage{ -filter_recent_ts(df, threshold = 202310) +filter_recent_ts(df, threshold = DEF_TOUCHSTONE_NEW) -filter_excluded_diseases_ts(df, threshold = 202110) +filter_excluded_diseases_ts(df, threshold = DEF_TOUCHSTONE_OLD_OLD) filter_duplicates(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) @@ -25,7 +25,8 @@ filter_invalid_trajectories( checked for contents} \item{threshold}{A six-digit number that is checked as a valid touchstone -identifier (YYYYMM format) using \code{\link[=validate_ts_year]{validate_ts_year()}}.} +identifier (YYYYMM format) using \code{\link[=validate_ts_year]{validate_ts_year()}}. Defaults to +\link{DEF_TOUCHSTONE_NEW} (\code{"202310"}).} \item{key_cols}{Key columns in \code{df} to check for duplicates.} @@ -53,4 +54,4 @@ to the \link{EXCLUDED_DISEASES}, when the touchstone year in \code{df} is less t A pair of helper functions allowing filtering out of recent touchstone values and excluded diseases. } -\keyword{pressure_testing} +\keyword{impact_diagnostics} diff --git a/man/save_outputs.Rd b/man/save_outputs.Rd index db41a66..27e513d 100644 --- a/man/save_outputs.Rd +++ b/man/save_outputs.Rd @@ -47,6 +47,8 @@ with the outcome \code{"dalys_averted"}.} \item{subregional_flags_dalys_rout}{A data.frame that is the output of \code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the \code{"campaign"} activity type.} + +\item{output_dir}{A writeable directory. Defaults to "./outputs".} } \value{ None. Called for the convenience side-effect of saving data.frames as @@ -57,4 +59,4 @@ Save pressure-testing diagnostics data.frames to local compressed files in the \code{.Rds} format. Input data.frames are generated by other package functions and are not checked here. } -\keyword{pressure_testing} +\keyword{impact_diagnostics} diff --git a/man/significant_diff_plot.Rd b/man/significant_diff_plot.Rd deleted file mode 100644 index 11b7cb4..0000000 --- a/man/significant_diff_plot.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_plotting_pressure_testing.R -\name{significant_diff_plot} -\alias{significant_diff_plot} -\title{Plot significant changes} -\usage{ -significant_diff_plot(df, outcome) -} -\description{ -Plot significant changes -} -\keyword{internal} diff --git a/man/validate_complete_incoming_files.Rd b/man/validate_complete_incoming_files.Rd index 75d3cd9..dcdfb7b 100644 --- a/man/validate_complete_incoming_files.Rd +++ b/man/validate_complete_incoming_files.Rd @@ -20,4 +20,4 @@ This function expects that incoming burden files are in the directory given by \code{path_burden}, which holds a file dictionary which maps each data file to a specific scenario. } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/validate_file_dict_template.Rd b/man/validate_file_dict_template.Rd index af2d770..e84f3f7 100644 --- a/man/validate_file_dict_template.Rd +++ b/man/validate_file_dict_template.Rd @@ -25,4 +25,4 @@ scenarios i.e. the number of files that we expect from a model. Users should populate the file column to match the scenario-file. This function will run if a \code{file_dictionary.csv} file does not exist } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/validate_template_alignment.Rd b/man/validate_template_alignment.Rd index 7bbccc6..c6c9e36 100644 --- a/man/validate_template_alignment.Rd +++ b/man/validate_template_alignment.Rd @@ -19,4 +19,4 @@ against \code{template}, with information on missing and extra data. \description{ Identify extra and missing columns and rows in burden data. } -\keyword{diagnostics} +\keyword{burden_diagnostics} From 386be9b6a1aa618626cf0f0a1eee5adc76a6ddb8 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Wed, 22 Apr 2026 15:58:57 +0100 Subject: [PATCH 16/17] Update infra, bump to v0.0.4 --- DESCRIPTION | 6 +++++- NAMESPACE | 31 ++++++++++++++++++++++++++++++- NEWS.md | 8 ++++++++ inst/WORDLIST | 1 + 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 93969a5..d96f7e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: vimcheck Title: Diagnostics for Vaccine Impact Modelling Consortium Burden and Impact Estimates -Version: 0.0.3 +Version: 0.0.4 Authors@R: c( person("Pratik", "Gupte", , "p.gupte24@imperial.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5294-7819")), @@ -24,13 +24,17 @@ Depends: Imports: checkmate, cli, + diffdf, dplyr, forcats, ggplot2, + ggridges, glue, + here, readr, rlang, scales, + stats, stringr, tidyr Suggests: diff --git a/NAMESPACE b/NAMESPACE index 6a9f7c8..7caa5f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,24 @@ # Generated by roxygen2: do not edit by hand +export(COLNAMES_INTEREST_PRESSURE_TEST) +export(COLNAMES_KEY_PRESSURE_TEST) +export(COLOUR_VIMC) +export(DEF_TOUCHSTONE_NEW) +export(DEF_TOUCHSTONE_OLD) +export(DEF_TOUCHSTONE_OLD_OLD) +export(EXCLUDED_DISEASES) +export(IMPACT_OUTCOMES) +export(MAX_TS_MONTH) +export(MAX_TS_YEAR) +export(MIN_TS_MONTH) +export(MIN_TS_YEAR) +export(N_TS_MIN_CHARS) +export(N_TS_YEAR_CHARS) export(basic_burden_sanity) +export(burden_outcome_names) export(check_demography_alignment) +export(colnames_df_missing_cols) +export(colnames_plot_demog_compare) export(compare_natl_subreg) export(file_dict_colnames) export(filter_duplicates) @@ -15,17 +32,25 @@ export(generate_diffs) export(plot_age_patterns) export(plot_compare_demography) export(plot_coverage_set) +export(plot_cumul) +export(plot_diff) export(plot_fvp) export(plot_global_burden) export(plot_global_burden_decades) +export(plot_modelling_group_variation) +export(plot_sig_diff) +export(plot_vaccine_gavi) export(prep_plot_age) export(prep_plot_burden_decades) export(prep_plot_coverage_set) +export(prep_plot_cumul) export(prep_plot_demography) export(prep_plot_fvp) export(prep_plot_global_burden) +export(prep_plot_mod_grp_varn) +export(prep_plot_vax_gavi) export(save_outputs) -export(significant_diff_plot) +export(scenario_data_colnames) export(theme_vimc) export(theme_vimc_noxaxis) export(validate_complete_incoming_files) @@ -37,6 +62,8 @@ importFrom(ggplot2,facet_grid) importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_col) importFrom(ggplot2,geom_hline) +importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_segment) importFrom(ggplot2,ggplot) importFrom(ggplot2,label_wrap_gen) importFrom(ggplot2,labeller) @@ -44,5 +71,7 @@ importFrom(ggplot2,labs) importFrom(ggplot2,scale_fill_distiller) importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,theme) importFrom(ggplot2,vars) +importFrom(rlang,":=") importFrom(rlang,.data) diff --git a/NEWS.md b/NEWS.md index 81b8136..b7f6fc8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# vimcheck 0.0.4 + +- Added impact diagnostics functions in `R/fn_impact_diagnostics.R`. + +- Added plotting preparation functions and plotting functions in `R/fn_plotting_prep_impact_diagnostics.R` and `R/fn_plotting_impact_diagnostics.R`. + +- Added dependencies _diffdf_ and _here_. + # vimcheck 0.0.3 - Separated data-prep for plotting from plotting functions. diff --git a/inst/WORDLIST b/inst/WORDLIST index e9a14dc..bc3ac78 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -22,6 +22,7 @@ YLLs YYYYMM autogenerated dalys +diffdf erroring facetted ggplot From 43753fb05464dd0b45026651af99ba470f89e237 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Wed, 22 Apr 2026 16:10:24 +0100 Subject: [PATCH 17/17] Rename fn file, fix arg name --- ...sure_testing.R => fn_impact_diagnostics.R} | 21 +++++++++---------- man/compare_natl_subreg.Rd | 2 +- ...t_filter_data.Rd => filter_impact_data.Rd} | 6 +++--- man/flag_large_diffs.Rd | 2 +- man/gen_combined_df.Rd | 8 +++---- man/gen_national_iqr.Rd | 2 +- man/generate_diffs.Rd | 2 +- man/save_outputs.Rd | 2 +- 8 files changed, 22 insertions(+), 23 deletions(-) rename R/{fn_pressure_testing.R => fn_impact_diagnostics.R} (98%) rename man/{pres_test_filter_data.Rd => filter_impact_data.Rd} (94%) diff --git a/R/fn_pressure_testing.R b/R/fn_impact_diagnostics.R similarity index 98% rename from R/fn_pressure_testing.R rename to R/fn_impact_diagnostics.R index c5c9a1f..a79a22d 100644 --- a/R/fn_pressure_testing.R +++ b/R/fn_impact_diagnostics.R @@ -1,7 +1,7 @@ #' Filter data for touchstones or diseases #' -#' @name pres_test_filter_data -#' @rdname pres_test_filter_data +#' @name filter_impact_data +#' @rdname filter_impact_data #' #' @description #' A pair of helper functions allowing filtering out of recent touchstone values @@ -56,7 +56,7 @@ filter_recent_ts <- function(df, threshold = DEF_TOUCHSTONE_NEW) { } } -#' @name pres_test_filter_data +#' @name filter_impact_data #' #' @export filter_excluded_diseases_ts <- function( @@ -81,7 +81,7 @@ filter_excluded_diseases_ts <- function( } } -#' @name pres_test_filter_data +#' @name filter_impact_data #' #' @param key_cols Key columns in `df` to check for duplicates. #' @@ -111,7 +111,7 @@ filter_duplicates <- function(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) { dplyr::filter(df, .data$n_key > 1) } -#' @name pres_test_filter_data +#' @name filter_impact_data #' #' @param prev_data A `` holding data from a previous touchstone for #' the same scenarios as `df`. @@ -471,7 +471,7 @@ flag_large_diffs <- function( #' @param prev_dat A data.frame of impact estimates corresponding to an earlier #' touchstone. #' -#' @param df2 A data.frame of impact estimates corresponding to a more recent +#' @param df_clean A data.frame of impact estimates corresponding to a more recent #' touchstone. #' #' @param interest_cols A character vector of columns of interest. Defaults to @@ -480,7 +480,7 @@ flag_large_diffs <- function( #' @param key_cols A character vector of columns of interest. Defaults to #' [COLNAMES_KEY_PRESSURE_TEST]. #' -#' @return A data.frame which is a full join of `prev_dat` and `df2`. Columns +#' @return A data.frame which is a full join of `prev_dat` and `df_clean`. Columns #' are disambiguated with the suffixes `"_old"` and `"_new"`. #' #' @keywords impact_diagnostics @@ -488,7 +488,7 @@ flag_large_diffs <- function( #' @export gen_combined_df <- function( prev_dat, - df2, + df_clean, interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, key_cols = COLNAMES_KEY_PRESSURE_TEST ) { @@ -526,13 +526,12 @@ gen_combined_df <- function( must.include = c(interest_cols, key_cols) ) checkmate::assert_names( - colnames(df2), + colnames(df_clean), must.include = c(interest_cols, key_cols) ) - # TODO: df2 needs a better name prev_df <- dplyr::select(prev_dat, {{ interest_cols }}) - cur_df <- dplyr::select(df2, {{ interest_cols }}) + cur_df <- dplyr::select(df_clean, {{ interest_cols }}) combined <- dplyr::full_join( prev_df, diff --git a/man/compare_natl_subreg.Rd b/man/compare_natl_subreg.Rd index 7f4c60e..395d116 100644 --- a/man/compare_natl_subreg.Rd +++ b/man/compare_natl_subreg.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_pressure_testing.R +% Please edit documentation in R/fn_impact_diagnostics.R \name{compare_natl_subreg} \alias{compare_natl_subreg} \title{Compare sub-regional and national estimates} diff --git a/man/pres_test_filter_data.Rd b/man/filter_impact_data.Rd similarity index 94% rename from man/pres_test_filter_data.Rd rename to man/filter_impact_data.Rd index 27b1d43..da36793 100644 --- a/man/pres_test_filter_data.Rd +++ b/man/filter_impact_data.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_pressure_testing.R -\name{pres_test_filter_data} -\alias{pres_test_filter_data} +% Please edit documentation in R/fn_impact_diagnostics.R +\name{filter_impact_data} +\alias{filter_impact_data} \alias{filter_recent_ts} \alias{filter_excluded_diseases_ts} \alias{filter_duplicates} diff --git a/man/flag_large_diffs.Rd b/man/flag_large_diffs.Rd index fcc5039..f54488d 100644 --- a/man/flag_large_diffs.Rd +++ b/man/flag_large_diffs.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_pressure_testing.R +% Please edit documentation in R/fn_impact_diagnostics.R \name{flag_large_diffs} \alias{flag_large_diffs} \title{Flag significant changes in impact estimates} diff --git a/man/gen_combined_df.Rd b/man/gen_combined_df.Rd index b3f53f0..13dd05d 100644 --- a/man/gen_combined_df.Rd +++ b/man/gen_combined_df.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_pressure_testing.R +% Please edit documentation in R/fn_impact_diagnostics.R \name{gen_combined_df} \alias{gen_combined_df} \title{Combine and align data from two touchstones} \usage{ gen_combined_df( prev_dat, - df2, + df_clean, interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, key_cols = COLNAMES_KEY_PRESSURE_TEST ) @@ -15,7 +15,7 @@ gen_combined_df( \item{prev_dat}{A data.frame of impact estimates corresponding to an earlier touchstone.} -\item{df2}{A data.frame of impact estimates corresponding to a more recent +\item{df_clean}{A data.frame of impact estimates corresponding to a more recent touchstone.} \item{interest_cols}{A character vector of columns of interest. Defaults to @@ -25,7 +25,7 @@ touchstone.} \link{COLNAMES_KEY_PRESSURE_TEST}.} } \value{ -A data.frame which is a full join of \code{prev_dat} and \code{df2}. Columns +A data.frame which is a full join of \code{prev_dat} and \code{df_clean}. Columns are disambiguated with the suffixes \code{"_old"} and \code{"_new"}. } \description{ diff --git a/man/gen_national_iqr.Rd b/man/gen_national_iqr.Rd index 4257953..853a8b0 100644 --- a/man/gen_national_iqr.Rd +++ b/man/gen_national_iqr.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_pressure_testing.R +% Please edit documentation in R/fn_impact_diagnostics.R \name{gen_national_iqr} \alias{gen_national_iqr} \title{Generate IQR for key outcomes} diff --git a/man/generate_diffs.Rd b/man/generate_diffs.Rd index 4e4607e..03a8500 100644 --- a/man/generate_diffs.Rd +++ b/man/generate_diffs.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_pressure_testing.R +% Please edit documentation in R/fn_impact_diagnostics.R \name{generate_diffs} \alias{generate_diffs} \title{Explore significant changes in deaths and DALYs} diff --git a/man/save_outputs.Rd b/man/save_outputs.Rd index 27e513d..0b2afbe 100644 --- a/man/save_outputs.Rd +++ b/man/save_outputs.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fn_pressure_testing.R +% Please edit documentation in R/fn_impact_diagnostics.R \name{save_outputs} \alias{save_outputs} \title{Save pressure-testing diagnostics to local file}