From 809243771bcafd2db1c480d4a3dbc0c02ac330dc Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Thu, 18 Dec 2025 13:58:43 +0000 Subject: [PATCH 1/6] Import standalone tools from reside.utils --- R/import-standalone-utils-assert-path.R | 157 +++++++++++++++ R/import-standalone-utils-assert.R | 251 ++++++++++++++++++++++++ 2 files changed, 408 insertions(+) create mode 100644 R/import-standalone-utils-assert-path.R create mode 100644 R/import-standalone-utils-assert.R diff --git a/R/import-standalone-utils-assert-path.R b/R/import-standalone-utils-assert-path.R new file mode 100644 index 0000000..673fb8e --- /dev/null +++ b/R/import-standalone-utils-assert-path.R @@ -0,0 +1,157 @@ +# Standalone file: do not edit by hand +# Source: https://github.com/reside-ic/reside.utils/blob/HEAD/R/standalone-utils-assert-path.R +# Generated by: usethis::use_standalone("reside-ic/reside.utils", "utils-assert-path") +# ---------------------------------------------------------------------- +# +# --- +# repo: reside/reside.utils +# file: standalone-utils-assert-path.R +# dependencies: standalone-utils-assert.R +# imports: [cli, fs] +# --- +assert_file_exists <- function(files, name = "File", call = parent.frame(), + arg = NULL) { + err <- !file.exists(files) + ## TODO: throughout this file it would be nice to use cli's '.file' + ## class and ector contraction, *but* it renders poorly on default + ## black backgfrounds (dark blue) and makes testing a bit harder + ## because the rendering depends on cli options. + ## + ## TODO: add a canonical case check, as for the relative path bit. + if (any(err)) { + ## Because we interpolate both 'name' and the file list, we need + ## to disambiguate the quantity. + n <- cli::qty(sum(err)) + cli::cli_abort( + "{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", + call = call, arg = arg) + } +} + + +assert_file_exists_relative <- function(files, workdir, name, + call = parent.frame(), + arg = NULL) { + assert_relative_path(files, name, workdir, call) + + assert_character(files, name, call = call) + err <- !file_exists(files, workdir = workdir) + if (any(err)) { + n <- cli::qty(sum(err)) + cli::cli_abort( + c("{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", + i = "Looked within directory '{workdir}'"), + call = call) + } + + files_canonical <- file_canonical_case(files, workdir) + err <- is.na(files_canonical) | fs::path(files) != files_canonical + if (any(err)) { + i <- err & !is.na(files_canonical) + hint_case <- sprintf("For '%s', did you mean '%s'?", + files[i], files_canonical[i]) + names(hint_case) <- rep("i", length(hint_case)) + n <- cli::qty(sum(err)) + cli::cli_abort( + c("{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", + hint_case, + i = paste("If you don't use the canonical case for a file, your code", + "is not portable across different platforms"), + i = "Looked within directory '{workdir}'"), + call = call) + } +} + + +assert_is_directory <- function(path, name = "Directory", call = parent.frame(), + arg = NULL) { + assert_scalar_character(path, arg = arg, call = call) + assert_file_exists(path, name = name, arg = arg, call = call) + if (!fs::is_dir(path)) { + cli::cli_abort("Path exists but is not a directory: {path}", + call = call, arg = arg) + } +} + + +assert_relative_path <- function(files, name, workdir, call = parent.frame(), + arg = NULL) { + err <- fs::is_absolute_path(files) + if (any(err)) { + n <- cli::qty(sum(err)) + files_err <- files[err] + names(files_err) <- rep("x", length(files_err)) + cli::cli_abort( + c("{name}{n}{?s} must be {?a/} relative path{?s}", + files_err, + i = "Path was relative to directory '{workdir}'"), + call = call, arg = arg) + } + + err <- vapply(fs::path_split(files), function(x) any(x == ".."), TRUE) + if (any(err)) { + n <- cli::qty(sum(err)) + files_err <- files[err] + names(files_err) <- rep("x", length(files_err)) + cli::cli_abort( + c("{name}{n}{?s} must not contain '..' (parent directory) components", + files_err, + i = "Path was relative to directory '{workdir}'"), + call = call, arg = arg) + } +} + + +assert_directory_does_not_exist <- function(x, name = "Directory", arg = NULL, + call = parent.frame()) { + ok <- !fs::dir_exists(x) + if (!all(ok)) { + cli::cli_abort("{name}{?s} already exists: {format_file_list(x[!ok])}", + call = call, arg = arg) + } + invisible(x) +} + + +file_canonical_case <- function(path, workdir) { + if (length(path) != 1) { + return(vapply(path, file_canonical_case, "", workdir, USE.NAMES = FALSE)) + } + stopifnot(!fs::is_absolute_path(path)) + path_split <- fs::path_split(path)[[1]] + base <- workdir + ret <- character(length(path_split)) + for (i in seq_along(path_split)) { + pos <- dir(base, all.files = TRUE, no.. = TRUE) + el <- path_split[[i]] + j <- which(tolower(el) == tolower(pos)) + if (length(j) == 1) { + el <- pos[[j]] + } else if (el %in% pos) { + # We might want to warn here? + # message("Multiple casings present; this is not portable") + } else { + return(NA_character_) + } + ret[[i]] <- el + base <- file.path(base, el) + } + paste(ret, collapse = "/") +} + + +file_exists <- function(..., workdir = NULL) { + files <- c(...) + if (!is.null(workdir)) { + assert_scalar_character(workdir) + owd <- setwd(workdir) # nolint + on.exit(setwd(owd)) # nolint + } + fs::file_exists(files) +} + + +format_file_list <- function(x) { + cli::cli_vec(sprintf("'%s'", x), + style = list("vec-sep2" = ", ", "vec-last" = ", ")) +} diff --git a/R/import-standalone-utils-assert.R b/R/import-standalone-utils-assert.R new file mode 100644 index 0000000..441503d --- /dev/null +++ b/R/import-standalone-utils-assert.R @@ -0,0 +1,251 @@ +# Standalone file: do not edit by hand +# Source: https://github.com/reside-ic/reside.utils/blob/HEAD/R/standalone-utils-assert.R +# Generated by: usethis::use_standalone("reside-ic/reside.utils", "utils-assert") +# ---------------------------------------------------------------------- +# +# --- +# repo: reside/reside.utils +# file: standalone-utils-assert.R +# imports: cli +# --- +assert_scalar <- function(x, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (length(x) != 1) { + cli::cli_abort( + c("'{name}' must be a scalar", + i = "{name} has length {length(x)}"), + call = call, arg = arg) + } + invisible(x) +} + + +assert_character <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.character(x)) { + cli::cli_abort("Expected '{name}' to be character", call = call, arg = arg) + } + invisible(x) +} + + +assert_numeric <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.numeric(x)) { + cli::cli_abort("Expected '{name}' to be numeric", call = call, arg = arg) + } + invisible(x) +} + + +assert_integer <- function(x, name = deparse(substitute(x)), + tolerance = NULL, arg = name, + call = parent.frame()) { + if (is.numeric(x)) { + rx <- round(x) + if (is.null(tolerance)) { + tolerance <- sqrt(.Machine$double.eps) + } + if (!isTRUE(all.equal(x, rx, tolerance = tolerance))) { + cli::cli_abort( + c("Expected '{name}' to be integer", + i = paste("{cli::qty(length(x))}The provided", + "{?value was/values were} numeric, but not very close", + "to integer values")), + arg = arg, call = call) + } + x <- as.integer(rx) + } else { + cli::cli_abort("Expected '{name}' to be integer", call = call, arg = arg) + } + invisible(x) +} + + +assert_logical <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.logical(x)) { + cli::cli_abort("Expected '{name}' to be logical", arg = arg, call = call) + } + invisible(x) +} + + +assert_nonmissing <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (anyNA(x)) { + cli::cli_abort("Expected '{name}' to be non-NA", arg = arg, call = call) + } + invisible(x) +} + + +assert_scalar_character <- function(x, name = deparse(substitute(x)), + allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_character(x, name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_numeric <- function(x, name = deparse(substitute(x)), + allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_numeric(x, name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_integer <- function(x, name = deparse(substitute(x)), + tolerance = NULL, allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_integer(x, name, tolerance = tolerance, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_logical <- function(x, name = deparse(substitute(x)), + allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_logical(x, name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_size <- function(x, allow_zero = TRUE, allow_null = FALSE, + name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar_integer(x, name = name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) + min <- if (allow_zero) 0 else 1 + if (x < min) { + cli::cli_abort("'{name}' must be at least {min}", arg = arg, call = call) + } + invisible(x) +} + + +assert_length <- function(x, len, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (length(x) != len) { + cli::cli_abort( + "Expected '{name}' to have length {len}, but was length {length(x)}", + arg = arg, call = call) + } + invisible(x) +} + + +assert_is <- function(x, what, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (!inherits(x, what)) { + cli::cli_abort("Expected '{name}' to be a '{what}' object", + arg = arg, call = call) + } + invisible(x) +} + + +assert_list <- function(x, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (!is.list(x)) { + cli::cli_abort("Expected '{name}' to be a list", + arg = arg, call = call) + } + invisible(x) +} + + +assert_scalar_positive_numeric <- function(x, allow_zero = TRUE, + name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + assert_scalar_numeric(x, name = name, call = call) + if (allow_zero) { + if (x < 0) { + cli::cli_abort("'{name}' must be at least 0", arg = arg, call = call) + } + } else { + if (x <= 0) { + cli::cli_abort("'{name}' must be greater than 0", arg = arg, call = call) + } + } + invisible(x) +} + + +assert_scalar_positive_integer <- function(x, allow_zero = TRUE, + name = deparse(substitute(x)), + tolerance = NULL, arg = name, + call = parent.frame()) { + assert_scalar_integer(x, name, tolerance = tolerance, arg = arg, call = call) + min <- if (allow_zero) 0 else 1 + if (x < min) { + cli::cli_abort("'{name}' must be at least {min}", arg = arg, call = call) + } + invisible(x) +} + + +assert_raw <- function(x, len = NULL, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.raw(x)) { + cli::cli_abort("'{name}' must be a raw vector", arg = arg, call = call) + } + if (!is.null(len)) { + assert_length(x, len, name = name, call = call) + } + invisible(x) +} + + +assert_named <- function(x, unique = FALSE, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + nms <- names(x) + if (is.null(nms)) { + cli::cli_abort("'{name}' must be named", call = call, arg = arg) + } + if (anyNA(nms) || any(nms == "")) { + cli::cli_abort("All elements of '{name}' must be named", + call = call, arg = arg) + } + if (unique && anyDuplicated(names(x))) { + dups <- sprintf("'%s'", unique(names(x)[duplicated(names(x))])) + cli::cli_abort( + c("'{name}' must have unique names", + i = "Found {length(dups)} duplicate{?s}: {dups}"), + call = call, arg = arg) + } + invisible(x) +} + + +match_value <- function(x, choices, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + assert_scalar_character(x, call = call, name = name, arg = arg) + if (!(x %in% choices)) { + choices_str <- paste(sprintf("'%s'", choices), collapse = ", ") + cli::cli_abort(c("'{name}' must be one of {choices_str}", + i = "Instead we were given '{x}'"), call = call, + arg = arg) + } + x +} From beb7189718a1419e272219b77004608b0d491a62 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Thu, 18 Dec 2025 14:15:48 +0000 Subject: [PATCH 2/6] Add initial diagnostic and plotting functions --- .Rbuildignore | 1 + .gitignore | 1 + DESCRIPTION | 9 + NAMESPACE | 16 + R/burden_diagnositics.R | 512 ++++++++++++++++++++++++ R/constants.R | 14 + R/helpers.R | 36 ++ R/plotting.R | 315 +++++++++++++++ R/vimcheck.R | 4 +- _pkgdown.yml | 22 + man/basic_burden_sanity.Rd | 21 + man/check_demography_alignment.Rd | 19 + man/check_template_alignment.Rd | 17 + man/constants.Rd | 17 + man/helpers.Rd | 23 ++ man/plotting.Rd | 49 +++ man/plotting_theme.Rd | 23 ++ man/validate_complete_incoming_files.Rd | 23 ++ man/validate_file_dict_template.Rd | 28 ++ man/vimcheck-package.Rd | 2 +- 20 files changed, 1150 insertions(+), 2 deletions(-) create mode 100644 R/burden_diagnositics.R create mode 100644 R/constants.R create mode 100644 R/helpers.R create mode 100644 R/plotting.R create mode 100644 man/basic_burden_sanity.Rd create mode 100644 man/check_demography_alignment.Rd create mode 100644 man/check_template_alignment.Rd create mode 100644 man/constants.Rd create mode 100644 man/helpers.Rd create mode 100644 man/plotting.Rd create mode 100644 man/plotting_theme.Rd create mode 100644 man/validate_complete_incoming_files.Rd create mode 100644 man/validate_file_dict_template.Rd diff --git a/.Rbuildignore b/.Rbuildignore index b5a39b8..42d9471 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,4 @@ ^pkgdown$ ^tools$ ^scratch\.R$ +^scratch$ diff --git a/.gitignore b/.gitignore index d224dad..1ecff28 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,4 @@ rsconnect/ inst/doc docs scratch.R +scratch/ diff --git a/DESCRIPTION b/DESCRIPTION index 2f7f186..ce362e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,3 +31,12 @@ Encoding: UTF-8 Language: en-GB Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 +Imports: + cli, + dplyr, + fs, + ggplot2, + glue, + readr, + rlang, + tidyr diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..2d4bceb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,18 @@ # Generated by roxygen2: do not edit by hand +export(basic_burden_sanity) +export(check_demography_alignment) +export(check_template_alignment) +export(const_data_colnames) +export(impact_check) +export(plot_age_patterns) +export(plot_compare_demography) +export(plot_coverage_set) +export(plot_fvp) +export(plot_global_burden) +export(plot_global_burden_decades) +export(theme_vimc) +export(transfrom_coverage_fvps) +export(validate_complete_incoming_files) +export(validate_file_dict_template) +importFrom(ggplot2,ggplot) diff --git a/R/burden_diagnositics.R b/R/burden_diagnositics.R new file mode 100644 index 0000000..eac40de --- /dev/null +++ b/R/burden_diagnositics.R @@ -0,0 +1,512 @@ +#' Validate file dictionary template +#' +#' @description +#' Function to create a `file_dictionary` template. +#' It maps to touchstone disease scenarios and you will see expected number of +#' scenarios i.e. the number of files that we expect from a model. +#' Users should populate the file column to match the scenario-file. +#' This function will run if a `file_dictionary.csv` file does not exist +#' +#' @param disease A disease identifier. +#' +#' @param path_burden A directory with burden estimate data. +#' +#' @return Nothing; called primarily for its side-effets. +#' If the file `path_burden/file_dictionary.csv` does not exist, a file +#' dictionary CSV file is written to the same location. +#' Prints a message to screen informing the user whether any action has been +#' taken. +#' +#' @keywords diagnostics +#' +#' @examples +#' +#' @export +validate_file_dict_template <- function( + disease, + path_burden = "incoming_burden_estimates" +) { + # TODO: check conditions on arg disease - what is the original source `pars`? + + assert_is_directory(path_burden) + template <- file.path(path_burden, "file_dictionary.csv") + + if (file.exists(template)) { + cli::cli_inform( + "File dictionary found at {.file {template}}, no action needed." + ) + } else { + # TODO: resolve magic strings + scenario_dir <- "model_inputs" + checkmate::assert_directory_exists(scenario_dir) + scenario_data <- file.path(scenario_dir, "scenario.csv") + checkmate::assert_file_exists(scenario_data) + + sce <- readr::read_csv(scenario_data, show_col_types = FALSE) + + # NOTE: consider wrapping into check function + checkmate::assert_data_frame( + sce, + any.missing = FALSE, + min.cols = length(const_data_colnames), + ) + checkmate::assert_names( + colnames(sce), + must.include = const_data_colnames + ) + + # get distinct scenario entries and add no-vax if needed + sce <- dplyr::select(sce, {{ const_data_colnames }}) + sce <- dplyr::distinct(sce) + + novax_scenario <- "novac" + + if (!checkmate::test_subset(novax_scenario, sce$scenario_type)) { + sce <- dplyr::bind_rows( + sce, + make_novax_scenario(disease) + ) + } + + sce$file <- NA_character_ # TODO: investigate this further + readr::write_csv(sce, template) + + cli::cli_inform( + "No file dictionary found at {.file {template}}; created a file \\ + dictionary and wrote it to {.file {template}}." + ) + } + + # no return +} + +#' Validate files in a burden estimate +#' +#' @description +#' Check that incoming data files in a burden estimate are complete, and that +#' no extra files have been included. +#' This function expects that incoming burden files are in the +#' directory given by `path_burden`, which holds a file dictionary which maps +#' each data file to a specific scenario. +#' +#' @inheritParams validate_file_dict_template +#' +#' @return A `` of the scenario file dictionary in `path_burden` if all +#' checks pass. Otherwise, exits with informative errors on failed checks. +#' +#' @examples +#' +#' @keywords diagnostics +#' +#' @export +validate_complete_incoming_files <- function( + path_burden = "incoming_burden_estimates" +) { + assert_is_directory(path_burden) + + files <- list.files(path_burden, full.names = TRUE) + file_dict <- file.path(path_burden, "file_dictionary.csv") + + # checks on the file dictionary + if (file.exists(file_dict)) { + df_dict <- readr::read_csv( + file_dict, + show_col_types = FALSE + ) + + col_filenames <- "file" # TODO: remove/explain magic colnm + scenario_filenames <- df_dict[[col_filenames]] + df_dict <- dplyr::select(df_dict, -{{ col_filenames }}) + + duplicate_rows <- anyDuplicated(df_dict) + duplicate_filenames <- anyDuplicated(scenario_filenames) + + if (duplicate_rows) { + cli::cli_abort( + "Expected file dictionary {.file {file_dict}} to have non-duplicate \\ + entries, but found {duplicate_rows} duplicated rows!" + ) + } + if (duplicate_filenames) { + cli::cli_abort( + "Expected file dictionary {.file {file_dict}} to have non-duplicate \\ + scenario filenames, but found {duplicate_filenames} duplicated \\ + filenames!" + ) + } + + # expect scenario files in path_burden are the same as scenario_filenames + sce_files <- files[files != file_dict] + scenario_filenames <- file.path(path_burden, scenario_filenames) + are_good_scefiles <- checkmate::test_permutation( + scenario_filenames, + sce_files + ) + + if (!are_good_scefiles) { + extra_files <- setdiff(sce_files, scenario_filenames) + n_extra_files <- length(extra_files) + missing_files <- setdiff(scenario_filenames, sce_files) + n_missing_files <- length(missing_files) + + cli::cli_abort( + c( + "Expected as many scenario data files as scenarios \\ + ({length(scenario_filenames)}), but found \\ + {.emph {length(sce_files)}} instead.", + i = "Found {cli::no(n_extra_files)} extra files{? /}\\ + {.file {basename(extra_files)}}, {cli::qty(n_missing_files)} + {?but could not find/and} \\ + {cli::no(n_missing_files)} missing files{? /} \\ + {.file {basename(missing_files)}}", + i = "Directory searched: {.file {path_burden}}" + ), + ) + } + } else { + cli::cli_abort( + "Expected a file dictionary {.file {file_dict}}, but it was not found!" + ) + } + + df_dict +} + +#' Check incoming burden set against template +#' +#' @description +#' +#' @param burden_set +#' +#' @param template +#' +#' @return +#' +#' @examples +#' +#' @keywords diagnostics +#' +#' @export +check_template_alignment <- function(burden_set, template) { + # TODO: figure out what the args are expected to be: dfs? lists, vecs? + expected <- names(template) + provided <- names(burden_set) + + missing_cols_in_burden <- setdiff(expected, provided) + extra_cols_in_burden <- setdiff(provided, expected) + + # explicitly check each length + burden_cols_matches_template <- length(missing_cols_in_burden) + + length(extra_cols_in_burden) == + 0L + + # TODO: make magic strings constants + key_cols <- c("disease", "country", "year", "age") + template_grid <- dplyr::distinct( + template, + dplyr::across({ + key_cols + }) + ) + burden_grid <- dplyr::distinct( + burden_set, + dplyr::across({ + key_cols + }) + ) + + # TODO: if these are data.frames, this might not be the best way to check + # for differences + missing_grid_in_burden <- setdiff(template_grid, burden_grid) + extra_grid_in_burden <- setdiff(burden_grid, template_grid) + burden_grid_matches_template <- all( + c( + nrow(missing_grid_in_burden), + nrow(extra_grid_in_burden) + ) == + 0L + ) + + list( + missing_cols_in_burden = missing_cols_in_burden, + extra_cols_in_burden = extra_cols_in_burden, + burden_cols_matches_template = burden_cols_matches_template, + missing_grid_in_burden = missing_grid_in_burden, + extra_grid_in_burden = extra_grid_in_burden, + burden_grid_matches_template = burden_grid_matches_template + ) +} + +#' Check incoming burden cohort size against interpolated population +#' +#' @description +#' +#' @param burden_set +#' +#' @param wpp +#' +#' @param gender +#' +#' @return +#' +#' @keywords diagnostics +#' +#' @export +check_demography_alignment <- function(burden_set, wpp, gender = "both") { + # TODO: input checks + + # TODO: check if these can be made constants + cols_to_select <- c("country", "year", "age", "cohort_size") + provided <- dplyr::select( + burden_set, + {{ cols_to_select }} + ) + provided <- dplyr::mutate( + provided = cohort_size # check if this can be made a string const + ) + + # TODO: explain what expected is + # TODO: replace with a right-join? + expected <- dplyr::filter( + wpp, + country %in% + provided$country & + year %in% provided$year & + age %in% provided$age, + gender == {{ gender }} + ) + + cols_to_select <- c("country", "year", "age", "value") + expected <- dplyr::select( + expected, + {{ cols_to_select }} + ) + expected <- dplyr::rename( + expected, + expected = value + ) # TODO: prefer not to use NSE + + # return left join + alignment <- dplyr::left_join( + provided, + expected, + by = c("country", "year", "age") + ) + alignment <- dplyr::mutate( + alignment, + difference = provided - expected, + abs_diff = abs(difference), + prop_diff = difference / expected + ) + + alignment +} + +#' Sanity checks on burden estimates +#' +#' @description Helper function for sanity checks on burden estimate values. +#' Checks whether any burden estimates are non-numeric, missing, or negative. +#' +#' @param burden A `` of disease burden estimates. Must have +#' at least a single column named `"value"` of numeric burden estimates. +#' +#' @return A character vector of messages generated by checks on burden +#' estimates, with the length of the vector depending on how many checks fail. +#' +#' @keywords diagnostics +#' +#' @export +basic_burden_sanity <- function(burden) { + # TODO: expectations on burden + mes <- "Basic sanity check for burden estimates:" + + value_col <- "value" + value <- burden[[value_col]] + + if (is.numeric(burden$value)) { + if (anyNA(burden$value)) { + mes_any_missing <- glue::glue( + "Warning: Burden estimates should not have missing values, but some \\ + values are missing. Fix missing values by converting to zeros!" + ) + + mes <- c(mes, mes_any_missing) + } + + if (any(burden$value < 0, na.rm = TRUE)) { + mes_any_negative <- glue::glue( + "Warning: Burden estimates should all be positive or zero, but found \\ + some negative estimates!" + ) + + mes <- c(mes, mes_any_negative) + } + } else { + mes_not_numeric <- glue::glue( + "Warning: Burden estimates should be of type `numeric`, but are not. \\ + Convert burden estimates to `numeric`." + ) + + mes <- c(mes, mes_not_numeric) + } + + if (length(mes) == 1L) { + mes <- c(mes, "PASS.") + } + + mes +} + +#' @title +#' +#' @description +#' A short description... +#' +#' @param coverage +#' +#' @param wpp +#' +#' @return +#' +#' @examples +#' # example code +#' +#' @keywords diagnostics +#' +#' @export +transfrom_coverage_fvps <- function(coverage, wpp) { + # TODO: checks on coverage + # TODO: checks on wpp + + cols_to_select <- c("age_from", "age_to", "gender") + todo_list <- dplyr::select( + coverage, + cols_to_select + ) + todo_list <- dplyr::distinct(todo_list) + todo_list <- dplyr::mutate( + todo_list, + job = seq_along(.data$gender) + ) + + # TODO: THIS NEEDS TO BE CLEANED UP + # TODO: clarify structure of `coverage` and mapping of gender to age + pop_all <- list() + for (i in seq_along(todo_list$age_from)) { + pop_all[[i]] <- wpp %>% + x <- dplyr::filter( + wpp, + .data$age >= todo_list$age_from[i], + .data$age <= todo_list$age_to[i], + .data$gender == todo_list$gender[i] + ) + x <- dplyr::group_by(x, .data$country, .data$year) + x <- dplyr::summarise( + x, + target_wpp = sum(.data$value), + .groups = "drop" + ) + x <- dplyr::mutate( + x, + job = todo_list$job[i] + ) + } + pop_all <- dplyr::bind_rows(pop_all) + + # TODO: add comments or explain in fn docs + d <- dplyr::left_join( + coverage, + pop_all, + by = c("country", "year") + ) + d <- dplyr::mutate( + d, + target = dplyr::coalesce( + .data$target, + .data$target_wpp # replace NAs in target with target_wpp + ), + fvps = .data$target * .data$coverage, + fvps_adjusted = dplyr::if_else( + .data$fvps > .data$target_wpp, + .data$target_wpp, + .data$fvps + ), + target_adjusted = dplyr::if_else( + .data$target > .data$target_wpp, + .data$target_wpp, + .data$target + ), + coverage_adjusted = .data$fvps_adjusted / .data$target_adjusted + ) + d[["target_wpp"]] <- NULL + + d +} + +# TODO: fill out fn docs +#' @title +#' +#' @description +#' +#' @param burden +#' +#' @param scenario_order +#' +#' @return +#' +#' @examples +#' +#' @keywords diagnostics +#' +#' @export +impact_check <- function(burden, scenario_order) { + # TODO: input checks + scenario_cols <- c("scenario", "scenario_order") + scenario_order <- dplyr::select(scenario_order, {{ scenario_cols }}) + + d <- dplyr::summarise( + millions = sum(.data$value) / 1e6, + .by = c("scenario", "burden_outcome"), + .groups = "drop" # probably unnecessary as grouping is temporary + ) + + d <- dplyr::left_join( + d, + scenario_order, + by = "scenario" + ) + + d <- dplyr::mutate( + d, + scenario_order = glue::glue("{.data$scenario_order}:{.data$scenario}") + ) + + d$scenario <- NULL + + d <- tidyr::pivot_wider( + d, + names_from = "scenario_order", + values_from = "million" + ) + + # TODO: CLEAN THIS UP + for (i in 2:nrow(scenario_order)) { + for (j in 1:(i - 1)) { + if (any(d[i + 1] > d[j + 1])) { + cat(sprintf( + "    **Warning**: provided less disease burden in lower coverage scenario (%s) compared to higher coverage scenario (%s).", + names(d)[j + 1], + names(d)[i + 1] + )) + cat("
") + } else { + cat(sprintf( + "    **PASS**: Provided higher disease burden in scenarios with fewer FVPs." + )) + cat("
") + } + } + } + + d +} diff --git a/R/constants.R b/R/constants.R new file mode 100644 index 0000000..a375556 --- /dev/null +++ b/R/constants.R @@ -0,0 +1,14 @@ +#' Package constants +#' +#' @name constants +#' @rdname constants +#' +#' @keywords constants +#' +#' @export +const_data_colnames <- c( + "scenario_type", + "scenario_type_description", + "scenario", + "scenario_description" +) diff --git a/R/helpers.R b/R/helpers.R new file mode 100644 index 0000000..eb06f52 --- /dev/null +++ b/R/helpers.R @@ -0,0 +1,36 @@ +#' Make data for a no-vaccination scenario +#' +#' @name helpers +#' @rdname helpers +#' +#' @description +#' Helper functions for burden diagnostics. +#' +#' @inheritParams validate_file_dict_template +#' +#' @keywords internal +#' +#' @return +#' +#' - `make_novax_scenario()` returns a tibble with the minimum required column +#' names, and entries corresponding to a 'no-vaccination' scenario for +#' `disease`. +make_novax_scenario <- function(disease) { + v <- c( + "novac", + "No Vaccination", + glue::glue("{disease}-no-vaccination"), + "No vaccination" + ) + + # internal function without input checking + df <- dplyr::tibble( + variable = need_colnames, + value = v + ) + + tidyr::pivot_wider( + df, + names_from = "variable" + ) +} diff --git a/R/plotting.R b/R/plotting.R new file mode 100644 index 0000000..80c9b43 --- /dev/null +++ b/R/plotting.R @@ -0,0 +1,315 @@ +#' Plotting theme for vimcheck +#' +#' @description +#' A simple plotting theme building on [ggplot2::theme_bw()]. +#' +#' +#' @name plotting_theme +#' @rdname plotting_theme +#' +#' @param x_text_angle +#' +#' @param y_text_angle +#' +#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to +#' [ggplot2::theme()]. +#' +#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects. +#' +#' @export +theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { + ggplot2::theme_bw() + + ggplot2::theme( + axis.text.x = ggplot2::element_text( + size = 10, + angle = x_text_angle + ), + strip.text.y = ggplot2::element_text( + angle = y_text_angle + ), + plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"), + ... + ) +} + +#' Plot burden and impact diagostics +#' +#' @name plotting +#' @rdname plotting +#' +#' @importFrom ggplot2 ggplot +#' +#' @description +#' Plotting functions for burden and impact diagnostics. This documentation +#' holds all plotting functions for now. +#' +#' @param d +#' +#' @param fig_number +#' +#' @return A `` object that can be printed to screen in the plot frame +#' or saved to an output device (i.e., saved as an image file). +#' +#' @examples +#' +#' @export +plot_compare_demography <- function(d, fig_number) { + num_countries <- length(unique(d$country)) + names_melting_data <- c( + "scenario", + "age", + "year", + "expected", + "provided", + "difference" + ) + names_melting_by <- c("scenario", "age", "year") + + # TODO: use tidyr; BIG PICTURE: NO DATA PREP IN PLOTTING FUNCTIONS! + tot <- reshape2::melt(d[, names_melting_data], id.vars = names_melting_by) + dat <- tot %>% + dplyr::group_by(variable, scenario, year, age) %>% + dplyr::summarise(value = sum(value)) %>% + dplyr::mutate(millions = value / 1e6) %>% + dplyr::arrange(age) + + # TODO: use .data and .vars, import or namespace functions + g <- ggplot(data = dat, aes(x = year, y = millions, fill = age)) + + geom_bar(stat = "identity") + + facet_wrap(~ scenario + variable, ncol = 3) + + scale_fill_distiller(palette = "PuOr") + + geom_hline(yintercept = 0, color = 'red') + + labs( + x = "calendar year", + y = glue::glue("people (in millions"), + title = glue::glue( + "Fig. {fig_number}. Comparison between interpolated population and \\ + cohort size ({num_countries} countries)." + ) + ) + + theme_vimc() + + g +} + +#' @name plotting +#' +#' @param burden +#' +#' @export +plot_age_patterns <- function(burden, fig_number) { + # TODO: REMOVE DATA PREP FROM PLOTTING FNS + d <- burden %>% + group_by(scenario, burden_outcome, age) %>% + summarise(millions = sum(value) / 1e6) + + g <- ggplot(d, aes(x = age, y = millions)) + + geom_bar(stat = "identity") + + facet_grid( + burden_outcome ~ scenario, + scales = "free_y", + labeller = labeller(scenario = label_wrap_gen(10)) + ) + + coord_cartesian(xlim = c(0, max(d$age) + 1)) + + labs( + y = "people (in millions)", + title = glue::glue( + "Fig. {fig_number}. Burden age patterns across all countries and years" + ) + ) + + theme_vimc() + + g +} + +#' @name plotting +#' +#' @param year_max +#' +#' @export +plot_global_burden_decades <- function(burden, year_max, fig_number) { + # TODO: prefer moving these conditional checks elsewhere + # TODO: prefer moving data prep outside plotting fn + stopifnot(year_max %% 10 == 0) + d <- burden %>% + filter(year <= year_max) %>% + mutate(year2 = ifelse(year == year_max, year_max - 1, year)) %>% + mutate(decade = floor(year2 / 10) * 10) %>% + mutate( + decade_label = ifelse( + decade == year_max - 10, + paste0(decade, "-", decade + 10), + paste0(decade, "-", decade + 9) + ) + ) %>% + group_by(scenario, burden_outcome, decade_label) %>% + summarise(millions = sum(value) / 1e6) + + g <- ggplot(d, aes(x = scenario, y = millions, fill = scenario)) + + geom_bar(stat = "identity") + + facet_grid(burden_outcome ~ decade_label, scales = "free_y") + + labs( + y = "people (in millions)", + title = glue::glue( + "Fig. {fig_number}. Global disease burden trends" + ) + ) + + + # TODO: reconcile with theme_vimc() or make new theme + theme_bw() %+replace% + theme( + axis.title.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank() + ) + + g +} + +#' @name plotting +#' +#' @param d +#' +#' @param outcome +#' +#' @export +plot_global_burden <- function(d, outcome, fig_number) { + data_ <- dplyr::filter( + d, + .data$burden_outcome == outcome + ) + + g <- ggplot( + data, + aes(x = year, y = millions, fill = age) + ) + + geom_bar(stat = "identity", aes(fill = age)) + + facet_grid( + ~scenario, + scales = "free_y", + labeller = labeller(scenario = label_wrap_gen(10)) # TODO: avoid magic numbers + ) + + scale_fill_distiller(palette = "Spectral") + + labs( + x = "calendar year", + y = paste(outcomes_list[i], "(in millions)"), # TODO: where is outcomes_list!? + title = glue::glue( + "Fig. {fig_number}: Global trends of disease burden ({outcome})." + ) + ) + + theme_vimc(x_text_angle = 90) + + g +} + +#' @name plotting +#' +#' @param cov +#' +#' @export +plot_coverage_set <- function(cov, figure_number) { + # TODO: remove data prep + no_vacc <- expand_grid( + year = unique(cov$year), + country = unique(cov$country) + ) %>% + mutate( + coverage = 0, + delivery = "none", + scenario_description = "No vaccination" + ) + + cov1 <- cov %>% + mutate(delivery = paste(vaccine, activity_type, sep = "-")) %>% + select(scenario_description, delivery, country, year, coverage) %>% + bind_rows(no_vacc) + + g <- ggplot(cov1, aes(x = year, y = coverage, fill = delivery)) + + geom_bar( + stat = "identity", + position = position_dodge(width = 0.9), + alpha = 0.7 + ) + + facet_grid( + "country ~ scenario_description", + scales = "free_y", + labeller = labeller(scenario_description = label_wrap_gen(10)) + ) + + scale_x_continuous(breaks = pretty(unique(cov1$year))) + + labs( + x = "calendar year", + y = "Proportion of target population", + title = glue::glue( + "Fig. {figure_number}: Coverage sets for {cov$disease[1]}" + ) + ) + + theme_vimc(90) + + g +} + +#' @name plotting +#' +#' @param fvp +#' +#' @param year_min +#' +#' @param year_max +#' +#' @export +plot_fvp <- function(fvp, year_min, year_max, figure_number) { + # TODO: PREFER TO REMOVE DATA PREP CODE + no_vacc <- expand_grid( + year = unique(fvp$year), + country = unique(fvp$country) + ) %>% + mutate(fvps_adjusted = 0, scenario_description = "No vaccination") + + fvp_final <- bind_rows(fvp, no_vacc) %>% + filter(year >= year_min & year <= year_max) %>% + mutate(scenario = as.factor(scenario_description)) + + fvp_final$scenario <- relevel(fvp_final$scenario, "No vaccination") + fvp_final$scenario <- gsub(tolower(fvp$disease[1L]), "", fvp_final$scenario) + fvp_final$scenario <- gsub("-", " ", fvp_final$scenario) + + scenario_order <- c(names(sort(tapply( + fvp_final$fvps_adjusted, + fvp_final$scenario, + sum, + na.rm = TRUE + )))) + + fvp_final$scenario <- forcats::fct_relevel(fvp_final$scenario, scenario_order) + + fvp_agg <- + fvp_final %>% + dplyr::group_by(year, scenario, disease) %>% + dplyr::summarise(fvp = sum(fvps_adjusted, na.rm = TRUE)) + + # TODO: prefer to use a scale transform rather than touching data + g <- ggplot(fvp_agg, aes(x = year, y = fvp / 1e6, fill = scenario)) + + geom_bar( + stat = "identity", + colour = "midnightblue", + fill = "midnightblue" + ) + + facet_grid( + ~scenario, + scales = "free_y", + labeller = labeller(scenario = label_wrap_gen(10)) + ) + + scale_x_continuous(breaks = pretty(unique(fvp_agg$year))) + + ylab(paste("FVP (in millions)")) + + labs( + x = "calendar year", + y = "FVP (in millions)", + title = glue::glue( + "Fig. {figure_number}: Fully Vaccinated Persons at global level by \\ + scenario for {fvp$disease[1L]}" + ) + ) + + theme_vimc(90, legend.position = "none") + + g +} diff --git a/R/vimcheck.R b/R/vimcheck.R index a65cf64..9428bd0 100644 --- a/R/vimcheck.R +++ b/R/vimcheck.R @@ -1,6 +1,8 @@ -#' @keywords internal +#' @keywords package_doc "_PACKAGE" ## usethis namespace: start ## usethis namespace: end NULL + +#' @import rlang diff --git a/_pkgdown.yml b/_pkgdown.yml index ca4c87b..79b8dfd 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -3,3 +3,25 @@ url: ~ template: bootstrap: 5 + +reference: + - title: Package-level documentation + contents: + - has_keyword("package_doc") + - title: Diagnostic functions + desc: Package diagnostic functions. + contents: + - has_keyword("diagnostics") + - title: Plotting functions + desc: Package plotting functions. + contents: + - plotting + - plotting_theme + - title: Internal functions + desc: Internal helper functions. + contents: + - has_keyword("internal") + - title: Constants + desc: Package constants. + contents: + - has_keyword("constants") diff --git a/man/basic_burden_sanity.Rd b/man/basic_burden_sanity.Rd new file mode 100644 index 0000000..1abd7fe --- /dev/null +++ b/man/basic_burden_sanity.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{basic_burden_sanity} +\alias{basic_burden_sanity} +\title{Sanity checks on burden estimates} +\usage{ +basic_burden_sanity(burden) +} +\arguments{ +\item{burden}{A \verb{} of disease burden estimates. Must have +at least a single column named \code{"value"} of numeric burden estimates.} +} +\value{ +A character vector of messages generated by checks on burden +estimates, with the length of the vector depending on how many checks fail. +} +\description{ +Helper function for sanity checks on burden estimate values. +Checks whether any burden estimates are non-numeric, missing, or negative. +} +\keyword{diagnostics} diff --git a/man/check_demography_alignment.Rd b/man/check_demography_alignment.Rd new file mode 100644 index 0000000..31e7338 --- /dev/null +++ b/man/check_demography_alignment.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{check_demography_alignment} +\alias{check_demography_alignment} +\title{Check incoming burden cohort size against interpolated population} +\usage{ +check_demography_alignment(burden_set, wpp, gender = "both") +} +\arguments{ +\item{burden_set}{} + +\item{wpp}{} + +\item{gender}{} +} +\description{ +Check incoming burden cohort size against interpolated population +} +\keyword{diagnostics} diff --git a/man/check_template_alignment.Rd b/man/check_template_alignment.Rd new file mode 100644 index 0000000..b22480e --- /dev/null +++ b/man/check_template_alignment.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{check_template_alignment} +\alias{check_template_alignment} +\title{Check incoming burden set against template} +\usage{ +check_template_alignment(burden_set, template) +} +\arguments{ +\item{burden_set}{} + +\item{template}{} +} +\description{ +Check incoming burden set against template +} +\keyword{diagnostics} diff --git a/man/constants.Rd b/man/constants.Rd new file mode 100644 index 0000000..e883e95 --- /dev/null +++ b/man/constants.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constants.R +\docType{data} +\name{constants} +\alias{constants} +\alias{const_data_colnames} +\title{Package constants} +\format{ +An object of class \code{character} of length 4. +} +\usage{ +const_data_colnames +} +\description{ +Package constants +} +\keyword{constants} diff --git a/man/helpers.Rd b/man/helpers.Rd new file mode 100644 index 0000000..fb9c4df --- /dev/null +++ b/man/helpers.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{helpers} +\alias{helpers} +\alias{make_novax_scenario} +\title{Make data for a no-vaccination scenario} +\usage{ +make_novax_scenario(disease) +} +\arguments{ +\item{disease}{A disease identifier.} +} +\value{ +\itemize{ +\item \code{make_novax_scenario()} returns a tibble with the minimum required column +names, and entries corresponding to a 'no-vaccination' scenario for +\code{disease}. +} +} +\description{ +Helper functions for burden diagnostics. +} +\keyword{internal} diff --git a/man/plotting.Rd b/man/plotting.Rd new file mode 100644 index 0000000..213634f --- /dev/null +++ b/man/plotting.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plotting} +\alias{plotting} +\alias{plot_compare_demography} +\alias{plot_age_patterns} +\alias{plot_global_burden_decades} +\alias{plot_global_burden} +\alias{plot_coverage_set} +\alias{plot_fvp} +\title{Plot burden and impact diagostics} +\usage{ +plot_compare_demography(d, fig_number) + +plot_age_patterns(burden, fig_number) + +plot_global_burden_decades(burden, year_max, fig_number) + +plot_global_burden(d, outcome, fig_number) + +plot_coverage_set(cov, figure_number) + +plot_fvp(fvp, year_min, year_max, figure_number) +} +\arguments{ +\item{d}{} + +\item{fig_number}{} + +\item{burden}{} + +\item{year_max}{} + +\item{outcome}{} + +\item{cov}{} + +\item{fvp}{} + +\item{year_min}{} +} +\value{ +A \verb{} object that can be printed to screen in the plot frame +or saved to an output device (i.e., saved as an image file). +} +\description{ +Plotting functions for burden and impact diagnostics. This documentation +holds all plotting functions for now. +} diff --git a/man/plotting_theme.Rd b/man/plotting_theme.Rd new file mode 100644 index 0000000..80fcf2e --- /dev/null +++ b/man/plotting_theme.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plotting_theme} +\alias{plotting_theme} +\alias{theme_vimc} +\title{Plotting theme for vimcheck} +\usage{ +theme_vimc(x_text_angle = 45, y_text_angle = 0, ...) +} +\arguments{ +\item{x_text_angle}{} + +\item{y_text_angle}{} + +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Other arguments passed to +\code{\link[ggplot2:theme]{ggplot2::theme()}}.} +} +\value{ +A \code{ggplot2} theme that can be added to \code{ggplot2} plots or objects. +} +\description{ +A simple plotting theme building on \code{\link[ggplot2:ggtheme]{ggplot2::theme_bw()}}. +} diff --git a/man/validate_complete_incoming_files.Rd b/man/validate_complete_incoming_files.Rd new file mode 100644 index 0000000..9a1ecd8 --- /dev/null +++ b/man/validate_complete_incoming_files.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{validate_complete_incoming_files} +\alias{validate_complete_incoming_files} +\title{Validate files in a burden estimate} +\usage{ +validate_complete_incoming_files(path_burden = "incoming_burden_estimates") +} +\arguments{ +\item{path_burden}{A directory with burden estimate data.} +} +\value{ +A \verb{} of the scenario file dictionary in \code{path_burden} if all +checks pass. Otherwise, exits with informative errors on failed checks. +} +\description{ +Check that incoming data files in a burden estimate are complete, and that +no extra files have been included. +This function expects that incoming burden files are in the +directory given by \code{path_burden}, which holds a file dictionary which maps +each data file to a specific scenario. +} +\keyword{diagnostics} diff --git a/man/validate_file_dict_template.Rd b/man/validate_file_dict_template.Rd new file mode 100644 index 0000000..525b4a5 --- /dev/null +++ b/man/validate_file_dict_template.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{validate_file_dict_template} +\alias{validate_file_dict_template} +\title{Validate file dictionary template} +\usage{ +validate_file_dict_template(disease, path_burden = "incoming_burden_estimates") +} +\arguments{ +\item{disease}{A disease identifier.} + +\item{path_burden}{A directory with burden estimate data.} +} +\value{ +Nothing; called primarily for its side-effets. +If the file \code{path_burden/file_dictionary.csv} does not exist, a file +dictionary CSV file is written to the same location. +Prints a message to screen informing the user whether any action has been +taken. +} +\description{ +Function to create a \code{file_dictionary} template. +It maps to touchstone disease scenarios and you will see expected number of +scenarios i.e. the number of files that we expect from a model. +Users should populate the file column to match the scenario-file. +This function will run if a \code{file_dictionary.csv} file does not exist +} +\keyword{diagnostics} diff --git a/man/vimcheck-package.Rd b/man/vimcheck-package.Rd index c26fbd9..c8369de 100644 --- a/man/vimcheck-package.Rd +++ b/man/vimcheck-package.Rd @@ -33,4 +33,4 @@ Other contributors: } } -\keyword{internal} +\keyword{package_doc} From 5f65f68daf01377f4ab560785f6e4a38f5edc9f0 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Wed, 7 Jan 2026 16:28:28 +0000 Subject: [PATCH 3/6] Rm RESIDE tools --- R/import-standalone-utils-assert-path.R | 157 --------------- R/import-standalone-utils-assert.R | 251 ------------------------ 2 files changed, 408 deletions(-) delete mode 100644 R/import-standalone-utils-assert-path.R delete mode 100644 R/import-standalone-utils-assert.R diff --git a/R/import-standalone-utils-assert-path.R b/R/import-standalone-utils-assert-path.R deleted file mode 100644 index 673fb8e..0000000 --- a/R/import-standalone-utils-assert-path.R +++ /dev/null @@ -1,157 +0,0 @@ -# Standalone file: do not edit by hand -# Source: https://github.com/reside-ic/reside.utils/blob/HEAD/R/standalone-utils-assert-path.R -# Generated by: usethis::use_standalone("reside-ic/reside.utils", "utils-assert-path") -# ---------------------------------------------------------------------- -# -# --- -# repo: reside/reside.utils -# file: standalone-utils-assert-path.R -# dependencies: standalone-utils-assert.R -# imports: [cli, fs] -# --- -assert_file_exists <- function(files, name = "File", call = parent.frame(), - arg = NULL) { - err <- !file.exists(files) - ## TODO: throughout this file it would be nice to use cli's '.file' - ## class and ector contraction, *but* it renders poorly on default - ## black backgfrounds (dark blue) and makes testing a bit harder - ## because the rendering depends on cli options. - ## - ## TODO: add a canonical case check, as for the relative path bit. - if (any(err)) { - ## Because we interpolate both 'name' and the file list, we need - ## to disambiguate the quantity. - n <- cli::qty(sum(err)) - cli::cli_abort( - "{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", - call = call, arg = arg) - } -} - - -assert_file_exists_relative <- function(files, workdir, name, - call = parent.frame(), - arg = NULL) { - assert_relative_path(files, name, workdir, call) - - assert_character(files, name, call = call) - err <- !file_exists(files, workdir = workdir) - if (any(err)) { - n <- cli::qty(sum(err)) - cli::cli_abort( - c("{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", - i = "Looked within directory '{workdir}'"), - call = call) - } - - files_canonical <- file_canonical_case(files, workdir) - err <- is.na(files_canonical) | fs::path(files) != files_canonical - if (any(err)) { - i <- err & !is.na(files_canonical) - hint_case <- sprintf("For '%s', did you mean '%s'?", - files[i], files_canonical[i]) - names(hint_case) <- rep("i", length(hint_case)) - n <- cli::qty(sum(err)) - cli::cli_abort( - c("{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", - hint_case, - i = paste("If you don't use the canonical case for a file, your code", - "is not portable across different platforms"), - i = "Looked within directory '{workdir}'"), - call = call) - } -} - - -assert_is_directory <- function(path, name = "Directory", call = parent.frame(), - arg = NULL) { - assert_scalar_character(path, arg = arg, call = call) - assert_file_exists(path, name = name, arg = arg, call = call) - if (!fs::is_dir(path)) { - cli::cli_abort("Path exists but is not a directory: {path}", - call = call, arg = arg) - } -} - - -assert_relative_path <- function(files, name, workdir, call = parent.frame(), - arg = NULL) { - err <- fs::is_absolute_path(files) - if (any(err)) { - n <- cli::qty(sum(err)) - files_err <- files[err] - names(files_err) <- rep("x", length(files_err)) - cli::cli_abort( - c("{name}{n}{?s} must be {?a/} relative path{?s}", - files_err, - i = "Path was relative to directory '{workdir}'"), - call = call, arg = arg) - } - - err <- vapply(fs::path_split(files), function(x) any(x == ".."), TRUE) - if (any(err)) { - n <- cli::qty(sum(err)) - files_err <- files[err] - names(files_err) <- rep("x", length(files_err)) - cli::cli_abort( - c("{name}{n}{?s} must not contain '..' (parent directory) components", - files_err, - i = "Path was relative to directory '{workdir}'"), - call = call, arg = arg) - } -} - - -assert_directory_does_not_exist <- function(x, name = "Directory", arg = NULL, - call = parent.frame()) { - ok <- !fs::dir_exists(x) - if (!all(ok)) { - cli::cli_abort("{name}{?s} already exists: {format_file_list(x[!ok])}", - call = call, arg = arg) - } - invisible(x) -} - - -file_canonical_case <- function(path, workdir) { - if (length(path) != 1) { - return(vapply(path, file_canonical_case, "", workdir, USE.NAMES = FALSE)) - } - stopifnot(!fs::is_absolute_path(path)) - path_split <- fs::path_split(path)[[1]] - base <- workdir - ret <- character(length(path_split)) - for (i in seq_along(path_split)) { - pos <- dir(base, all.files = TRUE, no.. = TRUE) - el <- path_split[[i]] - j <- which(tolower(el) == tolower(pos)) - if (length(j) == 1) { - el <- pos[[j]] - } else if (el %in% pos) { - # We might want to warn here? - # message("Multiple casings present; this is not portable") - } else { - return(NA_character_) - } - ret[[i]] <- el - base <- file.path(base, el) - } - paste(ret, collapse = "/") -} - - -file_exists <- function(..., workdir = NULL) { - files <- c(...) - if (!is.null(workdir)) { - assert_scalar_character(workdir) - owd <- setwd(workdir) # nolint - on.exit(setwd(owd)) # nolint - } - fs::file_exists(files) -} - - -format_file_list <- function(x) { - cli::cli_vec(sprintf("'%s'", x), - style = list("vec-sep2" = ", ", "vec-last" = ", ")) -} diff --git a/R/import-standalone-utils-assert.R b/R/import-standalone-utils-assert.R deleted file mode 100644 index 441503d..0000000 --- a/R/import-standalone-utils-assert.R +++ /dev/null @@ -1,251 +0,0 @@ -# Standalone file: do not edit by hand -# Source: https://github.com/reside-ic/reside.utils/blob/HEAD/R/standalone-utils-assert.R -# Generated by: usethis::use_standalone("reside-ic/reside.utils", "utils-assert") -# ---------------------------------------------------------------------- -# -# --- -# repo: reside/reside.utils -# file: standalone-utils-assert.R -# imports: cli -# --- -assert_scalar <- function(x, name = deparse(substitute(x)), arg = name, - call = parent.frame()) { - if (length(x) != 1) { - cli::cli_abort( - c("'{name}' must be a scalar", - i = "{name} has length {length(x)}"), - call = call, arg = arg) - } - invisible(x) -} - - -assert_character <- function(x, name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - if (!is.character(x)) { - cli::cli_abort("Expected '{name}' to be character", call = call, arg = arg) - } - invisible(x) -} - - -assert_numeric <- function(x, name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - if (!is.numeric(x)) { - cli::cli_abort("Expected '{name}' to be numeric", call = call, arg = arg) - } - invisible(x) -} - - -assert_integer <- function(x, name = deparse(substitute(x)), - tolerance = NULL, arg = name, - call = parent.frame()) { - if (is.numeric(x)) { - rx <- round(x) - if (is.null(tolerance)) { - tolerance <- sqrt(.Machine$double.eps) - } - if (!isTRUE(all.equal(x, rx, tolerance = tolerance))) { - cli::cli_abort( - c("Expected '{name}' to be integer", - i = paste("{cli::qty(length(x))}The provided", - "{?value was/values were} numeric, but not very close", - "to integer values")), - arg = arg, call = call) - } - x <- as.integer(rx) - } else { - cli::cli_abort("Expected '{name}' to be integer", call = call, arg = arg) - } - invisible(x) -} - - -assert_logical <- function(x, name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - if (!is.logical(x)) { - cli::cli_abort("Expected '{name}' to be logical", arg = arg, call = call) - } - invisible(x) -} - - -assert_nonmissing <- function(x, name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - if (anyNA(x)) { - cli::cli_abort("Expected '{name}' to be non-NA", arg = arg, call = call) - } - invisible(x) -} - - -assert_scalar_character <- function(x, name = deparse(substitute(x)), - allow_null = FALSE, - arg = name, call = parent.frame()) { - if (allow_null && is.null(x)) { - return(invisible(x)) - } - assert_scalar(x, name, arg = arg, call = call) - assert_character(x, name, arg = arg, call = call) - assert_nonmissing(x, name, arg = arg, call = call) -} - - -assert_scalar_numeric <- function(x, name = deparse(substitute(x)), - allow_null = FALSE, - arg = name, call = parent.frame()) { - if (allow_null && is.null(x)) { - return(invisible(x)) - } - assert_scalar(x, name, arg = arg, call = call) - assert_numeric(x, name, arg = arg, call = call) - assert_nonmissing(x, name, arg = arg, call = call) -} - - -assert_scalar_integer <- function(x, name = deparse(substitute(x)), - tolerance = NULL, allow_null = FALSE, - arg = name, call = parent.frame()) { - if (allow_null && is.null(x)) { - return(invisible(x)) - } - assert_scalar(x, name, arg = arg, call = call) - assert_integer(x, name, tolerance = tolerance, arg = arg, call = call) - assert_nonmissing(x, name, arg = arg, call = call) -} - - -assert_scalar_logical <- function(x, name = deparse(substitute(x)), - allow_null = FALSE, - arg = name, call = parent.frame()) { - if (allow_null && is.null(x)) { - return(invisible(x)) - } - assert_scalar(x, name, arg = arg, call = call) - assert_logical(x, name, arg = arg, call = call) - assert_nonmissing(x, name, arg = arg, call = call) -} - - -assert_scalar_size <- function(x, allow_zero = TRUE, allow_null = FALSE, - name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - if (allow_null && is.null(x)) { - return(invisible(x)) - } - assert_scalar_integer(x, name = name, arg = arg, call = call) - assert_nonmissing(x, name, arg = arg, call = call) - min <- if (allow_zero) 0 else 1 - if (x < min) { - cli::cli_abort("'{name}' must be at least {min}", arg = arg, call = call) - } - invisible(x) -} - - -assert_length <- function(x, len, name = deparse(substitute(x)), arg = name, - call = parent.frame()) { - if (length(x) != len) { - cli::cli_abort( - "Expected '{name}' to have length {len}, but was length {length(x)}", - arg = arg, call = call) - } - invisible(x) -} - - -assert_is <- function(x, what, name = deparse(substitute(x)), arg = name, - call = parent.frame()) { - if (!inherits(x, what)) { - cli::cli_abort("Expected '{name}' to be a '{what}' object", - arg = arg, call = call) - } - invisible(x) -} - - -assert_list <- function(x, name = deparse(substitute(x)), arg = name, - call = parent.frame()) { - if (!is.list(x)) { - cli::cli_abort("Expected '{name}' to be a list", - arg = arg, call = call) - } - invisible(x) -} - - -assert_scalar_positive_numeric <- function(x, allow_zero = TRUE, - name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - assert_scalar_numeric(x, name = name, call = call) - if (allow_zero) { - if (x < 0) { - cli::cli_abort("'{name}' must be at least 0", arg = arg, call = call) - } - } else { - if (x <= 0) { - cli::cli_abort("'{name}' must be greater than 0", arg = arg, call = call) - } - } - invisible(x) -} - - -assert_scalar_positive_integer <- function(x, allow_zero = TRUE, - name = deparse(substitute(x)), - tolerance = NULL, arg = name, - call = parent.frame()) { - assert_scalar_integer(x, name, tolerance = tolerance, arg = arg, call = call) - min <- if (allow_zero) 0 else 1 - if (x < min) { - cli::cli_abort("'{name}' must be at least {min}", arg = arg, call = call) - } - invisible(x) -} - - -assert_raw <- function(x, len = NULL, name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - if (!is.raw(x)) { - cli::cli_abort("'{name}' must be a raw vector", arg = arg, call = call) - } - if (!is.null(len)) { - assert_length(x, len, name = name, call = call) - } - invisible(x) -} - - -assert_named <- function(x, unique = FALSE, name = deparse(substitute(x)), - arg = name, call = parent.frame()) { - nms <- names(x) - if (is.null(nms)) { - cli::cli_abort("'{name}' must be named", call = call, arg = arg) - } - if (anyNA(nms) || any(nms == "")) { - cli::cli_abort("All elements of '{name}' must be named", - call = call, arg = arg) - } - if (unique && anyDuplicated(names(x))) { - dups <- sprintf("'%s'", unique(names(x)[duplicated(names(x))])) - cli::cli_abort( - c("'{name}' must have unique names", - i = "Found {length(dups)} duplicate{?s}: {dups}"), - call = call, arg = arg) - } - invisible(x) -} - - -match_value <- function(x, choices, name = deparse(substitute(x)), arg = name, - call = parent.frame()) { - assert_scalar_character(x, call = call, name = name, arg = arg) - if (!(x %in% choices)) { - choices_str <- paste(sprintf("'%s'", choices), collapse = ", ") - cli::cli_abort(c("'{name}' must be one of {choices_str}", - i = "Instead we were given '{x}'"), call = call, - arg = arg) - } - x -} From 0b862a3160f5524d167d2278a9e897017107f368 Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Wed, 7 Jan 2026 16:28:47 +0000 Subject: [PATCH 4/6] WIP tweaks on diagnostics fns --- R/burden_diagnositics.R | 63 +++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/R/burden_diagnositics.R b/R/burden_diagnositics.R index eac40de..a8b5bcf 100644 --- a/R/burden_diagnositics.R +++ b/R/burden_diagnositics.R @@ -28,17 +28,48 @@ validate_file_dict_template <- function( ) { # TODO: check conditions on arg disease - what is the original source `pars`? - assert_is_directory(path_burden) + checkmate::assert_directory_exists(path_burden) template <- file.path(path_burden, "file_dictionary.csv") if (file.exists(template)) { - cli::cli_inform( - "File dictionary found at {.file {template}}, no action needed." + # TODO: check that file_dictionary entries are acceptable? + data_dict <- readr::read_csv( + template, + show_col_types = FALSE ) + is_good_df <- checkmate::test_data_frame( + data_dict, + any.missing = FALSE, + min.cols = length(file_dict_colnames), + min.rows = 2L # needs a no-vax and a vax scenario? + ) && + checkmate::test_names( + colnames(data_dict), + must.include = file_dict_colnames + ) + + if (is_good_df) { + cli::cli_inform( + "File dictionary found at {.file {template}}, no action needed." + ) + } else { + cli::cli_abort( + c( + "File dictionary found at {.file {template}}, \\ + but it is not well formed. Please check info below.", + i = "Column names: {.str {colnames(data_dict)}}, \\ + expected min. column names: {.str {file_dict_colnames}}; + No. rows: {nrow(data_dict)}, expected min. no. rows: 2." + ) + ) + } } else { - # TODO: resolve magic strings - scenario_dir <- "model_inputs" + # TODO: explain why this branch of the decision tree triggers in fn docs + + # NOTE: see expected dir structure in `tests/testthat/testdata/` + scenario_dir <- file.path(path_burden, "model_inputs") checkmate::assert_directory_exists(scenario_dir) + scenario_data <- file.path(scenario_dir, "scenario.csv") checkmate::assert_file_exists(scenario_data) @@ -47,16 +78,16 @@ validate_file_dict_template <- function( # NOTE: consider wrapping into check function checkmate::assert_data_frame( sce, - any.missing = FALSE, - min.cols = length(const_data_colnames), + any.missing = TRUE, # allowing missing as contained in examples + min.cols = length(scenario_data_colnames), ) checkmate::assert_names( colnames(sce), - must.include = const_data_colnames + must.include = scenario_data_colnames ) # get distinct scenario entries and add no-vax if needed - sce <- dplyr::select(sce, {{ const_data_colnames }}) + sce <- dplyr::select(sce, {{ scenario_data_colnames }}) sce <- dplyr::distinct(sce) novax_scenario <- "novac" @@ -68,7 +99,7 @@ validate_file_dict_template <- function( ) } - sce$file <- NA_character_ # TODO: investigate this further + sce$file <- NA_character_ # NOTE: remove file readr::write_csv(sce, template) cli::cli_inform( @@ -102,7 +133,7 @@ validate_file_dict_template <- function( validate_complete_incoming_files <- function( path_burden = "incoming_burden_estimates" ) { - assert_is_directory(path_burden) + checkmate::assert_directory_exists(path_burden) files <- list.files(path_burden, full.names = TRUE) file_dict <- file.path(path_burden, "file_dictionary.csv") @@ -371,6 +402,8 @@ basic_burden_sanity <- function(burden) { #' @examples #' # example code #' +#' @importFrom dplyr .data +#' #' @keywords diagnostics #' #' @export @@ -426,13 +459,11 @@ transfrom_coverage_fvps <- function(coverage, wpp) { .data$target_wpp # replace NAs in target with target_wpp ), fvps = .data$target * .data$coverage, - fvps_adjusted = dplyr::if_else( - .data$fvps > .data$target_wpp, + fvps_adjusted = pmin( .data$target_wpp, .data$fvps ), - target_adjusted = dplyr::if_else( - .data$target > .data$target_wpp, + target_adjusted = pmin( .data$target_wpp, .data$target ), @@ -454,6 +485,8 @@ transfrom_coverage_fvps <- function(coverage, wpp) { #' #' @return #' +#' @importFrom dplyr .data +#' #' @examples #' #' @keywords diagnostics From 9278beb32cf58dc29274fadf8b1c85c36b932f2c Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Wed, 7 Jan 2026 16:29:02 +0000 Subject: [PATCH 5/6] Add constants --- R/constants.R | 11 ++++++++++- R/helpers.R | 9 +++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/R/constants.R b/R/constants.R index a375556..f435b5d 100644 --- a/R/constants.R +++ b/R/constants.R @@ -6,7 +6,16 @@ #' @keywords constants #' #' @export -const_data_colnames <- c( +file_dict_colnames <- c( + "scenario_type", + "scenario_type_description", + "scenario", + "scenario_description", + "file" +) + +#' @name constants +scenario_data_colnames <- c( "scenario_type", "scenario_type_description", "scenario", diff --git a/R/helpers.R b/R/helpers.R index eb06f52..7557d02 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -20,17 +20,18 @@ make_novax_scenario <- function(disease) { "novac", "No Vaccination", glue::glue("{disease}-no-vaccination"), - "No vaccination" + "No vaccination", + "no-vaccination.csv" ) # internal function without input checking - df <- dplyr::tibble( - variable = need_colnames, + df_ <- dplyr::tibble( + variable = file_dict_colnames, value = v ) tidyr::pivot_wider( - df, + df_, names_from = "variable" ) } From 975c289d2ad3eaa3f44a0d7225d04cbeaedf221a Mon Sep 17 00:00:00 2001 From: Pratik Gupte Date: Wed, 7 Jan 2026 16:29:23 +0000 Subject: [PATCH 6/6] Add prelim tests with dummy test data --- .../file_dictionary.csv | 3 + .../model_inputs/scenario.csv | 12 +++ tests/testthat/test-burden_diagnostics.R | 92 +++++++++++++++++++ tests/testthat/test-helpers.R | 13 +++ 4 files changed, 120 insertions(+) create mode 100644 tests/testthat/incoming_burden_estimates/file_dictionary.csv create mode 100644 tests/testthat/incoming_burden_estimates/model_inputs/scenario.csv create mode 100644 tests/testthat/test-burden_diagnostics.R create mode 100644 tests/testthat/test-helpers.R diff --git a/tests/testthat/incoming_burden_estimates/file_dictionary.csv b/tests/testthat/incoming_burden_estimates/file_dictionary.csv new file mode 100644 index 0000000..0334584 --- /dev/null +++ b/tests/testthat/incoming_burden_estimates/file_dictionary.csv @@ -0,0 +1,3 @@ +scenario_type,scenario_type_description,scenario,scenario_description,file +disease-rout,Routine Only,disease-rout,disease-rout,no-vaccination.csv +novac,No Vaccination,disease-no-vaccination,No vaccination,with-vaccination.csv diff --git a/tests/testthat/incoming_burden_estimates/model_inputs/scenario.csv b/tests/testthat/incoming_burden_estimates/model_inputs/scenario.csv new file mode 100644 index 0000000..0998d8a --- /dev/null +++ b/tests/testthat/incoming_burden_estimates/model_inputs/scenario.csv @@ -0,0 +1,12 @@ +"scenario_type","scenario_type_description","scenario","scenario_description","coverage_set","gavi_support_level","source_from","disease","vaccine","activity_type","year","country","age_from","age_to","age_range_verbatim","target","coverage","gender","proportion_risk" +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2030,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2031,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2032,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2033,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2034,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2035,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2036,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2037,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2038,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2039,"RFP",0,0,"default routine age","NA",0.8,"Both",1 +"disease_rout","Routine Only","disease_rout","disease_rout","disease : disease , routine ,with: disease_rout","with","scenario_source","disease","disease","routine",2040,"RFP",0,0,"default routine age","NA",0.8,"Both",1 diff --git a/tests/testthat/test-burden_diagnostics.R b/tests/testthat/test-burden_diagnostics.R new file mode 100644 index 0000000..9a671ee --- /dev/null +++ b/tests/testthat/test-burden_diagnostics.R @@ -0,0 +1,92 @@ +test_that("`validate_file_dict_template()` works", { + # set up local tempdir to test missing bad file error msg + wd <- withr::local_tempdir() + + path_burden <- file.path(wd, "incoming_burden_estimates") + dir.create(path_burden) + file_dict <- file.path(path_burden, "file_dictionary.csv") + dummy_data <- data.frame() + readr::write_csv(dummy_data, file_dict) + + success_msg <- glue::glue( + "(File dictionary found at)*({file_dict})*(no action needed)" + ) + fail_msg <- "(File dictionary found)*(but it is not well formed)" + + expect_error( + validate_file_dict_template( + disease, + path_burden + ), + fail_msg + ) + + # check success segment + path_burden <- test_path("incoming_burden_estimates") + expect_message( + validate_file_dict_template( + disease, + path_burden + ), + success_msg + ) + + validate_file_dict_template( + "dummy_disease", + test_path("incoming_burden_estimates") + ) + + # TODO: check other branch of if-else ladder +}) + +test_that("`validate_complete_incoming_files()`", { + # set up local tempdir with bad file dictionary to check errors + wd <- withr::local_tempdir() + + path_burden <- file.path(wd, "incoming_burden_estimates") + dir.create(path_burden) + + fail_msg <- "(Expected a file dictionary)*(but it was not found)" + expect_error( + validate_complete_incoming_files(path_burden), + fail_msg + ) + + file_dict <- file.path(path_burden, "file_dictionary.csv") + dummy_data <- make_novax_scenario("dummy_disease") + dummy_data <- rbind(dummy_data, dummy_data) + n_duplicates <- anyDuplicated(dummy_data) + readr::write_csv(dummy_data, file_dict) + + fail_msg <- glue::glue( + "(Expected)*(non-duplicate entries)*(found {n_duplicates} duplicated rows)" + ) + expect_error( + validate_complete_incoming_files(path_burden), + fail_msg + ) + + # check error on duplicated filenames + dummy_data <- readr::read_csv( + test_path("incoming_burden_estimates", "file_dictionary.csv") + ) + dummy_data$file <- "dummy_file.csv" + readr::write_csv(dummy_data, file_dict) + + fail_msg <- "(Expected)*(non-duplicate scenario filenames)" + expect_error( + validate_complete_incoming_files(path_burden), + fail_msg + ) + + # check error on missing scenario files + dummy_data <- readr::read_csv( + test_path("incoming_burden_estimates", "file_dictionary.csv") + ) + readr::write_csv(dummy_data, file_dict) + fail_msg <- "Expected as many scenario data files as scenarios" + expect_error( + validate_complete_incoming_files(path_burden), + fail_msg + ) +}) diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R new file mode 100644 index 0000000..4c7f56d --- /dev/null +++ b/tests/testthat/test-helpers.R @@ -0,0 +1,13 @@ +# Basic checks on internal functions. These are mostly checked via exported +# functions for a smaller testing surface +test_that("`make_novax_scenario()` works", { + disease <- "dummy" + expect_no_condition( + make_novax_scenario(disease) + ) + df <- make_novax_scenario(disease) + expect_named( + df, + file_dict_colnames + ) +})