Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ docs
inst/doc
/data
/temporary-scripts
/cache
27 changes: 0 additions & 27 deletions CLAUDE.md

This file was deleted.

1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Imports:
ggplot2,
ipumsr,
janitor,
jsonlite,
lehdr,
lubridate,
magrittr,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(cache_it)
export(convert_table_text_to_dataframe)
export(download_openfema_datasets)
export(estimate_units_per_parcel)
export(estimate_zoning_envelope)
export(get_box_path)
Expand All @@ -28,6 +29,7 @@ export(get_system_username)
export(get_wildfire_burn_zones)
export(inflation_adjust)
export(interpolate_demographics)
export(list_openfema_endpoints)
export(polygons_to_linestring)
export(qualtrics_define_missing)
export(qualtrics_format_metadata)
Expand Down
7 changes: 0 additions & 7 deletions R/CLAUDE.md

This file was deleted.

183 changes: 183 additions & 0 deletions R/download_openfema_datasets.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
#' @title List available OpenFEMA dataset endpoints
#'
#' @description Fetches the dataset catalog from the OpenFEMA metadata API and
#'   flattens it into a tibble with one row per dataset: its name, title,
#'   version, record count, and the download URLs offered for each format.
#'
#' @returns A tibble with columns:
#' \describe{
#'   \item{name}{The API endpoint name (e.g., "DisasterDeclarationsSummaries").}
#'   \item{title}{Human-readable dataset title.}
#'   \item{version}{API version number.}
#'   \item{record_count}{Number of records in the dataset.}
#'   \item{formats}{Comma-separated, lower-cased list of available download
#'     formats.}
#'   \item{url_parquet}{Download URL for parquet format (NA if unavailable).}
#'   \item{url_csv}{Download URL for CSV format (NA if unavailable).}
#' }
#'
#' @export
#'
#' @examples
#' \dontrun{
#' endpoints <- list_openfema_endpoints()
#' endpoints |> dplyr::filter(stringr::str_detect(name, "Nfip"))
#' }
list_openfema_endpoints <- function() {
  api_url <- "https://www.fema.gov/api/open/v1/OpenFemaDataSets"
  payload <- jsonlite::fromJSON(api_url, simplifyDataFrame = FALSE)

  datasets <- payload[["OpenFemaDataSets"]]
  if (is.null(datasets)) {
    stop("Unexpected API response structure from OpenFEMA metadata endpoint.")
  }

  # Turn a single catalog record into a one-row tibble.
  describe_dataset <- function(dataset) {
    # A record without a "distribution" entry simply offers no downloads.
    distributions <- dataset[["distribution"]] %||% list()

    # Parallel collections: the i-th format belongs to the i-th URL.
    dist_formats <- purrr::map_chr(
      distributions,
      function(entry) tolower(entry[["format"]] %||% "")
    )
    dist_urls <- purrr::map(
      distributions,
      function(entry) entry[["accessURL"]] %||% NA_character_
    )

    # URL of the first distribution published in `fmt`, or NA when none is.
    url_for <- function(fmt) {
      hit <- match(fmt, dist_formats)
      if (is.na(hit)) NA_character_ else dist_urls[[hit]]
    }

    tibble::tibble(
      name = dataset[["name"]] %||% NA_character_,
      title = dataset[["title"]] %||% NA_character_,
      version = dataset[["version"]] %||% NA_integer_,
      record_count = dataset[["recordCount"]] %||% NA_integer_,
      formats = paste(dist_formats, collapse = ", "),
      url_parquet = url_for("parquet"),
      url_csv = url_for("csv")
    )
  }

  dplyr::bind_rows(purrr::map(datasets, describe_dataset))
}


#' @title Download full OpenFEMA datasets
#'
#' @description Downloads full data files for OpenFEMA API endpoints. Prefers
#'   parquet format, falling back to CSV. Uses the OpenFEMA metadata API to
#'   dynamically resolve download URLs. Each file is first written to a
#'   temporary `.part` file next to its destination and only moved into place
#'   once the download has completed, so an interrupted download never leaves
#'   a truncated file that a later run would mistake for a finished one.
#'
#' @param endpoints A character vector of dataset endpoint names (e.g.,
#'   `"DisasterDeclarationsSummaries"`). Use [list_openfema_endpoints()] to see
#'   available names. Default `NULL` downloads all endpoints.
#' @param download_directory Directory path where files will be saved. Created
#'   if it does not exist. Defaults to `"."`.
#' @param format_preference Character vector specifying format preference
#'   order. The first available format is used. Only `"parquet"` and `"csv"`
#'   are supported (matched case-insensitively); any other value is ignored
#'   with a warning. Defaults to `c("parquet", "csv")`.
#' @param overwrite Logical. If `FALSE` (default), skips files that already
#'   exist in `download_directory`.
#'
#' @returns A tibble summarizing the results with columns: `name`, `format`,
#'   `file_path`, `status` (one of "downloaded", "skipped", "failed",
#'   "no_format"). A zero-row tibble with the same columns is returned when
#'   there is nothing to download.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Download a single small dataset
#' download_openfema_datasets(
#'   endpoints = "DisasterDeclarationsSummaries",
#'   download_directory = "data/openfema")
#'
#' # Download all datasets
#' download_openfema_datasets(download_directory = "data/openfema")
#'
#' # See what's available first
#' list_openfema_endpoints()
#' }
download_openfema_datasets <- function(
    endpoints = NULL,
    download_directory = ".",
    format_preference = c("parquet", "csv"),
    overwrite = FALSE) {

  # Only these formats have a URL column in the metadata tibble. Anything else
  # used to crash with "subscript out of bounds" when it was looked up, so it
  # is dropped up front (with a warning) instead.
  supported_formats <- c("parquet", "csv")
  format_preference <- unique(tolower(format_preference))
  unsupported <- setdiff(format_preference, supported_formats)
  if (length(unsupported) > 0) {
    warning(
      "Ignoring unsupported format(s) in `format_preference`: ",
      paste(unsupported, collapse = ", "),
      ". Supported formats: ", paste(supported_formats, collapse = ", "), ".",
      call. = FALSE
    )
    format_preference <- intersect(format_preference, supported_formats)
  }

  # Single place that defines the shape of a result row.
  result_row <- function(name, format, file_path, status) {
    tibble::tibble(
      name = name, format = format,
      file_path = file_path, status = status
    )
  }

  metadata <- list_openfema_endpoints()

  # Validate requested endpoints
  if (!is.null(endpoints)) {
    unknown <- setdiff(endpoints, metadata$name)
    if (length(unknown) > 0) {
      stop(
        "Unknown endpoint(s): ", paste(unknown, collapse = ", "),
        "\nUse list_openfema_endpoints() to see available names."
      )
    }
    metadata <- metadata |> dplyr::filter(.data$name %in% endpoints)
  }

  # Nothing to do: still return the documented columns.
  if (nrow(metadata) == 0) {
    return(result_row(character(), character(), character(), character()))
  }

  if (!dir.exists(download_directory)) {
    dir.create(download_directory, recursive = TRUE)
    message("Created directory: ", download_directory)
  }

  # Full dataset files can be large, so allow at least 20 minutes per file,
  # but never shorten a longer timeout the user has already configured. The
  # previous setting is restored when this function exits.
  old_options <- options(
    timeout = max(1200, getOption("timeout", default = 60))
  )
  on.exit(options(old_options), add = TRUE)

  date_stamp <- format(Sys.Date(), "%Y_%m_%d")

  # Process each dataset
  results <- purrr::pmap(
    list(
      metadata$name, metadata$url_parquet,
      metadata$url_csv, metadata$formats
    ),
    function(name, url_parquet, url_csv, formats) {

      # Build lookup of format -> URL and keep the preferred formats that
      # actually have one, in preference order.
      url_lookup <- c(parquet = url_parquet, csv = url_csv)
      usable <- format_preference[!is.na(url_lookup[format_preference])]

      if (length(usable) == 0) {
        message(
          "[", name, "] No preferred format available (has: ", formats,
          "). Skipping."
        )
        return(result_row(name, NA_character_, NA_character_, "no_format"))
      }

      chosen_format <- usable[[1]]
      chosen_url <- url_lookup[[chosen_format]]

      safe_name <- gsub("-", "_", name)
      file_name <- paste0(safe_name, "_", date_stamp, ".", chosen_format)
      file_path <- file.path(download_directory, file_name)

      if (file.exists(file_path) && !overwrite) {
        message(
          "[", name,
          "] File already exists, skipping. Use overwrite = TRUE to re-download."
        )
        return(result_row(name, chosen_format, file_path, "skipped"))
      }

      message("[", name, "] Downloading ", chosen_format, " from: ", chosen_url)
      dl_result <- tryCatch({
        download_openfema_file(chosen_url, file_path)
        "downloaded"
      }, error = function(e) {
        warning(
          "[", name, "] Download failed: ", conditionMessage(e),
          call. = FALSE
        )
        "failed"
      })

      result_row(name, chosen_format, file_path, dl_result)
    }
  )

  dplyr::bind_rows(results)
}

# Download `url` to `destination` via a sibling ".part" file so that a failed
# or interrupted transfer never leaves a truncated file at `destination` (and
# never clobbers an existing good copy). Signals an error on any failure.
download_openfema_file <- function(url, destination) {
  partial_path <- paste0(destination, ".part")
  # Harmless after a successful move; removes the leftovers after a failure.
  on.exit(unlink(partial_path), add = TRUE)

  # download.file() reports some failures only through a non-zero return
  # value (plus a warning), so the status has to be checked explicitly.
  exit_code <- utils::download.file(
    url, destfile = partial_path, mode = "wb", quiet = FALSE
  )
  if (!identical(as.integer(exit_code), 0L)) {
    stop(
      "download.file() returned non-zero exit status ", exit_code,
      call. = FALSE
    )
  }

  # Prefer a rename; fall back to a copy where renaming onto an existing file
  # is not possible.
  moved <- suppressWarnings(file.rename(partial_path, destination))
  if (!moved) {
    moved <- file.copy(partial_path, destination, overwrite = TRUE)
  }
  if (!moved) {
    stop("Could not move the downloaded file to ", destination, call. = FALSE)
  }

  invisible(destination)
}
40 changes: 30 additions & 10 deletions R/get_sba_loans.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Author: Will Curran-Groome

#' @importFrom magrittr %>%

#' @title Access SBA data on disaster loans
Expand Down Expand Up @@ -49,7 +47,7 @@ get_sba_loans = function() {
sba_eidl_declaration_number = dplyr::if_else(is.na(sba_eidl_declaration_number), sba_eidl_declaration, sba_eidl_declaration_number),
damaged_property_zip_code = dplyr::if_else(is.na(damaged_property_zip_code), damaged_property_zip, damaged_property_zip_code),
damaged_property_city_name = dplyr::if_else(is.na(damaged_property_city_name), damaged_property_city, damaged_property_city_name),
damaged_property_state_code = dplyr::if_else(is.na(damaged_property_state_code), damaged_property_state, damaged_property_state_code),
damaged_property_state_abbreviation = dplyr::if_else(is.na(damaged_property_state_code), damaged_property_state, damaged_property_state_code),
total_approved_loan_amount = dplyr::if_else(is.na(total_approved_loan_amount), total_approved, total_approved_loan_amount),
approved_amount_real_estate = dplyr::if_else(is.na(approved_amount_real_estate), approved_amount_real, approved_amount_real_estate),
total_verified_loss = dplyr::if_else(is.na(total_verified_loss), total_verified, total_verified_loss)) %>%
Expand Down Expand Up @@ -77,7 +75,7 @@ get_sba_loans = function() {
sba_eidl_declaration_number = dplyr::if_else(is.na(sba_eidl_declaration_number), sba_eidl_declaration, sba_eidl_declaration_number),
damaged_property_zip_code = dplyr::if_else(is.na(damaged_property_zip_code), damaged_property_zip, damaged_property_zip_code),
damaged_property_city_name = dplyr::if_else(is.na(damaged_property_city_name), damaged_property_city, damaged_property_city_name),
damaged_property_state_code = dplyr::if_else(is.na(damaged_property_state_code), damaged_property_state, damaged_property_state_code),
damaged_property_state_abbreviation = dplyr::if_else(is.na(damaged_property_state_code), damaged_property_state, damaged_property_state_code),
total_approved_loan_amount = dplyr::if_else(is.na(total_approved_loan_amount), total_approved, total_approved_loan_amount),
approved_amount_real_estate = dplyr::if_else(is.na(approved_amount_real_estate), approved_amount_real, approved_amount_real_estate),
total_verified_loss = dplyr::if_else(is.na(total_verified_loss), total_verified, total_verified_loss)) %>%
Expand All @@ -87,18 +85,40 @@ get_sba_loans = function() {
approved_amount_real)) %>%
dplyr::rename(
disaster_number_fema = fema_disaster_number,
disaster_number_sba = sba_disaster_number,
disaster_number_sba_physical = sba_physical_declaration_number,
disaster_number_sba_eidl = sba_eidl_declaration_number,
verified_loss_total = total_verified_loss,
approved_amount_total = total_approved_loan_amount)

result = dplyr::bind_rows(
business_loans %>% dplyr::mutate(loan_type = "business"),
home_loans %>% dplyr::mutate(loan_type = "residential")) %>%
## these are weird, meaningless records that are either embedded in the raw data
## or that are accidentally created as rows when data are read-in from file
dplyr::filter(!stringr::str_detect(disaster_number_sba_physical, "Business Data Only|United States Small Business"))

business_loans %>% dplyr::mutate(loan_type = "business"),
home_loans %>% dplyr::mutate(loan_type = "residential")) %>%
suppressWarnings({dplyr::mutate(
## state codes should be characters, not numbers (e.g., "AL")
damaged_property_state_code = dplyr::if_else(!is.na(as.numeric(damaged_property_state_code)), NA, damaged_property_state_code),
damaged_property_zip_code = dplyr::case_when(
## sometimes these are represented as three digits in the raw data, but in PR, all zip codes are prefixed with "00", so padding is safe/correct
damaged_property_state_code == "PR" ~ stringr::str_pad(damaged_property_zip_code, width = 5, side = "left", pad = "0"),
## in the raw data, these columns are transposed in some cases; as.numeric(city) should be NA if city is accurate, so this is safe
is.na(as.numeric(damaged_property_zip_code)) ~ as.numeric(damaged_property_city_name) %>% stringr::str_pad(width = 5, side = "left", pad = "0"),
TRUE ~ damaged_property_zip_code),
damaged_property_state_code = dplyr::case_when(
## we infer state codes from disaster codes, where state codes are prefixed on the disaster number
is.na(damaged_property_state_code) & stringr::str_detect(sba_disaster_number, "^[A-Z]{2}-") ~ stringr::str_sub(sba_disaster_number, 1, 2),
is.na(damaged_property_state_code) & stringr::str_detect(disaster_number_fema, "^[A-Z]{2}-") ~ stringr::str_sub(disaster_number_fema, 1, 2),
## another transposition issue; we only use the city name when it is two, uppercase characters
is.na(damaged_property_state_code) & stringr::str_detect(damaged_property_city_name, "^[A-Z]{2}$") ~ damaged_property_city_name,
TRUE ~ damaged_property_state_code),
damaged_property_city_name = dplyr::case_when(
## yet another raw data transposition issue
stringr::str_detect(damaged_property_city_name, "^[A-Z]{2}$") & !stringr::str_detect(sba_disaster_number, "-") ~ sba_disaster_number,
TRUE ~ damaged_property_city_name))}) %>%
dplyr::filter(
## these are weird, meaningless records that are either embedded in the raw data
## or that are accidentally created as rows when data are read-in from file
!stringr::str_detect(disaster_number_sba_physical, "Business Data Only|United States Small Business|Home Data Only"))

return(result)
}

Expand Down
50 changes: 50 additions & 0 deletions man/download_openfema_datasets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading