UI-Research · wcurrangroome · Oct 30, 2025 · Oct 20, 2025 · Oct 20, 2025 · Oct 22, 2025
diff --git a/R/get_hazard_mitigation_assistance.R b/R/get_hazard_mitigation_assistance.R
@@ -431,4 +431,4 @@ utils::globalVariables(c(
   "project_counties", "hma_project00", "initial_obligation_amount", "project_counties_multiple_flag",
   "project_fiscal_year", "project_id", "project_identifier", "project_program_area",
   "selection_federal_share_amount", "selection_federal_share_amount_split", "status",
-  "subapplicant_state", "subapplicant_state_abbreviation"))
+  "subapplicant_state", "subapplicant_state_abbreviation", "selection_status"))
diff --git a/R/get_nfip_claims.R b/R/get_nfip_claims.R
@@ -1,118 +1,208 @@
-# Author: Will Curran-Groome
-
-#' @importFrom magrittr %>%
-
-#' @title Access county-level data on NFIP policies
+#' @title Access county-level data on NFIP claims
 #' @param county_geoids A character vector of five-digit county codes. NULL by default; must be non-NULL if `api = TRUE`.
 #' @param file_name The name (not the full path) of the Box file containing the raw data.
 #' @param api If TRUE, query the API. FALSE by default.
 #'
-#' @returns A dataframe comprising county-level data on current NFIP policies
+#' @details
+#' These data are from: https://www.fema.gov/openfema-data-page/fima-nfip-redacted-claims-v2.
+#' Per FEMA: This data set represents more than 2,000,000 NFIP claims transactions. It is
+#' derived from the NFIP system of record, staged in the NFIP reporting platform and
+#' redacted to protect policy holder personally identifiable information. The
+#' dataset includes the 50 states + DC and the following territories: Puerto Rico,
+#' US Virgin Islands, and Guam.
+#'
+#' In order to filter to residential claims, filter out occupancy type: "non-residential".
+#'
+#' Some claims (from multi-unit buildings / condos) are associated with multiple insured units.
+#' When calculating the number of units covered by a claim, the analyst should use the count_units_insured column.
+#'
+#' The example below illustrates and example of how the data set can be summarized to show the total
+#' number of residential claims submitted in two different counties, as well as the total damages
+#' and payments in the same time period.
+#'
+#' @returns A data frame comprising county-level data on current NFIP policies
+#'  \describe{
+#'    \item{state_fips}{A two-digit state identifier.}
+#'    \item{state_abbreviation}{The name of the state.}
+#'    \item{county_geoid}{A five-digit county identifier.}
+#'    \item{county_name}{The name of the county.}
+#'    \item{year_construction}{The original year of the construction of the building.}
+#'    \item{year_loss}{The year in which the flood loss occurred.}
+#'    \item{occupancy_type}{The occupancy type of the primary building associated with the claim.}
+#'    \item{count_units_insured}{The number of insured units associated with the claim.}
+#'    \item{deductible_building}{The total deductible for buildings, main and appurtenant.}
+#'    \item{deductible_contents}{The total deductible for contents.}
+#'    \item{value_building}{The value of the main building as estimated by an adjuster.}
+#'    \item{value_contents}{The value of the contents as estimated by an adjuster.}
+#'    \item{replacement_cost_building}{Estimated cost to replace the building as reported by the insurer.}
+#'    \item{replacement_cost_contents}{Estimated cost to replace the contents as reported by the insurer.}
+#'    \item{insurance_coverage_building}{The total insurance amount on the building.}
+#'    \item{insurance_coverage_contents}{The total insurance amount on the contents.}
+#'    \item{damage_building}{The amount of damage to a main property.}
+#'    \item{damage_contents}{The value of damage to contents.}
+#'    \item{net_payment_building}{Net building payment amount.}
+#'    \item{net_payment_contents}{Net contents payment amount.}
+#'    \item{net_payment_increased_compliance}{Net Increased Cost of Compliance (ICC) payment amount.}
+#'  }
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' get_nfip_claims(
-#'    county_geoids = c("01001", "01003"),
-#'    api = TRUE)
-#' }
+#'
+#' test <- get_nfip_claims(county_geoids = c("01001", "48201")) |>
+#'   dplyr::filter(
+#'     year_of_loss >= 2015,  ### in the past 10 years
+#'     !occupancy_type %in% c("non-residential")) |> ### only residential claims
+#'   dplyr::summarize(
+#'     .by = county_geoid,
+#'     dplyr::across(dplyr::matches("payment"), sum, na.rm = TRUE),
+#'     residential_claims = dplyr::n_distinct(nfip_claim_id))
+#'}
+
 get_nfip_claims = function(
-    county_geoids = NULL,
-    file_name = "fima_nfip_claims_2025_06_08.parquet",
-    api = FALSE) {
+  county_geoids = NULL,
+  file_name = "fima_nfip_claims_2025_09_09.parquet",
+  api = FALSE) {
 
+  ## if reading from disk
   if (isFALSE(api)) {
+    if (is.na(file_name)) stop("If `api = FALSE`, you must provide a `file_name` argument.")
+
     inpath = file.path(
       get_box_path(), "hazards", "fema", "national-flood-insurance-program", "raw", file_name)
 
+    ## check that the cached file exists
     if (! file.exists(inpath)) {
       stop("The provided `file_name` is invalid.") }
 
+    ## if the cached file isn't a parquet file, convert it to one
     if (! (stringr::str_detect(inpath, "parquet"))) {
 
       convert_delimited_to_parquet(
         inpath = inpath,
         delimit_character = ",",
         dataset = "nfip_claims") }
 
-    df1a = arrow::read_parquet(file = inpath) %>%
+    ## read the parquet file
+    df1a = arrow::read_parquet(file = inpath) |>
       janitor::clean_names()
 
   } else {
+
+  ## if querying the API:
+
+    ## if no county_geoids, throw an error
     if (is.null(county_geoids)) {
-      stop("No `county_geoids` value was supplied; for querying the API, you must supply this argument.")
-      county_geoids = tidycensus::fips_codes %>%
-        dplyr::mutate(county_geoid = stringr::str_c(state_code, county_code)) %>%
-        dplyr::distinct(county_geoid) %>%
-        dplyr::pull(county_geoid)
-    }
+      stop("No `county_geoids` value was supplied; for querying the API, you must supply this argument.") }
+
+    if (length(county_geoids > 10)) {
+      warning(stringr::str_c(
+"These data are very large; given the number of supplied `county_geoids`, anticipate ",
+"a slow query. An alternate approach is to download the full dataset and then supply that ",
+"filepath to the parameter `file_name`.")) }
 
     df1a = purrr::map_dfr(
       county_geoids,
       ~ rfema::open_fema(
-        data_set = "fimanfippolicies",
+        data_set = "fimaNfipClaims",
         filters = list(countyCode = .x),
-        ask_before_call = FALSE) %>%
+        ask_before_call = FALSE) |>
         janitor::clean_names())
   }
 
-  df1b = df1a %>%
-    dplyr::mutate(county_geoid = stringr::str_sub(census_tract, 1, 5)) %>%
-    {if (!is.null(county_geoids)) dplyr::filter(., county_geoid %in% county_geoids) else . } %>%
-    dplyr::left_join(
-      tidycensus::fips_codes %>%
-        dplyr::select(state_fips = state_code, state_abb = state, county_fips = county_code, county_name = county) %>%
-        dplyr::mutate(county_geoid = stringr::str_c(state_fips, county_fips)),
-      by = c("county_geoid"))
-
-  ## to save space:
-  rm(df1a)
-
-  result = df1b %>%
-    dplyr::select(
-      nfip_policy_id = id,
+  ## data cleaning:
+  df1b = df1a |>
+    tidytable::mutate(
+      ### taking county_code if it exists, if not extract from census_tract
+      county_geoid = tidytable::if_else(!is.na(county_code), county_code, stringr::str_sub(census_tract, 1, 5)))
+
+  if (!is.null(county_geoids)) {
+    df1b = df1b |>
+      ### filtering to only the selected county_geoids
+      tidytable::filter(county_geoid %in% county_geoids) }
+
+  df1c = df1b |>
+    tidytable::left_join(
+      tidycensus::fips_codes |>
+        tidytable::select(
+          state_fips = state_code, state_abb = state, county_fips = county_code,
+          county_name = county) |>
+        tidytable::mutate(county_geoid = stringr::str_c(state_fips, county_fips)),
+      by = c("county_geoid"),
+      relationship = "many-to-one")
+
+  result = df1c |>
+    dplyr::transmute(
+      #nfip_claim_id = id,
       state_fips,
-      state_abbreviation = state,
+      #state_abbreviation = state, ## this is unreliable--other fields appear to be more consistent
       county_geoid,
       county_name,
-      occupancy_type,
-      count_units_insured = policy_count, ## if a condo, the number of units is stored separately (see below)
-      count_units_condo_insured = number_of_units, ## "the number of residential AND NONRESIDENTIAL units covered by the conominium master policy"
-      cause_of_damage,
-      flood_event_name = flood_event,
-      building_deductible_code,
-      contents_deductible_code,
-      building_property_value,
-      contents_property_value,
-      contents_replacement_cost,
-      building_replacement_cost,
-      flood_zone_firm_current = flood_zone_current,
-      total_building_insurance_coverage,
-      total_contents_insurance_coverage,
-      year_of_loss,
-      dplyr::matches("amount"))
-
-  ## NEED TO ADDRESS THE TWO COUNT VARIABLES SO THAT EACH RECORD REFLECTS A SINGLE CLAIM
-  ## OR HAS A SINGLE VARIABLE DENOTING THE NUMBER OF CLAIMS IT ENCOMPASSES
-  # result %>%
-  #   dplyr::select(dplyr::matches("count")) %>%
-  #   dplyr::count(count_units_condo_insured, count_units_insured, sort = TRUE)
-
-  message("
-These data are from: https://www.fema.gov/openfema-data-page/fima-nfip-redacted-policies-v2.
-Per FEMA: This dataset represents more than 80,000,000 policy transactions. It is
-derived from the NFIP system of record, staged in the NFIP reporting platform and
-redacted to protect policy holder personally identifiable information.")
+      occupancy_type = dplyr::case_when(occupancy_type %in% c(1, 11) ~ "single family",
+                                        occupancy_type %in% c(2, 3, 12, 13, 16, 15) ~ "multi-family",
+                                        occupancy_type %in% c(14) ~ "mobile/manufactured home",
+                                        occupancy_type %in% c(4, 6, 17, 18, 19) ~ "non-residential"),
+      year_loss = year_of_loss,
+      year_construction = lubridate::year(original_construction_date),
+      count_units_insured = policy_count, ## number of insured units associated with the claim
+      #cause_of_damage,
+      #flood_event_name = flood_event,
+      #flood_zone_firm_current = flood_zone_current,
+      deductible_building = dplyr::case_when(building_deductible_code == "0" ~ 500,
+                                      building_deductible_code == "1" ~ 1000,
+                                      building_deductible_code == "2" ~ 2000,
+                                      building_deductible_code == "3" ~ 3000,
+                                      building_deductible_code == "4" ~ 4000,
+                                      building_deductible_code == "5" ~ 5000,
+                                      building_deductible_code == "9" ~ 750,
+                                      building_deductible_code == "A" ~ 10000,
+                                      building_deductible_code == "B" ~ 15000,
+                                      building_deductible_code == "C" ~ 20000,
+                                      building_deductible_code == "D" ~ 25000,
+                                      building_deductible_code == "E" ~ 50000,
+                                      building_deductible_code == "F" ~ 1250,
+                                      building_deductible_code == "G" ~ 500,
+                                      building_deductible_code == "H" ~ 200),
+      deductible_contents = dplyr::case_when(contents_deductible_code == "0" ~ 500,
+                                      contents_deductible_code == "1" ~ 1000,
+                                      contents_deductible_code == "2" ~ 2000,
+                                      contents_deductible_code == "3" ~ 3000,
+                                      contents_deductible_code == "4" ~ 4000,
+                                      contents_deductible_code == "5" ~ 5000,
+                                      contents_deductible_code == "9" ~ 750,
+                                      contents_deductible_code == "A" ~ 10000,
+                                      contents_deductible_code == "B" ~ 15000,
+                                      contents_deductible_code == "C" ~ 20000,
+                                      contents_deductible_code == "D" ~ 25000,
+                                      contents_deductible_code == "E" ~ 50000,
+                                      contents_deductible_code == "F" ~ 1250,
+                                      contents_deductible_code == "G" ~ 500,
+                                      contents_deductible_code == "H" ~ 200),
+      value_building = building_property_value,
+      value_contents = contents_property_value,
+      replacement_cost_building = building_replacement_cost,
+      replacement_cost_contents = contents_replacement_cost,
+      insurance_coverage_building = total_building_insurance_coverage,
+      insurance_coverage_contents = total_contents_insurance_coverage,
+      damage_building = building_damage_amount,
+      damage_contents = contents_damage_amount,
+      net_payment_building = net_building_payment_amount,
+      net_payment_contents = net_contents_payment_amount,
+      net_payment_icc = net_icc_payment_amount)
 
   return(result)
-
 }
 
 utils::globalVariables(c(
-  "census_tract", "county_geoid", "state_code", "state", "county_code", "county",
-  "state_fips", "county_fips", "id", "state_abb", "county_name", "policy_cost",
-  "policy_count",  "building_deductible_code", "building_property_value", "cause_of_damage",
-  "contents_deductible_code", "contents_property_value", "contents_replacement_cost",
-  "count_units_condo_insured", "count_units_insured", "flood_event", "flood_zone_current",
-  "number_of_units", "total_building_insurance_coverage", "total_contents_insurance_coverage",
-  "year_of_loss"))
+    "building_deductible_code", "building_property_value",
+    "building_replacement_cost", "cause_of_damage", "census_tract",
+    "contents_deductible_code", "contents_damage_amount",
+    "contents_property_value", "contents_replacement_cost",
+    "county", "county_code", "county_fips", "county_geoid",
+    "county_name", "flood_event", "flood_zone_current",
+    "id", "net_building_payment_amount", "net_contents_payment_amount",
+    "net_icc_payment_amount", "occupancy_type", "original_construction_date",
+    "original_construction_year", "policy_count", "state", "state_abb",
+    "state_abbreviation", "state_code", "state_fips", "total_building_insurance_coverage",
+    "total_contents_insurance_coverage", "year_of_loss", "building_damage_amount"
+  ))
diff --git a/R/get_public_assistance.R b/R/get_public_assistance.R
@@ -1,3 +1,5 @@
+.datatable.aware=TRUE
+
 #' Get FEMA Public Assistance (PA) funding
 #'
 #' Project- and county-level data on PA funding over time
@@ -28,20 +30,25 @@
 #'
 #' @return A dataframe of project-level funding requests and awards, along with variables that can be aggregated to the county level.
 #'     \describe{
-#'         \item{id}{A unique identifier for each project in the raw data. This field is not unique in the returned data due to crosswalking statewide projects to the county level. Refer to the details for additional information}
+#'         \item{id}{A unique identifier for each project in the raw data.
+#'            This field is not unique in the returned data due to crosswalking
+#'            statewide projects to the county level. Refer to the details for additional information}
 #'         \item{state_fips}{A two-digit state identifier.}
 #'         \item{state_name}{The name of the state.}
 #'         \item{state_abbreviation}{The two-character USPS abbreviation for the state.}
 #'         \item{county_fips}{A five-digit county identifier.}
 #'         \item{county_name}{The name of the county.}
 #'         \item{declaration_year}{The year when the authorizing disaster declaration was made.}
-#'         \item{disaster_number}{The FEMA-created disaster number. This is unique at the disaster-state level; for example, Hurricane Helene has multiple disaster numbers associated with it, one per state that received an associated disaster declaration.}
+#'         \item{disaster_number}{The FEMA-created disaster number. This is unique
+#'            at the disaster-state level; for example, Hurricane Helene has multiple
+#'            disaster numbers associated with it, one per state that received an associated disaster declaration.}
 #'         \item{incident_type}{The type of disaster, e.g., "Hurricane".}
 #'         \item{project_status}{The current status of the funded PA project, e.g., "Active".}
 #'         \item{damage_category_code}{A letter code identifying the category of damages/what funds may be used for.}
 #'         \item{damage_category_description}{A descriptive characteristization of the damage category.}
 #'         \item{pa_federal_funding_obligated}{Obligated federal funding at the project `id` level.}
-#'         \item{pa_federal_funding_obligated_split}{Obligated federal attributed to the `id`-by-county level. Refer to the details for additional information.}
+#'         \item{pa_federal_funding_obligated_split}{Obligated federal attributed
+#'            to the `id`-by-county level. Refer to the details for additional information.}
 #'     }
 #' @export
 #'
@@ -50,8 +57,6 @@
 #'   get_public_assistance(state_abbreviations = "NJ")
 #' }
 
-.datatable.aware=TRUE
-
 get_public_assistance= function(
     file_path = file.path(
       get_box_path(), "hazards", "fema", "public-assistance", "raw",