Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,7 @@ Suggests:
knitr,
qualtRics,
rmarkdown,
testthat (>= 3.0.0),
tidyverse
VignetteBuilder: knitr
Config/testthat/edition: 3
42 changes: 29 additions & 13 deletions R/get_business_patterns.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
#' Obtain County Business Patterns (CBP) Estimates per County
#'
#' @param year The vintage of CBP data desired. Data are available from 1986, though this function likely only supports more recent years (it is tested on 2022-vintage data only). Default is 2022.
#' @param naics_code_digits One of c(2, 3). Default is 2. NAICS codes range in specificity; 2-digit codes describe the highest groupings of industries, while six-digit codes are exceedingly detailed. There are 20 2-digit NAICS codes and 196 3-digit codes.
#' @param naics_codes A vector of NAICS codes to query. If NULL, the function will query all available codes with the specified number of digits. If not NULL, this argument overrides the `naics_code_digits` argument.
#' @return A tibble with data on county-level employees, employers, and aggregate annual payrolls by industry and employer size
#' @param year The vintage of CBP data desired. Data are available from 1986,
#' though this function likely only supports more recent years (it is tested on 2022-vintage data only).
#' Default is 2022.
#' @param naics_code_digits One of c(2, 3). Default is 2. NAICS codes range in
#' specificity; 2-digit codes describe the highest groupings of industries,
#' while six-digit codes are exceedingly detailed. There are 20 2-digit NAICS
#' codes and 196 3-digit codes. If more specific codes are desired, leave this
#' argument as NULL and supply the desired codes as the argument to `naics_codes`.
#' @param naics_codes A vector of NAICS codes to query. If NULL, the function will
#' query all available codes with the specified number of digits. If not NULL,
#' this argument overrides the `naics_code_digits` argument.
#' @return A tibble with data on county-level employees, employers, and aggregate
#' annual payrolls by industry and employer size
#' @export
#'
#' @examples
Expand All @@ -23,10 +32,14 @@ get_business_patterns = function(year = 2022, naics_code_digits = 2, naics_codes
stop("`naics_code_digits` must be one of c(2, 3). For more detailed codes, explicitly pass desired codes to the `naics_codes` parameter.") }

naics_codes_metadata = censusapi::listCensusMetadata(
name = "cbp",
vintage = "2022",
type = "variables",
include_values = TRUE)
name = "cbp",
vintage = "2022",
type = "variables",
include_values = TRUE) %>%
#filter out codes 92 and 95 which do not appear to have data associated and
#don't appear on the census list of naics codes at
#https://www2.census.gov/programs-surveys/cbp/technical-documentation/reference/naics-descriptions/naics2017.txt
dplyr::filter(!stringr::str_starts(values_code, "92|95"))

if (!is.null(naics_codes)) {
naics_code_check = naics_codes_metadata %>%
Expand Down Expand Up @@ -62,15 +75,16 @@ get_business_patterns = function(year = 2022, naics_code_digits = 2, naics_codes
~ tryCatch({
censusapi::getCensus(
name = "cbp",
vintage = 2022,
vintage = year,
vars = c(
"EMP",
"ESTAB",
"PAYANN",
"EMPSZES",
"NAICS2017_LABEL"),
region = "county:*",
NAICS2017 = .x)},
NAICS2017 = .x) %>%
mutate(naics_code = .x)},
error = function(e) {
message("Error in NAICS2017: ", .x)
return(tibble::tibble())})) %>%
Expand All @@ -81,11 +95,13 @@ get_business_patterns = function(year = 2022, naics_code_digits = 2, naics_codes
employers = ESTAB,
annual_payroll = PAYANN,
employee_size_range = EMPSZES,
industry = NAICS2017_LABEL) %>%
industry = NAICS2017_LABEL,
naics_code) %>%
dplyr::mutate(
industry = industry %>%
stringr::str_to_lower() %>%
stringr::str_replace_all(c(" " = "_", ",|\\(|\\)|_for_all_sectors|and_" = "")),
year = year,
## this recoding is mapped from: https://www2.census.gov/programs-surveys/bds/technical-documentation/label_empszes.csv
employee_size_range_label = dplyr::case_when(
employee_size_range == "001" ~ "All establishments",
Expand Down Expand Up @@ -137,12 +153,12 @@ get_business_patterns = function(year = 2022, naics_code_digits = 2, naics_codes
stringr::str_extract(employee_size_range_label, "[0-9]{4}") %>% as.numeric >= 1000 ~ "1000+",
TRUE ~ employee_size_range_label)) %>%
dplyr::rename(employee_size_range_code = employee_size_range) %>%
dplyr::select(state, county, employees, employers, annual_payroll, industry, employee_size_range_label, employee_size_range_code)
dplyr::select(year, state, county, employees, employers, annual_payroll, industry, employee_size_range_label, employee_size_range_code, naics_code)

return(cbp)
}

utils::globalVariables(
c("EMP", "EMPSZES", "ESTAB", "NAICS2017_LABEL", "PAYANN", "annual_payroll",
"employee_size_range", "employee_size_range_code", "employee_size_range_label",
"employees", "employers", "industry", "values_code"))
"employees", "employers", "industry", "values_code", "naics_code"))
38 changes: 25 additions & 13 deletions R/get_lodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,26 @@ rename_lodes_variables = function(.df) {
#' Get LEHD Origin-Destination Employment Statistics (LODES) data
#'
#' Returned data are from LODES Version 8, which is enumerated in 2020-vintage geometries.
#'
#' @param lodes_type One of c("rac", "wac", "od"). "rac" = Residence Area Characteristics, where jobs are associated with employees' residences. "wac" = Workplace Area Characteristics, where jobs are associated with employees' workplaces. "od" = Origin-Destination data, where jobs are associated with both workers' residences and their workplaces.
#' @param jobs_type One of c("all", "primary"). Default is "all", which includes multiple jobs for workers with multiple jobs. "primary" includes only the highest-paying job per worker.
#' @param lodes_type One of c("rac", "wac", "od"). "rac" = Residence Area
#' Characteristics, where jobs are associated with employees' residences.
#' "wac" = Workplace Area Characteristics, where jobs are associated with
#' employees' workplaces. "od" = Origin-Destination data, where jobs are associated
#' with both workers' residences and their workplaces.
#' @param jobs_type One of c("all", "primary"). Default is "all", which includes
#' multiple jobs for workers with multiple jobs. "primary" includes only the
#' highest-paying job per worker.
#' @param states A vector of state abbreviations.
#' @param years A vector of years.
#' @param geography One of c("block", "block group", "tract", "county", "state"). Default is "tract".
#' @param state_part One of c("main", "aux"). Default is "main", which includes only workers who reside inside the state where they work. "aux" returns only workers who work in the specified state but live outside of that state.
#' @param geography One of c("block", "block group", "tract", "county", "state").
#' Default is "tract".
#' @param state_part One of c("main", "aux"). Default is "main", which includes
#' only workers who reside inside the state where they work. "aux" returns
#' only workers who work in the specified state but live outside of that state.
#'
#' @return A tibble with one record per geography per year per job type. Attributes include total jobs and jobs by worker earnings, industry, and demographics; the origin-destination results have more limited demographics compared to the "wac" and "rac" results.
#' @return A tibble with one record per geography per year per job type. Attributes
#' include total jobs and jobs by worker earnings, industry, and demographics;
#' the origin-destination results have more limited demographics compared to
#' the "wac" and "rac" results.
#' @export
get_lodes = function(
lodes_type,
Expand Down Expand Up @@ -202,7 +214,7 @@ Returning for only those states that are available for all specified years.\n")

#https://lehd.ces.census.gov/doc/help/onthemap/LODESDataNote-FedEmp2015.pdf

## geography identifying variables are variably-named across different geography
## geography-identifying variables are variably named across different geography
## parameters; we standardize these to always be "GEOID"
geoid_rename = c("_geocode|_tract|_bg|_county|_state" = "_GEOID")

Expand All @@ -212,12 +224,7 @@ Returning for only those states that are available for all specified years.\n")
jobs_type_all = "JT01"
jobs_type_federal = "JT05" }

# states = "TX"
# years = 2022
# agg_geo = "tract"
# lodes_type = "od"

## else this is noisy
## suppress messages/warnings; otherwise this call is noisy
suppressWarnings({suppressMessages({
lodes_all_jobs = lehdr::grab_lodes(
state = states,
Expand Down Expand Up @@ -255,8 +262,13 @@ as NA.\n") }
dplyr::select(-dplyr::matches("create")) })})

join_by = c("year", "GEOID")

if (lodes_type == "od") {
join_by = c("year", "w_GEOID", "h_GEOID") }
join_by = c("year", "w_GEOID", "h_GEOID") } else if (lodes_type == "rac") {
join_by = c("year", "h_GEOID")
} else if (lodes_type == "wac") {
join_by = c("year", "w_GEOID")
}

## both all jobs and all federal jobs
lodes_all_nonfederal_jobs = lodes_all_jobs %>%
Expand Down
17 changes: 13 additions & 4 deletions man/get_business_patterns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 17 additions & 5 deletions man/get_lodes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# This file is part of the standard setup for testthat.
# It is recommended that you do not modify it.
#
# Where should you do additional test configuration?
# Learn more about the roles of various files in:
# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
# * https://testthat.r-lib.org/articles/special-files.html

# Attach the test framework and the package under test.
library(testthat)
library(climateapi)

# Run the test suite for the "climateapi" package.
test_check("climateapi")
3 changes: 3 additions & 0 deletions tests/testthat/test-get_business_patterns.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# `naics_code_digits` is documented as accepting only 2 or 3, so the test must
# pass a value outside that set (4) to exercise the validation error. The
# expected message fragment is pinned so that an unrelated failure (e.g. a
# network/API error) cannot satisfy the expectation by accident.
testthat::test_that("naics_code_digits errors clearly when not in c(2,3)", {
  testthat::expect_error(
    get_business_patterns(year = 2022, naics_code_digits = 4),
    regexp = "must be one of c(2, 3)",
    fixed = TRUE)
})
3 changes: 3 additions & 0 deletions tests/testthat/test-get_lodes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NOTE(review): placeholder test. It only asserts basic arithmetic and never
# calls get_lodes(); replace it with tests that exercise get_lodes() itself.
test_that("multiplication works", {
expect_equal(2 * 2, 4)
})