Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/remove-reported-spm-inputs.changed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove reported SPM WIC, school meals, broadband, and tax inputs from CPS outputs in favor of policyengine-us formulas.
23 changes: 8 additions & 15 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
# Assume zero housing assistance, so pre-subsidy rent equals rent.
cps["pre_subsidy_rent"] = cps["rent"]
cps["housing_assistance"] = np.zeros_like(
cps["spm_unit_capped_housing_subsidy_reported"]
cps["spm_unit_capped_housing_subsidy_data"]
)
cps["real_estate_taxes"] = np.zeros(len(cps["age"]), dtype=float)
cps["real_estate_taxes"][mask] = imputed_values["real_estate_taxes"]
Expand Down Expand Up @@ -633,6 +633,9 @@ def add_takeup(self):
data["age"],
)

for source_anchor in ("snap_reported", "ssi_reported"):
data.pop(source_anchor, None)

self.save_dataset(data)


Expand Down Expand Up @@ -1260,9 +1263,8 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
# The code for strike benefits is 12.
cps["strike_benefits"] = (person.OI_OFF == 12) * person.OI_VAL
cps["child_support_received"] = person.CSP_VAL
# Assume all public assistance / welfare dollars (PAW_VAL) are TANF.
# They could also include General Assistance.
cps["tanf_reported"] = person.PAW_VAL
# CPS SSI receipt anchors SSI take-up and disability alignment inside
# add_takeup; it is dropped before the dataset is saved.
cps["ssi_reported"] = person.SSI_VAL
# Allocate CPS RETCB_VAL (a single bundled retirement contribution
# total) into account-type-specific variables using a proportional
Expand Down Expand Up @@ -1397,15 +1399,8 @@ def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
SPM_RENAMES = dict(
spm_unit_total_income_reported="SPM_TOTVAL",
snap_reported="SPM_SNAPSUB",
spm_unit_capped_housing_subsidy_reported="SPM_CAPHOUSESUB",
free_school_meals_reported="SPM_SCHLUNCH",
spm_unit_energy_subsidy_reported="SPM_ENGVAL",
spm_unit_wic_reported="SPM_WICVAL",
spm_unit_broadband_subsidy_reported="SPM_BBSUBVAL",
spm_unit_payroll_tax_reported="SPM_FICA",
spm_unit_federal_tax_reported="SPM_FEDTAX",
# State tax includes refundable credits.
spm_unit_state_tax_reported="SPM_STTAX",
spm_unit_capped_housing_subsidy_data="SPM_CAPHOUSESUB",
spm_unit_energy_subsidy_data="SPM_ENGVAL",
spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS",
spm_unit_net_income_reported="SPM_RESOURCES",
spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS",
Expand All @@ -1425,8 +1420,6 @@ def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
spm_unit.SPM_TENMORTSTATUS.map(tenure_map).fillna("RENTER").astype("S")
)

cps["reduced_price_school_meals_reported"] = cps["free_school_meals_reported"] * 0


@pipeline_node(
PipelineNode(
Expand Down
13 changes: 2 additions & 11 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,6 @@ def _supports_structural_mortgage_inputs() -> bool:
"social_security_survivors",
# Transfer income
"unemployment_compensation",
"tanf_reported",
"ssi_reported",
"child_support_received",
"veterans_benefits",
"workers_compensation",
Expand All @@ -171,15 +169,8 @@ def _supports_structural_mortgage_inputs() -> bool:
"receives_wic",
# SPM variables
"spm_unit_total_income_reported",
"snap_reported",
"spm_unit_capped_housing_subsidy_reported",
"free_school_meals_reported",
"spm_unit_energy_subsidy_reported",
"spm_unit_wic_reported",
"spm_unit_broadband_subsidy_reported",
"spm_unit_payroll_tax_reported",
"spm_unit_federal_tax_reported",
"spm_unit_state_tax_reported",
"spm_unit_capped_housing_subsidy_data",
"spm_unit_energy_subsidy_data",
"spm_unit_net_income_reported",
"spm_unit_pre_subsidy_childcare_expenses",
# Medical expenses
Expand Down
6 changes: 3 additions & 3 deletions policyengine_us_data/db/etl_national_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,15 +291,15 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
"year": 2024,
},
{
"constraint_variable": "spm_unit_energy_subsidy_reported",
"constraint_variable": "spm_unit_energy_subsidy_data",
"target_variable": "household_count",
"household_count": 5_939_605,
"source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2023/FY2023AllStates%28National%29Profile-508Compliant.pdf",
"notes": "LIHEAP total households served by state programs",
"year": 2023,
},
{
"constraint_variable": "spm_unit_energy_subsidy_reported",
"constraint_variable": "spm_unit_energy_subsidy_data",
"target_variable": "household_count",
"household_count": 5_876_646,
"source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2024/FY2024_AllStates%28National%29_Profile.pdf",
Expand Down Expand Up @@ -718,7 +718,7 @@ def load_national_targets(
stratum_notes = "National ACA Premium Tax Credit Recipients"
constraint_operation = ">"
constraint_value = "0"
elif constraint_var == "spm_unit_energy_subsidy_reported":
elif constraint_var == "spm_unit_energy_subsidy_data":
stratum_notes = "National LIHEAP Recipient Households"
constraint_operation = ">"
constraint_value = "0"
Expand Down
7 changes: 3 additions & 4 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -1815,10 +1815,9 @@ def _add_snap_metric_columns(
"""
snap_targets = pd.read_csv(CALIBRATION_FOLDER / "snap_state.csv")

snap_cost = sim.calculate("snap_reported", map_to="household").values
snap_hhs = (sim.calculate("snap_reported", map_to="household").values > 0).astype(
int
)
snap = sim.calculate("snap", map_to="household").values
snap_cost = snap
snap_hhs = (snap > 0).astype(int)

state = sim.calculate("state_code", map_to="person").values
state = sim.map_result(state, "person", "household", how="value_from_first_person")
Expand Down
4 changes: 2 additions & 2 deletions policyengine_us_data/utils/national_target_parity.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,9 @@ def classify_national_target(
target_name,
index.match(
variable="household_count",
domain_variable="spm_unit_energy_subsidy_reported",
domain_variable="spm_unit_energy_subsidy_data",
period=period,
constraints=[_constraint("spm_unit_energy_subsidy_reported", ">", 0)],
constraints=[_constraint("spm_unit_energy_subsidy_data", ">", 0)],
),
reason="structured_liheap_target",
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: 3.14",
]
dependencies = [
"policyengine-us>=1.691.1",
"policyengine-us>=1.691.3",
# policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for
# PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
# after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.
Expand Down
10 changes: 2 additions & 8 deletions tests/integration/support/tiny_stage_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
"cps_race",
"detailed_occupation_recode",
"treasury_tipped_occupation_code",
"tanf_reported",
"ssi_reported",
"is_puf_clone",
)
)
Expand All @@ -50,8 +48,7 @@
"tax_unit_is_joint",
"spm_unit_total_income_reported",
"spm_unit_net_income_reported",
"spm_unit_capped_housing_subsidy_reported",
"snap_reported",
"spm_unit_capped_housing_subsidy_data",
"household_is_puf_clone",
)
)
Expand Down Expand Up @@ -224,8 +221,6 @@ def _extended_person_arrays(
person_count,
dtype=np.int16,
),
"tanf_reported": np.zeros(person_count, dtype=np.float32),
"ssi_reported": np.zeros(person_count, dtype=np.float32),
"is_puf_clone": np.concatenate(
[
np.zeros(cps_person_count, dtype=np.bool_),
Expand Down Expand Up @@ -260,12 +255,11 @@ def _extended_group_arrays(
"spm_unit_net_income_reported": np.round(total_income * 0.85, 2).astype(
np.float32
),
"spm_unit_capped_housing_subsidy_reported": np.where(
"spm_unit_capped_housing_subsidy_data": np.where(
arrays["tenure_type"] == b"RENTED",
1_200,
0,
).astype(np.float32),
"snap_reported": np.where(total_income < 50_000, 1_000, 0).astype(np.float32),
"household_is_puf_clone": np.concatenate(
[
np.zeros(cps_household_count, dtype=np.bool_),
Expand Down
45 changes: 44 additions & 1 deletion tests/integration/test_cps_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def fit(self, X_train, predictors, imputed_variables):
cps = {
"age": np.array([40, 12, 70], dtype=np.int32),
"is_household_head": np.array([True, False, True], dtype=bool),
"spm_unit_capped_housing_subsidy_reported": np.zeros(3, dtype=np.float32),
"spm_unit_capped_housing_subsidy_data": np.zeros(3, dtype=np.float32),
}
person = pd.DataFrame({"P_SEQ": [1, 2, 1]})
household = pd.DataFrame({"H_TENURE": [2, 1]})
Expand All @@ -225,3 +225,46 @@ def fit(self, X_train, predictors, imputed_variables):
np.array([0, 0, 4000], dtype=np.int32),
)
assert not dataset.file_path.exists()


def test_add_spm_variables_keeps_formulaic_outputs_out_of_dataset():
    """Check which keys ``add_spm_variables`` writes into the output mapping.

    A one-row SPM unit frame is passed in together with an empty dict. The
    test then verifies two things:

    * the retained variables (total/net income, SNAP, capped housing
      subsidy, energy subsidy, tenure type) are present with the expected
      single-row values;
    * the school-meal, WIC, broadband, and tax ``*_reported`` keys are not
      written at all.
    """
    from policyengine_us_data.datasets.cps.cps import add_spm_variables

    # Raw SPM column -> the single value used for the one test row.
    raw_row = {
        "SPM_TOTVAL": 50_000,
        "SPM_RESOURCES": 45_000,
        "SPM_SNAPSUB": 1_200,
        "SPM_CAPHOUSESUB": 3_000,
        "SPM_ENGVAL": 500,
        "SPM_SCHLUNCH": 800,
        "SPM_WICVAL": 200,
        "SPM_BBSUBVAL": 360,
        "SPM_FICA": 3_825,
        "SPM_FEDTAX": 2_000,
        "SPM_STTAX": 1_000,
        "SPM_CAPWKCCXPNS": 4_000,
        "SPM_CHILDCAREXPNS": 4_500,
        "SPM_TENMORTSTATUS": 3,
    }
    spm_unit = pd.DataFrame({column: [value] for column, value in raw_row.items()})
    cps = {}

    # The leading positional argument is unused by this test, so None is passed.
    add_spm_variables(None, cps, spm_unit)

    # Variables that must still be populated, with their expected row values.
    expected_outputs = {
        "spm_unit_total_income_reported": [50_000],
        "spm_unit_net_income_reported": [45_000],
        "snap_reported": [1_200],
        "spm_unit_capped_housing_subsidy_data": [3_000],
        "spm_unit_energy_subsidy_data": [500],
        "spm_unit_tenure_type": [b"RENTER"],
    }
    for name, expected in expected_outputs.items():
        assert cps[name].tolist() == expected

    # Variables that must no longer be written to the output mapping.
    removed_outputs = (
        "free_school_meals_reported",
        "reduced_price_school_meals_reported",
        "spm_unit_wic_reported",
        "spm_unit_broadband_subsidy_reported",
        "spm_unit_payroll_tax_reported",
        "spm_unit_federal_tax_reported",
        "spm_unit_state_tax_reported",
    )
    leaked = [name for name in removed_outputs if name in cps]
    assert not leaked
2 changes: 1 addition & 1 deletion tests/unit/datasets/test_cps_file_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ class FakeACS_2022:
dataset = FakeDataset()
cps = {
"age": np.array([40], dtype=np.int32),
"spm_unit_capped_housing_subsidy_reported": np.array([0.0]),
"spm_unit_capped_housing_subsidy_data": np.array([0.0]),
# add_id_variables populates this upstream of add_rent in the real
# pipeline; see the policyengine-core#482 workaround override below.
"is_household_head": np.array([True]),
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_etl_national_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def test_load_national_targets_supports_liheap_household_counts(tmp_path, monkey

conditional_targets = [
{
"constraint_variable": "spm_unit_energy_subsidy_reported",
"constraint_variable": "spm_unit_energy_subsidy_data",
"target_variable": "household_count",
"household_count": 5_876_646,
"source": "https://example.com/liheap-2024.pdf",
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_load_national_targets_supports_liheap_household_counts(tmp_path, monkey
)
for constraint in liheap_stratum.constraints_rel
}
assert ("spm_unit_energy_subsidy_reported", ">", "0") in constraints
assert ("spm_unit_energy_subsidy_data", ">", "0") in constraints

liheap_target = session.exec(
select(Target).where(
Expand Down
8 changes: 4 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading