PolicyEngine · MaxGhenis · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/changelog.d/remove-reported-spm-inputs.changed b/changelog.d/remove-reported-spm-inputs.changed
@@ -0,0 +1 @@
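+Remove reported SPM WIC, school meals, broadband, and tax inputs from CPS outputs in favor of policyengine-us formulas; also remove `tanf_reported`, drop the `snap_reported` and `ssi_reported` anchors in `add_takeup` before the dataset is saved, and rename the reported housing and energy subsidy inputs to `spm_unit_capped_housing_subsidy_data` and `spm_unit_energy_subsidy_data`.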
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
@@ -444,7 +444,7 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
     # Assume zero housing assistance since
     cps["pre_subsidy_rent"] = cps["rent"]
     cps["housing_assistance"] = np.zeros_like(
-        cps["spm_unit_capped_housing_subsidy_reported"]
+        cps["spm_unit_capped_housing_subsidy_data"]
     )
     cps["real_estate_taxes"] = np.zeros(len(cps["age"]), dtype=float)
     cps["real_estate_taxes"][mask] = imputed_values["real_estate_taxes"]
@@ -633,6 +633,9 @@ def add_takeup(self):
         data["age"],
     )
 
+    for source_anchor in ("snap_reported", "ssi_reported"):
+        data.pop(source_anchor, None)
+
     self.save_dataset(data)
 
 
@@ -1260,9 +1263,8 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
     # The code for strike benefits is 12.
     cps["strike_benefits"] = (person.OI_OFF == 12) * person.OI_VAL
     cps["child_support_received"] = person.CSP_VAL
-    # Assume all public assistance / welfare dollars (PAW_VAL) are TANF.
-    # They could also include General Assistance.
-    cps["tanf_reported"] = person.PAW_VAL
+    # CPS SSI receipt anchors SSI take-up and disability alignment inside
+    # add_takeup; it is dropped before the dataset is saved.
     cps["ssi_reported"] = person.SSI_VAL
     # Allocate CPS RETCB_VAL (a single bundled retirement contribution
     # total) into account-type-specific variables using a proportional
@@ -1397,15 +1399,8 @@ def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
     SPM_RENAMES = dict(
         spm_unit_total_income_reported="SPM_TOTVAL",
         snap_reported="SPM_SNAPSUB",
-        spm_unit_capped_housing_subsidy_reported="SPM_CAPHOUSESUB",
-        free_school_meals_reported="SPM_SCHLUNCH",
-        spm_unit_energy_subsidy_reported="SPM_ENGVAL",
-        spm_unit_wic_reported="SPM_WICVAL",
-        spm_unit_broadband_subsidy_reported="SPM_BBSUBVAL",
-        spm_unit_payroll_tax_reported="SPM_FICA",
-        spm_unit_federal_tax_reported="SPM_FEDTAX",
-        # State tax includes refundable credits.
-        spm_unit_state_tax_reported="SPM_STTAX",
+        spm_unit_capped_housing_subsidy_data="SPM_CAPHOUSESUB",
+        spm_unit_energy_subsidy_data="SPM_ENGVAL",
         spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS",
         spm_unit_net_income_reported="SPM_RESOURCES",
         spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS",
@@ -1425,8 +1420,6 @@ def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
             spm_unit.SPM_TENMORTSTATUS.map(tenure_map).fillna("RENTER").astype("S")
         )
 
-    cps["reduced_price_school_meals_reported"] = cps["free_school_meals_reported"] * 0
-
 
 @pipeline_node(
     PipelineNode(

diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py
@@ -161,8 +161,6 @@ def _supports_structural_mortgage_inputs() -> bool:
     "social_security_survivors",
     # Transfer income
     "unemployment_compensation",
-    "tanf_reported",
-    "ssi_reported",
     "child_support_received",
     "veterans_benefits",
     "workers_compensation",
@@ -171,15 +169,8 @@ def _supports_structural_mortgage_inputs() -> bool:
     "receives_wic",
     # SPM variables
     "spm_unit_total_income_reported",
-    "snap_reported",
-    "spm_unit_capped_housing_subsidy_reported",
-    "free_school_meals_reported",
-    "spm_unit_energy_subsidy_reported",
-    "spm_unit_wic_reported",
-    "spm_unit_broadband_subsidy_reported",
-    "spm_unit_payroll_tax_reported",
-    "spm_unit_federal_tax_reported",
-    "spm_unit_state_tax_reported",
+    "spm_unit_capped_housing_subsidy_data",
+    "spm_unit_energy_subsidy_data",
     "spm_unit_net_income_reported",
     "spm_unit_pre_subsidy_childcare_expenses",
     # Medical expenses

diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
@@ -291,15 +291,15 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
             "year": 2024,
         },
         {
-            "constraint_variable": "spm_unit_energy_subsidy_reported",
+            "constraint_variable": "spm_unit_energy_subsidy_data",
             "target_variable": "household_count",
             "household_count": 5_939_605,
             "source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2023/FY2023AllStates%28National%29Profile-508Compliant.pdf",
             "notes": "LIHEAP total households served by state programs",
             "year": 2023,
         },
         {
-            "constraint_variable": "spm_unit_energy_subsidy_reported",
+            "constraint_variable": "spm_unit_energy_subsidy_data",
             "target_variable": "household_count",
             "household_count": 5_876_646,
             "source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2024/FY2024_AllStates%28National%29_Profile.pdf",
@@ -718,7 +718,7 @@ def load_national_targets(
                 stratum_notes = "National ACA Premium Tax Credit Recipients"
                 constraint_operation = ">"
                 constraint_value = "0"
-            elif constraint_var == "spm_unit_energy_subsidy_reported":
+            elif constraint_var == "spm_unit_energy_subsidy_data":
                 stratum_notes = "National LIHEAP Recipient Households"
                 constraint_operation = ">"
                 constraint_value = "0"

diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
@@ -1815,10 +1815,9 @@ def _add_snap_metric_columns(
     """
     snap_targets = pd.read_csv(CALIBRATION_FOLDER / "snap_state.csv")
 
-    snap_cost = sim.calculate("snap_reported", map_to="household").values
-    snap_hhs = (sim.calculate("snap_reported", map_to="household").values > 0).astype(
-        int
-    )
+    snap = sim.calculate("snap", map_to="household").values
+    snap_cost = snap
+    snap_hhs = (snap > 0).astype(int)
 
     state = sim.calculate("state_code", map_to="person").values
     state = sim.map_result(state, "person", "household", how="value_from_first_person")

diff --git a/policyengine_us_data/utils/national_target_parity.py b/policyengine_us_data/utils/national_target_parity.py
@@ -482,9 +482,9 @@ def classify_national_target(
             target_name,
             index.match(
                 variable="household_count",
-                domain_variable="spm_unit_energy_subsidy_reported",
+                domain_variable="spm_unit_energy_subsidy_data",
                 period=period,
-                constraints=[_constraint("spm_unit_energy_subsidy_reported", ">", 0)],
+                constraints=[_constraint("spm_unit_energy_subsidy_data", ">", 0)],
             ),
             reason="structured_liheap_target",
         )

diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.14",
 ]
 dependencies = [
-    "policyengine-us>=1.691.1",
+    "policyengine-us>=1.691.3",
     # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for
     # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
     # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.

diff --git a/tests/integration/support/tiny_stage_3.py b/tests/integration/support/tiny_stage_3.py
@@ -35,8 +35,6 @@
             "cps_race",
             "detailed_occupation_recode",
             "treasury_tipped_occupation_code",
-            "tanf_reported",
-            "ssi_reported",
             "is_puf_clone",
         )
     )
@@ -50,8 +48,7 @@
             "tax_unit_is_joint",
             "spm_unit_total_income_reported",
             "spm_unit_net_income_reported",
-            "spm_unit_capped_housing_subsidy_reported",
-            "snap_reported",
+            "spm_unit_capped_housing_subsidy_data",
             "household_is_puf_clone",
         )
     )
@@ -224,8 +221,6 @@ def _extended_person_arrays(
             person_count,
             dtype=np.int16,
         ),
-        "tanf_reported": np.zeros(person_count, dtype=np.float32),
-        "ssi_reported": np.zeros(person_count, dtype=np.float32),
         "is_puf_clone": np.concatenate(
             [
                 np.zeros(cps_person_count, dtype=np.bool_),
@@ -260,12 +255,11 @@ def _extended_group_arrays(
         "spm_unit_net_income_reported": np.round(total_income * 0.85, 2).astype(
             np.float32
         ),
-        "spm_unit_capped_housing_subsidy_reported": np.where(
+        "spm_unit_capped_housing_subsidy_data": np.where(
             arrays["tenure_type"] == b"RENTED",
             1_200,
             0,
         ).astype(np.float32),
-        "snap_reported": np.where(total_income < 50_000, 1_000, 0).astype(np.float32),
         "household_is_puf_clone": np.concatenate(
             [
                 np.zeros(cps_household_count, dtype=np.bool_),

diff --git a/tests/integration/test_cps_generation.py b/tests/integration/test_cps_generation.py
@@ -211,7 +211,7 @@ def fit(self, X_train, predictors, imputed_variables):
     cps = {
         "age": np.array([40, 12, 70], dtype=np.int32),
         "is_household_head": np.array([True, False, True], dtype=bool),
-        "spm_unit_capped_housing_subsidy_reported": np.zeros(3, dtype=np.float32),
+        "spm_unit_capped_housing_subsidy_data": np.zeros(3, dtype=np.float32),
     }
     person = pd.DataFrame({"P_SEQ": [1, 2, 1]})
     household = pd.DataFrame({"H_TENURE": [2, 1]})
@@ -225,3 +225,46 @@ def fit(self, X_train, predictors, imputed_variables):
         np.array([0, 0, 4000], dtype=np.int32),
     )
     assert not dataset.file_path.exists()
+
+
+def test_add_spm_variables_keeps_formulaic_outputs_out_of_dataset():
+    from policyengine_us_data.datasets.cps.cps import add_spm_variables
+
+    cps = {}
+    spm_unit = pd.DataFrame(
+        {
+            "SPM_TOTVAL": [50_000],
+            "SPM_RESOURCES": [45_000],
+            "SPM_SNAPSUB": [1_200],
+            "SPM_CAPHOUSESUB": [3_000],
+            "SPM_ENGVAL": [500],
+            "SPM_SCHLUNCH": [800],
+            "SPM_WICVAL": [200],
+            "SPM_BBSUBVAL": [360],
+            "SPM_FICA": [3_825],
+            "SPM_FEDTAX": [2_000],
+            "SPM_STTAX": [1_000],
+            "SPM_CAPWKCCXPNS": [4_000],
+            "SPM_CHILDCAREXPNS": [4_500],
+            "SPM_TENMORTSTATUS": [3],
+        }
+    )
+
+    add_spm_variables(None, cps, spm_unit)
+
+    assert cps["spm_unit_total_income_reported"].tolist() == [50_000]
+    assert cps["spm_unit_net_income_reported"].tolist() == [45_000]
+    assert cps["snap_reported"].tolist() == [1_200]
+    assert cps["spm_unit_capped_housing_subsidy_data"].tolist() == [3_000]
+    assert cps["spm_unit_energy_subsidy_data"].tolist() == [500]
+    assert cps["spm_unit_tenure_type"].tolist() == [b"RENTER"]
+    for variable in (
+        "free_school_meals_reported",
+        "reduced_price_school_meals_reported",
+        "spm_unit_wic_reported",
+        "spm_unit_broadband_subsidy_reported",
+        "spm_unit_payroll_tax_reported",
+        "spm_unit_federal_tax_reported",
+        "spm_unit_state_tax_reported",
+    ):
+        assert variable not in cps
diff --git a/tests/unit/datasets/test_cps_file_handles.py b/tests/unit/datasets/test_cps_file_handles.py
@@ -479,7 +479,7 @@ class FakeACS_2022:
     dataset = FakeDataset()
     cps = {
         "age": np.array([40], dtype=np.int32),
-        "spm_unit_capped_housing_subsidy_reported": np.array([0.0]),
+        "spm_unit_capped_housing_subsidy_data": np.array([0.0]),
         # add_id_variables populates this upstream of add_rent in the real
         # pipeline; see the policyengine-core#482 workaround override below.
         "is_household_head": np.array([True]),

diff --git a/tests/unit/test_etl_national_targets.py b/tests/unit/test_etl_national_targets.py
@@ -190,7 +190,7 @@ def test_load_national_targets_supports_liheap_household_counts(tmp_path, monkey
 
     conditional_targets = [
         {
-            "constraint_variable": "spm_unit_energy_subsidy_reported",
+            "constraint_variable": "spm_unit_energy_subsidy_data",
             "target_variable": "household_count",
             "household_count": 5_876_646,
             "source": "https://example.com/liheap-2024.pdf",
@@ -222,7 +222,7 @@ def test_load_national_targets_supports_liheap_household_counts(tmp_path, monkey
             )
             for constraint in liheap_stratum.constraints_rel
         }
-        assert ("spm_unit_energy_subsidy_reported", ">", "0") in constraints
+        assert ("spm_unit_energy_subsidy_data", ">", "0") in constraints
 
         liheap_target = session.exec(
             select(Target).where(

diff --git a/uv.lock b/uv.lock
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Remove reported SPM WIC, school meals, broadband, and tax inputs from CPS outputs in favor of policyengine-us formulas.