Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/954.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Drop formulaic SPM poverty outputs from cloned and local-area H5 exports.
5 changes: 4 additions & 1 deletion policyengine_us_data/calibration/entity_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
from policyengine_us_data.calibration.block_assignment import (
derive_geography_from_blocks,
)
from policyengine_us_data.calibration.formulaic_inputs import (
drop_formulaic_spm_inputs,
)
from policyengine_us_data.datasets.puf.variable_roles import (
PUF_REPORTED_CALCULATED_TAX_OUTPUT_VARIABLES,
)
Expand Down Expand Up @@ -270,7 +273,7 @@ def materialize_clone_household_chunk(
vars_to_save = (
set(sim.input_variables) - PUF_REPORTED_CALCULATED_TAX_OUTPUT_VARIABLES
)
vars_to_save.discard("spm_unit_spm_threshold")
drop_formulaic_spm_inputs(vars_to_save)
vars_to_save.add("county")
vars_to_save.add("congressional_district_geoid")
for geo_var in [
Expand Down
19 changes: 19 additions & 0 deletions policyengine_us_data/calibration/formulaic_inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Formula outputs that must not be persisted as dataset leaf inputs."""

# Names of the formula outputs that must not be persisted as dataset leaf
# inputs. Kept immutable so every caller strips exactly the same names.
FORMULAIC_SPM_INPUTS_TO_DROP = frozenset(
    (
        "person_in_poverty",
        "in_poverty",
        "in_deep_poverty",
        "spm_unit_is_in_spm_poverty",
        "spm_unit_is_in_deep_spm_poverty",
        "spm_unit_spm_threshold",
        "spm_unit_geographic_adjustment",
    )
)


def drop_formulaic_spm_inputs(variable_names: set[str]) -> None:
    """Strip SPM formula-output names from ``variable_names`` in place.

    Args:
        variable_names: Mutable set of variable names. Every member of
            ``FORMULAIC_SPM_INPUTS_TO_DROP`` that it contains is removed;
            all other names are left untouched.

    Returns:
        None. The set passed in is modified directly.
    """
    for formula_output in FORMULAIC_SPM_INPUTS_TO_DROP:
        # discard() is a no-op for absent names, so partial overlap is fine.
        variable_names.discard(formula_output)
5 changes: 4 additions & 1 deletion policyengine_us_data/calibration/publish_local_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
from policyengine_us_data.calibration.block_assignment import (
derive_geography_from_blocks,
)
from policyengine_us_data.calibration.formulaic_inputs import (
drop_formulaic_spm_inputs,
)
from policyengine_us_data.datasets.puf.variable_roles import (
PUF_REPORTED_CALCULATED_TAX_OUTPUT_VARIABLES,
)
Expand Down Expand Up @@ -517,7 +520,7 @@ def build_h5(
vars_to_save = (
set(sim.input_variables) - PUF_REPORTED_CALCULATED_TAX_OUTPUT_VARIABLES
)
vars_to_save.discard("spm_unit_spm_threshold")
drop_formulaic_spm_inputs(vars_to_save)
vars_to_save.add("county")
vars_to_save.add("congressional_district_geoid")
for gv in [
Expand Down
13 changes: 12 additions & 1 deletion tests/unit/calibration/test_entity_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,15 @@ def test_materialize_clone_household_chunk_drops_legacy_spm_threshold_input(
fixture_entity_maps,
):
original_input_variables = list(fixture_sim.input_variables)
fixture_sim.input_variables.append("spm_unit_spm_threshold")
fixture_sim.input_variables.extend(
[
"person_in_poverty",
"in_poverty",
"in_deep_poverty",
"spm_unit_spm_threshold",
"spm_unit_geographic_adjustment",
]
)
monkeypatch.setattr(
"policyengine_us_data.calibration.entity_clone.derive_geography_from_blocks",
_fake_geography_from_blocks,
Expand All @@ -131,6 +139,9 @@ def test_materialize_clone_household_chunk_drops_legacy_spm_threshold_input(
fixture_sim.input_variables = original_input_variables

with h5py.File(output_path, "r") as h5:
assert "person_in_poverty" not in h5
assert "in_poverty" not in h5
assert "in_deep_poverty" not in h5
assert "spm_unit_spm_threshold" not in h5
assert "spm_unit_geographic_adjustment" not in h5

Expand Down
23 changes: 23 additions & 0 deletions tests/unit/calibration/test_formulaic_inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from policyengine_us_data.calibration.formulaic_inputs import (
FORMULAIC_SPM_INPUTS_TO_DROP,
drop_formulaic_spm_inputs,
)


def test_drop_formulaic_spm_inputs_removes_poverty_formula_outputs():
    """Every formulaic SPM name is removed while unrelated names survive."""
    # List all seven names held in FORMULAIC_SPM_INPUTS_TO_DROP, including the
    # two spm_unit_is_in_* flags, so that removal of each one is exercised.
    variable_names = {
        "person_in_poverty",
        "in_poverty",
        "in_deep_poverty",
        "spm_unit_is_in_spm_poverty",
        "spm_unit_is_in_deep_spm_poverty",
        "spm_unit_spm_threshold",
        "spm_unit_geographic_adjustment",
        "household_weight",
    }

    drop_formulaic_spm_inputs(variable_names)

    # Only the name that is not a formula output should remain.
    assert variable_names == {"household_weight"}


def test_formulaic_spm_inputs_includes_person_poverty():
    """The drop list must contain the ``person_in_poverty`` variable name."""
    required_name = "person_in_poverty"
    assert required_name in FORMULAIC_SPM_INPUTS_TO_DROP
1 change: 1 addition & 0 deletions tests/unit/test_extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def test_spm_threshold_is_formula_output_not_qrf_imputed(self):
assert "spm_unit_spm_threshold" not in set(CPS_ONLY_IMPUTED_VARIABLES)
assert "spm_unit_spm_threshold" not in ExtendedCPS._keep_formula_vars()
assert "spm_unit_geographic_adjustment" not in ExtendedCPS._keep_formula_vars()
assert "person_in_poverty" not in ExtendedCPS._keep_formula_vars()

def test_weeks_worked_is_preserved_for_future_year_formulas(self):
assert "weeks_worked" in ExtendedCPS._keep_formula_vars()
Expand Down