Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/long_run_projection.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ on:
required: false
default: ""
type: string
support_augmentation_sanitize_worker_non_target_income:
description: "Zero worker-donor clone investment and retirement income"
required: false
default: false
type: boolean
support_augmentation_sanitize_clone_non_target_income:
description: "Zero all donor-clone investment and retirement income"
required: false
default: false
type: boolean
allow_validation_failures:
description: "Allow invalid artifacts to be written for diagnostics"
required: false
Expand Down Expand Up @@ -146,6 +156,8 @@ jobs:
SUPPORT_AUGMENTATION_DONORS_PER_TARGET: ${{ inputs.support_augmentation_donors_per_target }}
SUPPORT_AUGMENTATION_MAX_DISTANCE: ${{ inputs.support_augmentation_max_distance }}
SUPPORT_AUGMENTATION_PROFILE: ${{ inputs.support_augmentation_profile }}
SUPPORT_AUGMENTATION_SANITIZE_CLONE_NON_TARGET_INCOME: ${{ inputs.support_augmentation_sanitize_clone_non_target_income }}
SUPPORT_AUGMENTATION_SANITIZE_WORKER_NON_TARGET_INCOME: ${{ inputs.support_augmentation_sanitize_worker_non_target_income }}
SUPPORT_AUGMENTATION_START_YEAR: ${{ inputs.support_augmentation_start_year }}
SUPPORT_AUGMENTATION_TARGET_YEAR: ${{ inputs.support_augmentation_target_year }}
SUPPORT_AUGMENTATION_TOP_N_TARGETS: ${{ inputs.support_augmentation_top_n_targets }}
Expand Down Expand Up @@ -199,6 +211,12 @@ jobs:
if [ -n "${SUPPORT_AUGMENTATION_BLUEPRINT_BASE_WEIGHT_SCALE}" ]; then
cmd+=(--support-augmentation-blueprint-base-weight-scale "${SUPPORT_AUGMENTATION_BLUEPRINT_BASE_WEIGHT_SCALE}")
fi
if [ "${SUPPORT_AUGMENTATION_SANITIZE_WORKER_NON_TARGET_INCOME}" = "true" ]; then
cmd+=(--support-augmentation-sanitize-worker-non-target-income)
fi
if [ "${SUPPORT_AUGMENTATION_SANITIZE_CLONE_NON_TARGET_INCOME}" = "true" ]; then
cmd+=(--support-augmentation-sanitize-clone-non-target-income)
fi
if [ "${ALLOW_VALIDATION_FAILURES}" = "true" ]; then
cmd+=(--allow-validation-failures)
fi
Expand Down
1 change: 1 addition & 0 deletions changelog.d/950.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Harden long-term donor-composite support so the 2100 Social Security, taxable payroll, and TOB calibration path validates with nonnegative weights.
122 changes: 122 additions & 0 deletions modal_app/long_term_publishable_probe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from __future__ import annotations

import os
import subprocess
import sys
from pathlib import Path

import modal

# Put two candidate repository roots at the front of sys.path (skipping any
# that are already listed) ahead of the `modal_app` import that follows:
# a fixed path under /root and the grandparent directory of this file.
_baked = "/root/policyengine-us-data"
_local = str(Path(__file__).resolve().parents[1])
for _p in (_baked, _local):
    if _p in sys.path:
        continue
    sys.path.insert(0, _p)

from modal_app.images import cpu_image as base_image # noqa: E402

# Modal application object; the functions below attach to it through
# @app.function and @app.local_entrypoint.
app = modal.App("policyengine-us-data-long-term-probe")

# Modal secret looked up by name; passed to both remote functions via `secrets`.
hf_secret = modal.Secret.from_name("huggingface-token")

# Image used by the remote functions (the `cpu_image` imported from
# modal_app.images).
image = base_image

# Directory containing the long-term scripts that are launched as
# subprocesses; it is also handed to those subprocesses as PYTHONPATH.
_LONG_TERM_DIR = (
"/root/policyengine-us-data/policyengine_us_data/datasets/cps/long_term"
)
# Python interpreter used to launch those scripts.
_VENV_PYTHON = "/root/policyengine-us-data/.venv/bin/python"


def _run_long_term_json_command(script_name: str, *args: str) -> str:
    """Run one long-term script as a subprocess and return its stdout.

    Args:
        script_name: File name of the script inside ``_LONG_TERM_DIR``.
        *args: Command-line arguments forwarded verbatim to the script.

    Returns:
        Everything the script wrote to stdout, as text.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status. The captured stderr is echoed to this process's stderr
            before the exception propagates.
    """
    command = [_VENV_PYTHON, f"{_LONG_TERM_DIR}/{script_name}", *args]
    # The child inherits the current environment, except that PYTHONPATH is
    # replaced (not extended) with the script directory.
    child_env = {
        **os.environ,
        "PYTHONPATH": _LONG_TERM_DIR,
    }
    try:
        result = subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            env=child_env,
        )
    except subprocess.CalledProcessError as exc:
        # The exception message only carries the exit status; without this
        # the script's captured diagnostics would be discarded silently.
        if exc.stderr:
            sys.stderr.write(exc.stderr)
            sys.stderr.flush()
        raise
    return result.stdout


@app.function(
    image=image,
    timeout=60 * 60,
    cpu=8,
    memory=65536,
    secrets=[hf_secret],
)
def assess_publishable_probe_json(
    *,
    years_csv: str = "2075",
    profile: str = "ss-payroll-tob",
    target_source: str = "trustees_2025_current_law",
    base_dataset_path: str = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
) -> str:
    """Run ``assess_publishable_horizon.py`` and return its stdout.

    Args:
        years_csv: Value passed to the script's ``--years`` option.
        profile: Value passed to ``--profile``.
        target_source: Value passed to ``--target-source``.
        base_dataset_path: Value passed to ``--base-dataset``.

    Returns:
        The text the script printed to stdout.
    """
    cli_args = [
        "--profile",
        profile,
        "--target-source",
        target_source,
        "--years",
        years_csv,
        "--base-dataset",
        base_dataset_path,
    ]
    return _run_long_term_json_command(
        "assess_publishable_horizon.py", *cli_args
    )


@app.function(
    image=image,
    timeout=60 * 60,
    cpu=8,
    memory=65536,
    secrets=[hf_secret],
)
def assess_augmented_publishable_probe_json(
    *,
    years_csv: str = "2075",
    profile: str = "ss-payroll-tob",
    target_source: str = "trustees_2025_current_law",
    support_augmentation_profile: str = "late-clone-v1",
    base_dataset_path: str = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
) -> str:
    """Run ``assess_augmented_publishability.py`` and return its stdout.

    Args:
        years_csv: Value passed to the script's ``--years`` option.
        profile: Value passed to ``--profile``.
        target_source: Value passed to ``--target-source``.
        support_augmentation_profile: Value passed to
            ``--support-augmentation``.
        base_dataset_path: Value passed to ``--base-dataset``.

    Returns:
        The text the script printed to stdout.
    """
    cli_args = [
        "--profile",
        profile,
        "--target-source",
        target_source,
        "--years",
        years_csv,
        "--base-dataset",
        base_dataset_path,
        "--support-augmentation",
        support_augmentation_profile,
    ]
    return _run_long_term_json_command(
        "assess_augmented_publishability.py", *cli_args
    )


@app.local_entrypoint()
def main(
    years: str = "2075",
    profile: str = "ss-payroll-tob",
    target_source: str = "trustees_2025_current_law",
    support_augmentation_profile: str = "",
) -> None:
    """Invoke one of the remote probes and print the text it returns.

    Args:
        years: Forwarded to the remote function as ``years_csv``.
        profile: Forwarded as ``profile``.
        target_source: Forwarded as ``target_source``.
        support_augmentation_profile: When non-empty, the augmented probe is
            called with this profile; when empty, the plain probe is called.
    """
    shared_kwargs = {
        "years_csv": years,
        "profile": profile,
        "target_source": target_source,
    }
    if not support_augmentation_profile:
        print(assess_publishable_probe_json.remote(**shared_kwargs))
        return
    print(
        assess_augmented_publishable_probe_json.remote(
            support_augmentation_profile=support_augmentation_profile,
            **shared_kwargs,
        )
    )
14 changes: 10 additions & 4 deletions policyengine_us_data/datasets/cps/long_term/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ Run projections using `run_household_projection.py`:
python run_household_projection.py 2100 --profile ss-payroll-tob --target-source trustees_2025_current_law --save-h5

# Experimental: role-based donor composites assembled into late-year support
python run_household_projection.py 2075 2100 --profile ss-payroll-tob --target-source trustees_2025_current_law --support-augmentation-profile donor-backed-composite-v1 --support-augmentation-target-year 2100 --support-augmentation-blueprint-base-weight-scale 5.0
python run_household_projection.py 2075 2100 --profile ss-payroll-tob --target-source trustees_2025_current_law --support-augmentation-profile donor-backed-composite-v1 --support-augmentation-target-year 2100 --support-augmentation-blueprint-base-weight-scale 5.0 --support-augmentation-sanitize-clone-non-target-income

# Experimental: target-year blueprint calibration over donor-composite support
python run_household_projection.py 2100 2100 --profile ss-payroll-tob --target-source trustees_2025_current_law --support-augmentation-profile donor-backed-composite-v1 --support-augmentation-target-year 2100 --support-augmentation-blueprint-base-weight-scale 5.0 --save-h5
python run_household_projection.py 2100 2100 --profile ss-payroll-tob --target-source trustees_2025_current_law --support-augmentation-profile donor-backed-composite-v1 --support-augmentation-target-year 2100 --support-augmentation-blueprint-base-weight-scale 5.0 --support-augmentation-sanitize-clone-non-target-income --save-h5

# IPF with only age distribution constraints (faster, less accurate)
python run_household_projection.py 2050 --profile age-only
Expand All @@ -35,7 +35,11 @@ python run_long_term_production.py \
--jobs 4 \
--output-dir ./projected_datasets_production \
--profile ss-payroll-tob \
--target-source trustees_2025_current_law
--target-source trustees_2025_current_law \
--support-augmentation-profile donor-backed-composite-v1 \
--support-augmentation-target-year 2100 \
--support-augmentation-blueprint-base-weight-scale 5.0 \
--support-augmentation-sanitize-clone-non-target-income
```

**Arguments:**
Expand All @@ -53,6 +57,8 @@ python run_long_term_production.py \
- `--support-augmentation-max-distance`: Maximum donor-match distance retained for cloning (default `3.0`).
- `--support-augmentation-clone-weight-scale`: Baseline weight multiplier applied to each donor-backed clone (default `0.1`).
- `--support-augmentation-blueprint-base-weight-scale`: When donor-composite augmentation is active at its target year, scales the original household priors before replacing clone priors with synthetic blueprint shares (default `5.0`).
- `--support-augmentation-sanitize-worker-non-target-income`: For `donor-backed-composite-v1`, zero investment and retirement income on worker-sourced donor clone rows.
- `--support-augmentation-sanitize-clone-non-target-income`: For `donor-backed-composite-v1`, zero investment and retirement income on all donor clone rows.
- `--greg`: Use GREG calibration instead of IPF
- `--use-ss`: Include Social Security benefit totals as calibration target (requires `--greg`)
- `--use-payroll`: Include taxable payroll totals as calibration target (requires `--greg`)
Expand Down Expand Up @@ -141,7 +147,7 @@ python run_long_term_production.py \
- Current status:
- Fixing the long-run payroll-cap bug in `policyengine-us` changed the picture materially. With the correct SSA wage base extended through `2100`, the donor-composite synthetic support is exact-feasible and dense at the archetype level.
- The runner now supports a target-year calibration blueprint for donor-composite augmentation. At the augmentation target year, it applies target age composition to clone rows, uses realized PolicyEngine Social Security and payroll values for the same rows, and keeps synthetic prior shares for clone households.
- In the current `2075` OACT probe, that blueprint path hits actual Social Security, taxable payroll, and TOB targets exactly with nonnegative weights when `--support-augmentation-blueprint-base-weight-scale 5.0`.
- In the current `2100` Trustees current-law probe, that blueprint path hits actual Social Security, taxable payroll, and TOB targets exactly with nonnegative weights when `--support-augmentation-blueprint-base-weight-scale 5.0 --support-augmentation-sanitize-clone-non-target-income`.
- The runner now also has a dynamic mode, `--support-augmentation-align-to-run-year`, that rebuilds donor-composite support for each run year and writes per-year augmentation reports.
- This is still experimental. The blueprint path is now structurally capable of handling year-specific support, but the full `2075-2100` production sweep still needs runtime tuning and caching work.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import annotations

import argparse
import json

from assess_publishable_horizon import assess_years
from support_augmentation import build_augmented_dataset


# Default value of the --base-dataset command-line option defined in main().
DEFAULT_BASE_DATASET_PATH = (
"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"
)


def parse_years(raw: str) -> list[int]:
    """Turn a comma-separated string of years into a sorted, unique list.

    Surrounding whitespace is ignored and empty entries (for example from a
    trailing comma) are skipped.

    Args:
        raw: Comma-separated year values, e.g. ``"2075, 2100"``.

    Returns:
        The distinct years in ascending order.

    Raises:
        ValueError: If no year remains after skipping empty entries, or if
            an entry cannot be converted with ``int``.
    """
    distinct_years: set[int] = set()
    for token in raw.split(","):
        cleaned = token.strip()
        if cleaned:
            distinct_years.add(int(cleaned))
    if not distinct_years:
        raise ValueError("At least one year must be provided.")
    return sorted(distinct_years)


def main() -> None:
    """Assess publishability on an augmented dataset and print a JSON report.

    Parses the command line, builds the augmented dataset with
    ``build_augmented_dataset``, runs ``assess_years`` against it, and prints
    a JSON object containing the inputs, the augmentation report and the
    assessment rows.

    Raises:
        ValueError: If ``--years`` contains no usable year or a non-integer
            entry. This is raised before the augmented dataset is built.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Assess publishability for an augmented late-tail dataset under the "
            "standard long-run contract."
        )
    )
    parser.add_argument(
        "--years",
        default="2075",
        help="Comma-separated years to assess.",
    )
    parser.add_argument(
        "--profile",
        default="ss-payroll-tob",
        help="Named calibration profile to assess.",
    )
    parser.add_argument(
        "--target-source",
        default="trustees_2025_current_law",
        help="Named long-run target source package.",
    )
    parser.add_argument(
        "--support-augmentation",
        required=True,
        help="Support augmentation profile name.",
    )
    parser.add_argument(
        "--base-dataset",
        default=DEFAULT_BASE_DATASET_PATH,
        help="Base microsimulation dataset path.",
    )
    args = parser.parse_args()

    # Parse once, and before the dataset build, so a malformed --years value
    # is rejected before that work starts.
    years = parse_years(args.years)

    augmented_dataset, augmentation_report = build_augmented_dataset(
        base_dataset=args.base_dataset,
        base_year=2024,
        profile=args.support_augmentation,
    )

    rows = assess_years(
        # Hand over a copy so the list reported in the payload cannot be
        # altered by the callee.
        years=list(years),
        profile_name=args.profile,
        target_source=args.target_source,
        base_dataset_path=augmented_dataset,
    )

    payload = {
        "years": years,
        "profile": args.profile,
        "target_source": args.target_source,
        "support_augmentation_profile": args.support_augmentation,
        "augmentation_report": augmentation_report,
        "rows": rows,
    }
    print(json.dumps(payload, indent=2, sort_keys=True))


if __name__ == "__main__":
    main()
Loading