PolicyEngine · baogorek · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 17, 2026
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -1,4 +1,5 @@
-- bump: patch
+- bump: minor
   changes:
-    fixed:
-    - Versioning workflow checkout for push events
+    added:
+    - Support for health_insurance_premiums_without_medicare_part_b in local area calibration
+    - Test coverage for sparse matrix builder with person-level targets
diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb
@@ -459,10 +459,10 @@
     "print(\"Remember, this is a North Carolina target:\\n\")\n",
     "print(targets_df.iloc[row_loc])\n",
     "\n",
-    "print(\"\\nHousehold donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nNC State target. Household donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['3702']])  # Household donated to NC's 2nd district\n",
     "\n",
-    "print(\"\\nHousehold donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nSame target, same household, donated to AK's at Large district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['201']])  # Household donated to AK's at Large District"
    ]
   },

diff --git a/modal_app/README.md b/modal_app/README.md
@@ -0,0 +1,62 @@
+# Modal App for GPU Weight Fitting
+
+Run calibration weight fitting on Modal's cloud GPUs.
+
+## Prerequisites
+
+- [Modal](https://modal.com/) account and CLI installed (`pip install modal`)
+- Modal CLI authenticated by running `modal token new`
+- HuggingFace token stored as a Modal secret named `huggingface-token`
+
+## Usage
+
+```bash
+modal run modal_app/remote_calibration_runner.py --branch <branch> --epochs <n> --gpu <type>
+```
+
+### Arguments
+
+| Argument | Default | Description |
+|----------|---------|-------------|
+| `--branch` | `main` | Git branch to clone and run |
+| `--epochs` | `200` | Number of training epochs |
+| `--gpu` | `T4` | GPU type: `T4`, `A10`, `A100-40GB`, `A100-80GB`, `H100` |
+| `--output` | `calibration_weights.npy` | Local path for weights file |
+| `--log-output` | `calibration_log.csv` | Local path for calibration log |
+
+### Example
+
+```bash
+modal run modal_app/remote_calibration_runner.py --branch health-insurance-premiums --epochs 100 --gpu T4
+```
+
+## Output Files
+
+- **calibration_weights.npy** - Fitted household weights
+- **calibration_log.csv** - Per-target performance metrics across epochs (target_name, estimate, target, epoch, error, rel_error, abs_error, rel_abs_error, loss)
+
+## Changing Hyperparameters
+
+Hyperparameters are in `policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py`:
+
+```python
+BETA = 0.35
+GAMMA = -0.1
+ZETA = 1.1
+INIT_KEEP_PROB = 0.999
+LOG_WEIGHT_JITTER_SD = 0.05
+LOG_ALPHA_JITTER_SD = 0.01
+LAMBDA_L0 = 1e-8
+LAMBDA_L2 = 1e-8
+LEARNING_RATE = 0.15
+```
+
+To change them:
+1. Edit `fit_calibration_weights.py`
+2. Commit and push to your branch
+3. Re-run the Modal command with that branch
+
+## Important Notes
+
+- **Keep your connection open** - Modal needs to stay connected to download results. Don't close your laptop or let it sleep until you see the local "Weights saved to:" and "Calibration log saved to:" messages.
+- Modal clones from GitHub, so local changes must be pushed before they take effect.
diff --git a/modal_app/remote_calibration_runner.py b/modal_app/remote_calibration_runner.py
@@ -0,0 +1,167 @@
+import os
+import subprocess
+import modal
+
+# Modal app that the GPU functions and the local entrypoint in this file
+# are registered on.
+app = modal.App("policyengine-us-data-fit-weights")
+
+# Modal secret looked up by name; handed to each GPU function via `secrets=`.
+hf_secret = modal.Secret.from_name("huggingface-token")
+
+# Container image: Debian slim with Python 3.11, plus git (the repo is cloned
+# at run time) and uv (used to install and run the cloned project).
+image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .apt_install("git")
+    .pip_install("uv")
+)
+
+# Repository cloned inside the container by `_fit_weights_impl`.
+REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git"
+
+
+def _run_and_echo(cmd: list, *, stderr_label: str, failure: str) -> str:
+    """Run *cmd*, echo its captured output, and return its stdout.
+
+    stdout is always printed; stderr is printed after ``stderr_label``
+    only when it is non-empty.
+
+    Raises:
+        RuntimeError: If the command exits non-zero. The message is
+            ``failure`` immediately followed by the exit code.
+    """
+    completed = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        env=os.environ.copy(),
+    )
+    print(completed.stdout)
+    if completed.stderr:
+        print(stderr_label, completed.stderr)
+    if completed.returncode != 0:
+        raise RuntimeError(f"{failure}{completed.returncode}")
+    return completed.stdout
+
+
+def _last_tagged_value(text: str, tag: str, *, anchored: bool):
+    """Return the stripped text following ``tag`` on the last matching line.
+
+    A line matches when it starts with ``tag`` (``anchored=True``) or merely
+    contains it (``anchored=False``). Returns ``None`` if no line matches.
+    """
+    value = None
+    for line in text.split("\n"):
+        matched = line.startswith(tag) if anchored else tag in line
+        if matched:
+            # maxsplit=1 keeps the full remainder even when the value itself
+            # happens to contain the tag text again.
+            value = line.split(tag, 1)[1].strip()
+    return value
+
+
+def _fit_weights_impl(branch: str, epochs: int) -> dict:
+    """Clone the repo, fetch calibration inputs, and fit weights on the GPU.
+
+    Shared implementation behind the ``fit_weights_*`` Modal functions.
+    Steps, starting from ``/root``:
+
+    1. ``git clone -b <branch>`` of ``REPO_URL``, then ``uv sync --extra l0``
+       inside the checkout.
+    2. Download the calibration inputs to ``/root/calibration_data`` and read
+       the database/dataset paths from the ``DB:`` / ``DATASET:`` lines the
+       inline download script prints.
+    3. Run ``fit_calibration_weights.py`` with ``--device cuda`` and read the
+       result locations from its ``OUTPUT_PATH:`` / ``LOG_PATH:`` lines.
+
+    Args:
+        branch: Git branch to clone.
+        epochs: Value passed to the fitting script's ``--epochs`` option.
+
+    Returns:
+        ``{"weights": bytes, "log": bytes or None}`` - raw contents of the
+        weights file and, when the script reported one, of the log file.
+
+    Raises:
+        subprocess.CalledProcessError: If the clone or ``uv sync`` fails.
+        RuntimeError: If the download or fitting command exits non-zero, or
+            if its output does not contain the expected path lines.
+    """
+    os.chdir("/root")
+    subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True)
+    os.chdir("policyengine-us-data")
+
+    subprocess.run(["uv", "sync", "--extra", "l0"], check=True)
+
+    print("Downloading calibration inputs from HuggingFace...")
+    download_stdout = _run_and_echo(
+        [
+            "uv", "run", "python", "-c",
+            "from policyengine_us_data.utils.huggingface import "
+            "download_calibration_inputs; "
+            "paths = download_calibration_inputs('/root/calibration_data'); "
+            "print(f\"DB: {paths['database']}\"); "
+            "print(f\"DATASET: {paths['dataset']}\")"
+        ],
+        stderr_label="Download STDERR:",
+        failure="Download failed: ",
+    )
+
+    db_path = _last_tagged_value(download_stdout, "DB:", anchored=True)
+    dataset_path = _last_tagged_value(
+        download_stdout, "DATASET:", anchored=True
+    )
+    # Fail here with a readable message: a None in the argv below would
+    # otherwise surface as an opaque TypeError from subprocess.
+    if not db_path or not dataset_path:
+        raise RuntimeError(
+            "Download did not report both 'DB:' and 'DATASET:' paths "
+            f"(db_path={db_path!r}, dataset_path={dataset_path!r})"
+        )
+
+    script_path = (
+        "policyengine_us_data/datasets/cps/"
+        "local_area_calibration/fit_calibration_weights.py"
+    )
+    fit_stdout = _run_and_echo(
+        [
+            "uv", "run", "python", script_path,
+            "--device", "cuda",
+            "--epochs", str(epochs),
+            "--db-path", db_path,
+            "--dataset-path", dataset_path,
+        ],
+        stderr_label="STDERR:",
+        failure="Script failed with code ",
+    )
+
+    output_path = _last_tagged_value(
+        fit_stdout, "OUTPUT_PATH:", anchored=False
+    )
+    log_path = _last_tagged_value(fit_stdout, "LOG_PATH:", anchored=False)
+    # The weights file is mandatory; the log file is optional.
+    if not output_path:
+        raise RuntimeError(
+            "Fitting script did not report an 'OUTPUT_PATH:' line"
+        )
+
+    with open(output_path, 'rb') as f:
+        weights_bytes = f.read()
+
+    log_bytes = None
+    if log_path:
+        with open(log_path, 'rb') as f:
+            log_bytes = f.read()
+
+    return {"weights": weights_bytes, "log": log_bytes}
+
+
+@app.function(
+    gpu="T4",
+    image=image,
+    secrets=[hf_secret],
+    cpu=4.0,
+    memory=32768,
+    timeout=14400,
+)
+def fit_weights_t4(branch: str = "main", epochs: int = 200) -> dict:
+    """Fit calibration weights in a Modal container with a T4 GPU.
+
+    Thin entry point: forwards ``branch`` and ``epochs`` to
+    ``_fit_weights_impl`` and returns its result unchanged.
+    """
+    return _fit_weights_impl(branch=branch, epochs=epochs)
+
+
+@app.function(
+    gpu="A10",
+    image=image,
+    secrets=[hf_secret],
+    cpu=4.0,
+    memory=32768,
+    timeout=14400,
+)
+def fit_weights_a10(branch: str = "main", epochs: int = 200) -> dict:
+    """Fit calibration weights in a Modal container with an A10 GPU.
+
+    Thin entry point: forwards ``branch`` and ``epochs`` to
+    ``_fit_weights_impl`` and returns its result unchanged.
+    """
+    return _fit_weights_impl(branch=branch, epochs=epochs)
+
+
+@app.function(
+    gpu="A100-40GB",
+    image=image,
+    secrets=[hf_secret],
+    cpu=4.0,
+    memory=32768,
+    timeout=14400,
+)
+def fit_weights_a100_40(branch: str = "main", epochs: int = 200) -> dict:
+    """Fit calibration weights in a Modal container with an A100-40GB GPU.
+
+    Thin entry point: forwards ``branch`` and ``epochs`` to
+    ``_fit_weights_impl`` and returns its result unchanged.
+    """
+    return _fit_weights_impl(branch=branch, epochs=epochs)
+
+
+@app.function(
+    gpu="A100-80GB",
+    image=image,
+    secrets=[hf_secret],
+    cpu=4.0,
+    memory=32768,
+    timeout=14400,
+)
+def fit_weights_a100_80(branch: str = "main", epochs: int = 200) -> dict:
+    """Fit calibration weights in a Modal container with an A100-80GB GPU.
+
+    Thin entry point: forwards ``branch`` and ``epochs`` to
+    ``_fit_weights_impl`` and returns its result unchanged.
+    """
+    return _fit_weights_impl(branch=branch, epochs=epochs)
+
+
+@app.function(
+    gpu="H100",
+    image=image,
+    secrets=[hf_secret],
+    cpu=4.0,
+    memory=32768,
+    timeout=14400,
+)
+def fit_weights_h100(branch: str = "main", epochs: int = 200) -> dict:
+    """Fit calibration weights in a Modal container with an H100 GPU.
+
+    Thin entry point: forwards ``branch`` and ``epochs`` to
+    ``_fit_weights_impl`` and returns its result unchanged.
+    """
+    return _fit_weights_impl(branch=branch, epochs=epochs)
+
+
+# Maps the `gpu` argument of `main` to the Modal function pinned to that GPU.
+# Each key mirrors the `gpu=` string in the decorator of the function it
+# maps to; the key order is what `main` lists in its "Unknown GPU" error.
+GPU_FUNCTIONS = {
+    "T4": fit_weights_t4,
+    "A10": fit_weights_a10,
+    "A100-40GB": fit_weights_a100_40,
+    "A100-80GB": fit_weights_a100_80,
+    "H100": fit_weights_h100,
+}
+
+
+@app.local_entrypoint()
+def main(
+    branch: str = "main",
+    epochs: int = 200,
+    gpu: str = "T4",
+    output: str = "calibration_weights.npy",
+    log_output: str = "calibration_log.csv",
+):
+    """Run weight fitting on a Modal GPU and save the results locally.
+
+    Args:
+        branch: Git branch the remote function clones and runs.
+        epochs: Epoch count forwarded to the remote function.
+        gpu: Key of ``GPU_FUNCTIONS``; matched case-insensitively.
+        output: Local path the returned weights bytes are written to.
+        log_output: Local path the calibration log is written to, when the
+            remote run returned one.
+
+    Raises:
+        ValueError: If ``gpu`` does not name an entry of ``GPU_FUNCTIONS``.
+    """
+    # Every key in GPU_FUNCTIONS is upper-case, so normalising the argument
+    # lets ``--gpu t4`` or ``--gpu h100`` work as well.
+    gpu_key = gpu.upper()
+    if gpu_key not in GPU_FUNCTIONS:
+        raise ValueError(
+            f"Unknown GPU: {gpu}. Choose from: {list(GPU_FUNCTIONS.keys())}"
+        )
+
+    # Create the destination directories before launching the remote job, so
+    # a bad local path fails now rather than after the GPU run has finished
+    # and its results can no longer be saved.
+    for local_path in (output, log_output):
+        os.makedirs(
+            os.path.dirname(os.path.abspath(local_path)), exist_ok=True
+        )
+
+    print(f"Running with GPU: {gpu_key}, epochs: {epochs}, branch: {branch}")
+    result = GPU_FUNCTIONS[gpu_key].remote(branch=branch, epochs=epochs)
+
+    with open(output, 'wb') as f:
+        f.write(result["weights"])
+    print(f"Weights saved to: {output}")
+
+    # The log is optional: the remote side returns None when the fitting
+    # script did not report a log file.
+    if result["log"]:
+        with open(log_output, 'wb') as f:
+            f.write(result["log"])
+        print(f"Calibration log saved to: {log_output}")