26 changes: 26 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,31 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [2.1.8] - 2026-01-25

### Added
- **`/push-pr-update` skill** for committing and pushing PR revisions
- Commits local changes to the current branch and pushes to the remote
- Triggers AI code review automatically
- Robust handling for fork repos, unpushed commits, and upstream tracking
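
As a rough illustration of the commit-and-push flow described in the bullets above (a minimal sketch; the function name, remote handling, and commit policy are assumptions, not the skill's actual implementation):

```python
# Minimal sketch of the described workflow, not the skill itself.
import subprocess

def run(*args: str) -> str:
    """Run a git command and return its stripped stdout."""
    return subprocess.run(
        ["git", *args], check=True, capture_output=True, text=True
    ).stdout.strip()

def push_pr_update(message: str, remote: str = "origin") -> None:
    branch = run("rev-parse", "--abbrev-ref", "HEAD")

    # Commit local changes only if there is anything to commit.
    if run("status", "--porcelain"):
        run("add", "-A")
        run("commit", "-m", message)

    # Push, setting upstream tracking if the branch has none yet.
    try:
        run("rev-parse", "--abbrev-ref", f"{branch}@{{upstream}}")
        run("push", remote, branch)
    except subprocess.CalledProcessError:
        run("push", "--set-upstream", remote, branch)
```

On a fork, `remote` would point at the fork rather than the upstream repository; the skill's actual fork handling is not shown here.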

### Fixed
- **TROP estimator methodology alignment** (PR #110)
- Aligned with paper methodology (Equation 5, D matrix semantics)
- Improved NaN propagation and LOOCV warnings
- Aligned Rust backend tests with the new loocv_grid_search return signature
- Fixed LOOCV cycling and D matrix validation
- Final estimation infinity handling and edge case fixes
- Absorbing-state gap detection and n_post_periods fix
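
For context on the absorbing-state bullet above: the tutorial below notes that the observation-level `treated` column must be an absorbing state (once D = 1, it stays 1 for all later periods). A minimal sketch of such a gap check, with a hypothetical helper name rather than the library's actual API:

```python
# Hypothetical validation helper; illustrates the absorbing-state rule only.
import pandas as pd

def check_absorbing_treatment(df: pd.DataFrame,
                              treatment: str = "treated",
                              unit: str = "unit",
                              time: str = "period") -> None:
    """Raise if any unit switches back from treated (1) to untreated (0)."""
    d = df.sort_values([unit, time])
    # Within a unit, cummax() exceeds the indicator exactly where D has
    # dropped back to 0 after reaching 1, i.e. where there is a gap.
    has_gap = d.groupby(unit)[treatment].apply(lambda s: (s.cummax() > s).any())
    bad_units = has_gap[has_gap].index.tolist()
    if bad_units:
        raise ValueError(f"Treatment is not an absorbing state for units: {bad_units}")
```

A check like this can be run on the input DataFrame before fitting, so any gaps are reported explicitly up front.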

### Changed
- **`/submit-pr` skill improvements** (PR #111)
- Case-insensitive secret scanning with POSIX ERE regex
- Verify origin ref exists before push
- Dynamic default branch detection with fallback
- Robust handling for unpushed commits and fork repos
- File count display in the PR summary
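
A minimal sketch of three of the behaviors listed above (secret scanning, origin ref verification, and default-branch detection); the patterns, function names, and fallback value are assumptions, not the skill's actual implementation:

```python
# Illustrative only; the pattern list and fallback branch are assumed.
import re
import subprocess

# Case-insensitive secret scan (re.IGNORECASE stands in for a POSIX ERE
# used with a case-insensitive grep).
SECRET_PATTERN = re.compile(r"(api[_-]?key|secret|token|password)\s*[:=]",
                            re.IGNORECASE)

def scan_for_secrets(diff_text: str) -> list:
    return [line for line in diff_text.splitlines() if SECRET_PATTERN.search(line)]

def git(*args: str) -> str:
    return subprocess.run(["git", *args], check=True,
                          capture_output=True, text=True).stdout.strip()

def origin_ref_exists(branch: str) -> bool:
    # `git ls-remote --heads origin <branch>` prints nothing when the ref
    # is absent on the remote.
    return bool(git("ls-remote", "--heads", "origin", branch))

def default_branch(fallback: str = "main") -> str:
    # Detect the remote's default branch, falling back when origin/HEAD
    # is not set locally.
    try:
        return git("symbolic-ref", "refs/remotes/origin/HEAD").rsplit("/", 1)[-1]
    except subprocess.CalledProcessError:
        return fallback
```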

## [2.1.7] - 2026-01-25

### Fixed
@@ -542,6 +567,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `to_dict()` and `to_dataframe()` export methods
- `is_significant` and `significance_stars` properties

[2.1.8]: https://github.com/igerber/diff-diff/compare/v2.1.7...v2.1.8
[2.1.7]: https://github.com/igerber/diff-diff/compare/v2.1.6...v2.1.7
[2.1.6]: https://github.com/igerber/diff-diff/compare/v2.1.5...v2.1.6
[2.1.5]: https://github.com/igerber/diff-diff/compare/v2.1.4...v2.1.5
2 changes: 1 addition & 1 deletion diff_diff/__init__.py
@@ -136,7 +136,7 @@
load_mpdta,
)

__version__ = "2.1.7"
__version__ = "2.1.8"
__all__ = [
# Estimators
"DifferenceInDifferences",
238 changes: 231 additions & 7 deletions docs/tutorials/10_trop.ipynb
@@ -161,7 +161,33 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Fit TROP with automatic tuning via LOOCV\ntrop_est = TROP(\n lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n n_bootstrap=50, # Reduced bootstrap replications for SE\n seed=42\n)\n\n# Note: TROP infers treatment periods from the treatment indicator column.\n# The 'treated' column should be an absorbing state (D=1 for all periods\n# during and after treatment starts).\n\n# For SDID comparison later, we keep post_periods for SDID\npost_periods = list(range(n_pre, n_pre + n_post))\n\nresults = trop_est.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n)\n\nprint(results.summary())"
"source": [
"# Fit TROP with automatic tuning via LOOCV\n",
"trop_est = TROP(\n",
" lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n",
" lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n",
" lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n",
" n_bootstrap=50, # Reduced bootstrap replications for SE\n",
" seed=42\n",
")\n",
"\n",
"# Note: TROP infers treatment periods from the treatment indicator column.\n",
"# The 'treated' column should be an absorbing state (D=1 for all periods\n",
"# during and after treatment starts).\n",
"\n",
"# For SDID comparison later, we keep post_periods for SDID\n",
"post_periods = list(range(n_pre, n_pre + n_post))\n",
"\n",
"results = trop_est.fit(\n",
" df,\n",
" outcome='outcome',\n",
" treatment='treated',\n",
" unit='unit',\n",
" time='period'\n",
")\n",
"\n",
"print(results.summary())"
]
},
{
"cell_type": "code",
@@ -217,7 +243,33 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Effect of different nuclear norm regularization levels\nprint(\"Effect of nuclear norm regularization (λ_nn):\")\nprint(\"=\"*65)\nprint(f\"{'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\nprint(\"-\"*65)\n\nfor lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n trop_fixed = TROP(\n lambda_time_grid=[1.0], # Fixed\n lambda_unit_grid=[1.0], # Fixed\n lambda_nn_grid=[lambda_nn], # Vary this\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n )\n \n res = trop_fixed.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n bias = res.att - true_att\n print(f\"{lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\")"
"source": [
"# Effect of different nuclear norm regularization levels\n",
"print(\"Effect of nuclear norm regularization (λ_nn):\")\n",
"print(\"=\"*65)\n",
"print(f\"{'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\n",
"print(\"-\"*65)\n",
"\n",
"for lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n",
" trop_fixed = TROP(\n",
" lambda_time_grid=[1.0], # Fixed\n",
" lambda_unit_grid=[1.0], # Fixed\n",
" lambda_nn_grid=[lambda_nn], # Vary this\n",
" n_bootstrap=20, # Reduced for faster execution\n",
" seed=42\n",
" )\n",
" \n",
" res = trop_fixed.fit(\n",
" df,\n",
" outcome='outcome',\n",
" treatment='treated',\n",
" unit='unit',\n",
" time='period'\n",
" )\n",
" \n",
" bias = res.att - true_att\n",
" print(f\"{lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\")"
]
},
{
"cell_type": "markdown",
@@ -353,7 +405,56 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# SDID (no factor adjustment)\n# Note: SDID uses 'treat' (unit-level ever-treated indicator)\nsdid = SyntheticDiD(\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\n# SDID still uses post_periods parameter\nsdid_results = sdid.fit(\n df,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=post_periods\n)\n\n# TROP (with factor adjustment)\n# Note: TROP uses 'treated' (observation-level treatment indicator)\n# and infers treatment periods automatically\ntrop_est2 = TROP(\n lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\ntrop_results = trop_est2.fit(\n df,\n outcome='outcome',\n treatment='treated', # Observation-level indicator\n unit='unit',\n time='period'\n)\n\nprint(\"Comparison: SDID vs TROP\")\nprint(\"=\"*60)\nprint(f\"True ATT: {true_att:.4f}\")\nprint()\nprint(f\"Synthetic DiD (no factor adjustment):\")\nprint(f\" ATT: {sdid_results.att:.4f}\")\nprint(f\" SE: {sdid_results.se:.4f}\")\nprint(f\" Bias: {sdid_results.att - true_att:.4f}\")\nprint()\nprint(f\"TROP (with factor adjustment):\")\nprint(f\" ATT: {trop_results.att:.4f}\")\nprint(f\" SE: {trop_results.se:.4f}\")\nprint(f\" Bias: {trop_results.att - true_att:.4f}\")\nprint(f\" Effective rank: {trop_results.effective_rank:.2f}\")"
"source": [
"# SDID (no factor adjustment)\n",
"# Note: SDID uses 'treat' (unit-level ever-treated indicator)\n",
"sdid = SyntheticDiD(\n",
" n_bootstrap=50, # Reduced for faster execution\n",
" seed=42\n",
")\n",
"\n",
"# SDID still uses post_periods parameter\n",
"sdid_results = sdid.fit(\n",
" df,\n",
" outcome='outcome',\n",
" treatment='treat', # Unit-level ever-treated indicator\n",
" unit='unit',\n",
" time='period',\n",
" post_periods=post_periods\n",
")\n",
"\n",
"# TROP (with factor adjustment)\n",
"# Note: TROP uses 'treated' (observation-level treatment indicator)\n",
"# and infers treatment periods automatically\n",
"trop_est2 = TROP(\n",
" lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n",
" n_bootstrap=50, # Reduced for faster execution\n",
" seed=42\n",
")\n",
"\n",
"trop_results = trop_est2.fit(\n",
" df,\n",
" outcome='outcome',\n",
" treatment='treated', # Observation-level indicator\n",
" unit='unit',\n",
" time='period'\n",
")\n",
"\n",
"print(\"Comparison: SDID vs TROP\")\n",
"print(\"=\"*60)\n",
"print(f\"True ATT: {true_att:.4f}\")\n",
"print()\n",
"print(f\"Synthetic DiD (no factor adjustment):\")\n",
"print(f\" ATT: {sdid_results.att:.4f}\")\n",
"print(f\" SE: {sdid_results.se:.4f}\")\n",
"print(f\" Bias: {sdid_results.att - true_att:.4f}\")\n",
"print()\n",
"print(f\"TROP (with factor adjustment):\")\n",
"print(f\" ATT: {trop_results.att:.4f}\")\n",
"print(f\" SE: {trop_results.se:.4f}\")\n",
"print(f\" Bias: {trop_results.att - true_att:.4f}\")\n",
"print(f\" Effective rank: {trop_results.effective_rank:.2f}\")"
]
},
{
"cell_type": "markdown",
@@ -369,7 +470,83 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Monte Carlo comparison (reduced for faster tutorial execution)\nn_sims = 5 # Reduced from 20 for faster validation\ntrop_estimates = []\nsdid_estimates = []\n\nprint(f\"Running {n_sims} simulations...\")\n\nfor sim in range(n_sims):\n # Generate new data using the library function\n # (includes both 'treated' and 'treat' columns)\n sim_data = generate_factor_data(\n n_units=50,\n n_pre=10,\n n_post=5,\n n_treated=10,\n n_factors=2,\n treatment_effect=2.0,\n factor_strength=1.5,\n noise_sd=0.5,\n seed=100 + sim\n )\n \n # TROP (uses observation-level 'treated')\n # Note: TROP infers treatment periods from the treatment indicator\n try:\n trop_m = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0],\n lambda_nn_grid=[0.1],\n n_bootstrap=10, \n seed=42 + sim\n )\n trop_res = trop_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n trop_estimates.append(trop_res.att)\n except Exception as e:\n print(f\"TROP failed on sim {sim}: {e}\")\n \n # SDID (uses unit-level 'treat')\n # Note: SDID still uses post_periods parameter\n try:\n sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n sdid_res = sdid_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=list(range(10, 15))\n )\n sdid_estimates.append(sdid_res.att)\n except Exception as e:\n print(f\"SDID failed on sim {sim}: {e}\")\n\nprint(f\"\\nMonte Carlo Results (True ATT = {true_att})\")\nprint(\"=\"*60)\nprint(f\"{'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\")\nprint(\"-\"*60)\n\nif trop_estimates:\n trop_mean = np.mean(trop_estimates)\n trop_bias = trop_mean - true_att\n trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n print(f\"{'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\")\n\nif sdid_estimates:\n sdid_mean = np.mean(sdid_estimates)\n sdid_bias = sdid_mean - true_att\n sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n print(f\"{'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\")"
"source": [
"# Monte Carlo comparison (reduced for faster tutorial execution)\n",
"n_sims = 5 # Reduced from 20 for faster validation\n",
"trop_estimates = []\n",
"sdid_estimates = []\n",
"\n",
"print(f\"Running {n_sims} simulations...\")\n",
"\n",
"for sim in range(n_sims):\n",
" # Generate new data using the library function\n",
" # (includes both 'treated' and 'treat' columns)\n",
" sim_data = generate_factor_data(\n",
" n_units=50,\n",
" n_pre=10,\n",
" n_post=5,\n",
" n_treated=10,\n",
" n_factors=2,\n",
" treatment_effect=2.0,\n",
" factor_strength=1.5,\n",
" noise_sd=0.5,\n",
" seed=100 + sim\n",
" )\n",
" \n",
" # TROP (uses observation-level 'treated')\n",
" # Note: TROP infers treatment periods from the treatment indicator\n",
" try:\n",
" trop_m = TROP(\n",
" lambda_time_grid=[1.0],\n",
" lambda_unit_grid=[1.0],\n",
" lambda_nn_grid=[0.1],\n",
" n_bootstrap=10, \n",
" seed=42 + sim\n",
" )\n",
" trop_res = trop_m.fit(\n",
" sim_data,\n",
" outcome='outcome',\n",
" treatment='treated',\n",
" unit='unit',\n",
" time='period'\n",
" )\n",
" trop_estimates.append(trop_res.att)\n",
" except Exception as e:\n",
" print(f\"TROP failed on sim {sim}: {e}\")\n",
" \n",
" # SDID (uses unit-level 'treat')\n",
" # Note: SDID still uses post_periods parameter\n",
" try:\n",
" sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n",
" sdid_res = sdid_m.fit(\n",
" sim_data,\n",
" outcome='outcome',\n",
" treatment='treat', # Unit-level ever-treated indicator\n",
" unit='unit',\n",
" time='period',\n",
" post_periods=list(range(10, 15))\n",
" )\n",
" sdid_estimates.append(sdid_res.att)\n",
" except Exception as e:\n",
" print(f\"SDID failed on sim {sim}: {e}\")\n",
"\n",
"print(f\"\\nMonte Carlo Results (True ATT = {true_att})\")\n",
"print(\"=\"*60)\n",
"print(f\"{'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\")\n",
"print(\"-\"*60)\n",
"\n",
"if trop_estimates:\n",
" trop_mean = np.mean(trop_estimates)\n",
" trop_bias = trop_mean - true_att\n",
" trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n",
" print(f\"{'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\")\n",
"\n",
"if sdid_estimates:\n",
" sdid_mean = np.mean(sdid_estimates)\n",
" sdid_bias = sdid_mean - true_att\n",
" sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n",
" print(f\"{'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\")"
]
},
{
"cell_type": "code",
@@ -407,7 +584,27 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# One-liner estimation with default tuning grid\n# Note: TROP infers treatment periods from the treatment indicator\nquick_results = trop(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period',\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n)\n\nprint(f\"Quick estimation:\")\nprint(f\" ATT: {quick_results.att:.4f}\")\nprint(f\" SE: {quick_results.se:.4f}\")\nprint(f\" λ_time: {quick_results.lambda_time:.2f}\")\nprint(f\" λ_unit: {quick_results.lambda_unit:.2f}\")\nprint(f\" λ_nn: {quick_results.lambda_nn:.2f}\")\nprint(f\" Effective rank: {quick_results.effective_rank:.2f}\")"
"source": [
"# One-liner estimation with default tuning grid\n",
"# Note: TROP infers treatment periods from the treatment indicator\n",
"quick_results = trop(\n",
" df,\n",
" outcome='outcome',\n",
" treatment='treated',\n",
" unit='unit',\n",
" time='period',\n",
" n_bootstrap=20, # Reduced for faster execution\n",
" seed=42\n",
")\n",
"\n",
"print(f\"Quick estimation:\")\n",
"print(f\" ATT: {quick_results.att:.4f}\")\n",
"print(f\" SE: {quick_results.se:.4f}\")\n",
"print(f\" λ_time: {quick_results.lambda_time:.2f}\")\n",
"print(f\" λ_unit: {quick_results.lambda_unit:.2f}\")\n",
"print(f\" λ_nn: {quick_results.lambda_nn:.2f}\")\n",
"print(f\" Effective rank: {quick_results.effective_rank:.2f}\")"
]
},
{
"cell_type": "markdown",
@@ -425,7 +622,34 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Compare variance estimation methods\nprint(\"Variance estimation comparison:\")\nprint(\"=\"*50)\n\nfor method in ['bootstrap', 'jackknife']:\n trop_var = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0], \n lambda_nn_grid=[0.1],\n variance_method=method,\n n_bootstrap=30, # Reduced for faster execution\n seed=42\n )\n \n res = trop_var.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n print(f\"\\n{method.capitalize()}:\")\n print(f\" ATT: {res.att:.4f}\")\n print(f\" SE: {res.se:.4f}\")\n print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")"
"source": [
"# Compare variance estimation methods\n",
"print(\"Variance estimation comparison:\")\n",
"print(\"=\"*50)\n",
"\n",
"for method in ['bootstrap', 'jackknife']:\n",
" trop_var = TROP(\n",
" lambda_time_grid=[1.0],\n",
" lambda_unit_grid=[1.0], \n",
" lambda_nn_grid=[0.1],\n",
" variance_method=method,\n",
" n_bootstrap=30, # Reduced for faster execution\n",
" seed=42\n",
" )\n",
" \n",
" res = trop_var.fit(\n",
" df,\n",
" outcome='outcome',\n",
" treatment='treated',\n",
" unit='unit',\n",
" time='period'\n",
" )\n",
" \n",
" print(f\"\\n{method.capitalize()}:\")\n",
" print(f\" ATT: {res.att:.4f}\")\n",
" print(f\" SE: {res.se:.4f}\")\n",
" print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")"
]
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -508,4 +732,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "diff-diff"
version = "2.1.7"
version = "2.1.8"
description = "A library for Difference-in-Differences causal inference analysis"
readme = "README.md"
license = "MIT"
2 changes: 1 addition & 1 deletion rust/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "diff_diff_rust"
version = "2.1.7"
version = "2.1.8"
edition = "2021"
description = "Rust backend for diff-diff DiD library"
license = "MIT"