Skip to content

Commit addd96f

Browse files
authored
Merge pull request #112 from igerber/release/v2.1.8
Release v2.1.8
2 parents 7c99ed1 + 05cade7 commit addd96f

File tree

5 files changed

+260
-10
lines changed

5 files changed

+260
-10
lines changed

CHANGELOG.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,31 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [2.1.8] - 2026-01-25
9+
10+
### Added
11+
- **`/push-pr-update` skill** for committing and pushing PR revisions
12+
- Commits local changes to current branch and pushes to remote
13+
- Triggers AI code review automatically
14+
- Robust handling for fork repos, unpushed commits, and upstream tracking
15+
16+
### Fixed
17+
- **TROP estimator methodology alignment** (PR #110)
18+
- Aligned with paper methodology (Equation 5, D matrix semantics)
19+
- NaN propagation and LOOCV warnings improvements
20+
- Rust backend test alignment with new loocv_grid_search return signature
21+
- LOOCV cycling, D matrix validation fixes
22+
- Final estimation infinity handling and edge case fixes
23+
- Absorbing-state gap detection and n_post_periods fix
24+
25+
### Changed
26+
- **`/submit-pr` skill improvements** (PR #111)
27+
- Case-insensitive secret scanning with POSIX ERE regex
28+
- Verify origin ref exists before push
29+
- Dynamic default branch detection with fallback
30+
- Robust handling for unpushed commits, fork repos
31+
- Files count display in PR summary
32+
833
## [2.1.7] - 2026-01-25
934

1035
### Fixed
@@ -542,6 +567,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
542567
- `to_dict()` and `to_dataframe()` export methods
543568
- `is_significant` and `significance_stars` properties
544569

570+
[2.1.8]: https://github.com/igerber/diff-diff/compare/v2.1.7...v2.1.8
545571
[2.1.7]: https://github.com/igerber/diff-diff/compare/v2.1.6...v2.1.7
546572
[2.1.6]: https://github.com/igerber/diff-diff/compare/v2.1.5...v2.1.6
547573
[2.1.5]: https://github.com/igerber/diff-diff/compare/v2.1.4...v2.1.5

diff_diff/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@
136136
load_mpdta,
137137
)
138138

139-
__version__ = "2.1.7"
139+
__version__ = "2.1.8"
140140
__all__ = [
141141
# Estimators
142142
"DifferenceInDifferences",

docs/tutorials/10_trop.ipynb

Lines changed: 231 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,33 @@
161161
"execution_count": null,
162162
"metadata": {},
163163
"outputs": [],
164-
"source": "# Fit TROP with automatic tuning via LOOCV\ntrop_est = TROP(\n lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n n_bootstrap=50, # Reduced bootstrap replications for SE\n seed=42\n)\n\n# Note: TROP infers treatment periods from the treatment indicator column.\n# The 'treated' column should be an absorbing state (D=1 for all periods\n# during and after treatment starts).\n\n# For SDID comparison later, we keep post_periods for SDID\npost_periods = list(range(n_pre, n_pre + n_post))\n\nresults = trop_est.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n)\n\nprint(results.summary())"
164+
"source": [
165+
"# Fit TROP with automatic tuning via LOOCV\n",
166+
"trop_est = TROP(\n",
167+
" lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n",
168+
" lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n",
169+
" lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n",
170+
" n_bootstrap=50, # Reduced bootstrap replications for SE\n",
171+
" seed=42\n",
172+
")\n",
173+
"\n",
174+
"# Note: TROP infers treatment periods from the treatment indicator column.\n",
175+
"# The 'treated' column should be an absorbing state (D=1 for all periods\n",
176+
"# during and after treatment starts).\n",
177+
"\n",
178+
"# For SDID comparison later, we keep post_periods for SDID\n",
179+
"post_periods = list(range(n_pre, n_pre + n_post))\n",
180+
"\n",
181+
"results = trop_est.fit(\n",
182+
" df,\n",
183+
" outcome='outcome',\n",
184+
" treatment='treated',\n",
185+
" unit='unit',\n",
186+
" time='period'\n",
187+
")\n",
188+
"\n",
189+
"print(results.summary())"
190+
]
165191
},
166192
{
167193
"cell_type": "code",
@@ -217,7 +243,33 @@
217243
"execution_count": null,
218244
"metadata": {},
219245
"outputs": [],
220-
"source": "# Effect of different nuclear norm regularization levels\nprint(\"Effect of nuclear norm regularization (λ_nn):\")\nprint(\"=\"*65)\nprint(f\"{'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\nprint(\"-\"*65)\n\nfor lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n trop_fixed = TROP(\n lambda_time_grid=[1.0], # Fixed\n lambda_unit_grid=[1.0], # Fixed\n lambda_nn_grid=[lambda_nn], # Vary this\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n )\n \n res = trop_fixed.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n bias = res.att - true_att\n print(f\"{lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\")"
246+
"source": [
247+
"# Effect of different nuclear norm regularization levels\n",
248+
"print(\"Effect of nuclear norm regularization (λ_nn):\")\n",
249+
"print(\"=\"*65)\n",
250+
"print(f\"{'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\n",
251+
"print(\"-\"*65)\n",
252+
"\n",
253+
"for lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n",
254+
" trop_fixed = TROP(\n",
255+
" lambda_time_grid=[1.0], # Fixed\n",
256+
" lambda_unit_grid=[1.0], # Fixed\n",
257+
" lambda_nn_grid=[lambda_nn], # Vary this\n",
258+
" n_bootstrap=20, # Reduced for faster execution\n",
259+
" seed=42\n",
260+
" )\n",
261+
" \n",
262+
" res = trop_fixed.fit(\n",
263+
" df,\n",
264+
" outcome='outcome',\n",
265+
" treatment='treated',\n",
266+
" unit='unit',\n",
267+
" time='period'\n",
268+
" )\n",
269+
" \n",
270+
" bias = res.att - true_att\n",
271+
" print(f\"{lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\")"
272+
]
221273
},
222274
{
223275
"cell_type": "markdown",
@@ -353,7 +405,56 @@
353405
"execution_count": null,
354406
"metadata": {},
355407
"outputs": [],
356-
"source": "# SDID (no factor adjustment)\n# Note: SDID uses 'treat' (unit-level ever-treated indicator)\nsdid = SyntheticDiD(\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\n# SDID still uses post_periods parameter\nsdid_results = sdid.fit(\n df,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=post_periods\n)\n\n# TROP (with factor adjustment)\n# Note: TROP uses 'treated' (observation-level treatment indicator)\n# and infers treatment periods automatically\ntrop_est2 = TROP(\n lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\ntrop_results = trop_est2.fit(\n df,\n outcome='outcome',\n treatment='treated', # Observation-level indicator\n unit='unit',\n time='period'\n)\n\nprint(\"Comparison: SDID vs TROP\")\nprint(\"=\"*60)\nprint(f\"True ATT: {true_att:.4f}\")\nprint()\nprint(f\"Synthetic DiD (no factor adjustment):\")\nprint(f\" ATT: {sdid_results.att:.4f}\")\nprint(f\" SE: {sdid_results.se:.4f}\")\nprint(f\" Bias: {sdid_results.att - true_att:.4f}\")\nprint()\nprint(f\"TROP (with factor adjustment):\")\nprint(f\" ATT: {trop_results.att:.4f}\")\nprint(f\" SE: {trop_results.se:.4f}\")\nprint(f\" Bias: {trop_results.att - true_att:.4f}\")\nprint(f\" Effective rank: {trop_results.effective_rank:.2f}\")"
408+
"source": [
409+
"# SDID (no factor adjustment)\n",
410+
"# Note: SDID uses 'treat' (unit-level ever-treated indicator)\n",
411+
"sdid = SyntheticDiD(\n",
412+
" n_bootstrap=50, # Reduced for faster execution\n",
413+
" seed=42\n",
414+
")\n",
415+
"\n",
416+
"# SDID still uses post_periods parameter\n",
417+
"sdid_results = sdid.fit(\n",
418+
" df,\n",
419+
" outcome='outcome',\n",
420+
" treatment='treat', # Unit-level ever-treated indicator\n",
421+
" unit='unit',\n",
422+
" time='period',\n",
423+
" post_periods=post_periods\n",
424+
")\n",
425+
"\n",
426+
"# TROP (with factor adjustment)\n",
427+
"# Note: TROP uses 'treated' (observation-level treatment indicator)\n",
428+
"# and infers treatment periods automatically\n",
429+
"trop_est2 = TROP(\n",
430+
" lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n",
431+
" n_bootstrap=50, # Reduced for faster execution\n",
432+
" seed=42\n",
433+
")\n",
434+
"\n",
435+
"trop_results = trop_est2.fit(\n",
436+
" df,\n",
437+
" outcome='outcome',\n",
438+
" treatment='treated', # Observation-level indicator\n",
439+
" unit='unit',\n",
440+
" time='period'\n",
441+
")\n",
442+
"\n",
443+
"print(\"Comparison: SDID vs TROP\")\n",
444+
"print(\"=\"*60)\n",
445+
"print(f\"True ATT: {true_att:.4f}\")\n",
446+
"print()\n",
447+
"print(f\"Synthetic DiD (no factor adjustment):\")\n",
448+
"print(f\" ATT: {sdid_results.att:.4f}\")\n",
449+
"print(f\" SE: {sdid_results.se:.4f}\")\n",
450+
"print(f\" Bias: {sdid_results.att - true_att:.4f}\")\n",
451+
"print()\n",
452+
"print(f\"TROP (with factor adjustment):\")\n",
453+
"print(f\" ATT: {trop_results.att:.4f}\")\n",
454+
"print(f\" SE: {trop_results.se:.4f}\")\n",
455+
"print(f\" Bias: {trop_results.att - true_att:.4f}\")\n",
456+
"print(f\" Effective rank: {trop_results.effective_rank:.2f}\")"
457+
]
357458
},
358459
{
359460
"cell_type": "markdown",
@@ -369,7 +470,83 @@
369470
"execution_count": null,
370471
"metadata": {},
371472
"outputs": [],
372-
"source": "# Monte Carlo comparison (reduced for faster tutorial execution)\nn_sims = 5 # Reduced from 20 for faster validation\ntrop_estimates = []\nsdid_estimates = []\n\nprint(f\"Running {n_sims} simulations...\")\n\nfor sim in range(n_sims):\n # Generate new data using the library function\n # (includes both 'treated' and 'treat' columns)\n sim_data = generate_factor_data(\n n_units=50,\n n_pre=10,\n n_post=5,\n n_treated=10,\n n_factors=2,\n treatment_effect=2.0,\n factor_strength=1.5,\n noise_sd=0.5,\n seed=100 + sim\n )\n \n # TROP (uses observation-level 'treated')\n # Note: TROP infers treatment periods from the treatment indicator\n try:\n trop_m = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0],\n lambda_nn_grid=[0.1],\n n_bootstrap=10, \n seed=42 + sim\n )\n trop_res = trop_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n trop_estimates.append(trop_res.att)\n except Exception as e:\n print(f\"TROP failed on sim {sim}: {e}\")\n \n # SDID (uses unit-level 'treat')\n # Note: SDID still uses post_periods parameter\n try:\n sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n sdid_res = sdid_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=list(range(10, 15))\n )\n sdid_estimates.append(sdid_res.att)\n except Exception as e:\n print(f\"SDID failed on sim {sim}: {e}\")\n\nprint(f\"\\nMonte Carlo Results (True ATT = {true_att})\")\nprint(\"=\"*60)\nprint(f\"{'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\")\nprint(\"-\"*60)\n\nif trop_estimates:\n trop_mean = np.mean(trop_estimates)\n trop_bias = trop_mean - true_att\n trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n print(f\"{'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\")\n\nif sdid_estimates:\n sdid_mean = np.mean(sdid_estimates)\n sdid_bias = sdid_mean - true_att\n sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n print(f\"{'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\")"
473+
"source": [
474+
"# Monte Carlo comparison (reduced for faster tutorial execution)\n",
475+
"n_sims = 5 # Reduced from 20 for faster validation\n",
476+
"trop_estimates = []\n",
477+
"sdid_estimates = []\n",
478+
"\n",
479+
"print(f\"Running {n_sims} simulations...\")\n",
480+
"\n",
481+
"for sim in range(n_sims):\n",
482+
" # Generate new data using the library function\n",
483+
" # (includes both 'treated' and 'treat' columns)\n",
484+
" sim_data = generate_factor_data(\n",
485+
" n_units=50,\n",
486+
" n_pre=10,\n",
487+
" n_post=5,\n",
488+
" n_treated=10,\n",
489+
" n_factors=2,\n",
490+
" treatment_effect=2.0,\n",
491+
" factor_strength=1.5,\n",
492+
" noise_sd=0.5,\n",
493+
" seed=100 + sim\n",
494+
" )\n",
495+
" \n",
496+
" # TROP (uses observation-level 'treated')\n",
497+
" # Note: TROP infers treatment periods from the treatment indicator\n",
498+
" try:\n",
499+
" trop_m = TROP(\n",
500+
" lambda_time_grid=[1.0],\n",
501+
" lambda_unit_grid=[1.0],\n",
502+
" lambda_nn_grid=[0.1],\n",
503+
" n_bootstrap=10, \n",
504+
" seed=42 + sim\n",
505+
" )\n",
506+
" trop_res = trop_m.fit(\n",
507+
" sim_data,\n",
508+
" outcome='outcome',\n",
509+
" treatment='treated',\n",
510+
" unit='unit',\n",
511+
" time='period'\n",
512+
" )\n",
513+
" trop_estimates.append(trop_res.att)\n",
514+
" except Exception as e:\n",
515+
" print(f\"TROP failed on sim {sim}: {e}\")\n",
516+
" \n",
517+
" # SDID (uses unit-level 'treat')\n",
518+
" # Note: SDID still uses post_periods parameter\n",
519+
" try:\n",
520+
" sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n",
521+
" sdid_res = sdid_m.fit(\n",
522+
" sim_data,\n",
523+
" outcome='outcome',\n",
524+
" treatment='treat', # Unit-level ever-treated indicator\n",
525+
" unit='unit',\n",
526+
" time='period',\n",
527+
" post_periods=list(range(10, 15))\n",
528+
" )\n",
529+
" sdid_estimates.append(sdid_res.att)\n",
530+
" except Exception as e:\n",
531+
" print(f\"SDID failed on sim {sim}: {e}\")\n",
532+
"\n",
533+
"print(f\"\\nMonte Carlo Results (True ATT = {true_att})\")\n",
534+
"print(\"=\"*60)\n",
535+
"print(f\"{'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\")\n",
536+
"print(\"-\"*60)\n",
537+
"\n",
538+
"if trop_estimates:\n",
539+
" trop_mean = np.mean(trop_estimates)\n",
540+
" trop_bias = trop_mean - true_att\n",
541+
" trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n",
542+
" print(f\"{'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\")\n",
543+
"\n",
544+
"if sdid_estimates:\n",
545+
" sdid_mean = np.mean(sdid_estimates)\n",
546+
" sdid_bias = sdid_mean - true_att\n",
547+
" sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n",
548+
" print(f\"{'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\")"
549+
]
373550
},
374551
{
375552
"cell_type": "code",
@@ -407,7 +584,27 @@
407584
"execution_count": null,
408585
"metadata": {},
409586
"outputs": [],
410-
"source": "# One-liner estimation with default tuning grid\n# Note: TROP infers treatment periods from the treatment indicator\nquick_results = trop(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period',\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n)\n\nprint(f\"Quick estimation:\")\nprint(f\" ATT: {quick_results.att:.4f}\")\nprint(f\" SE: {quick_results.se:.4f}\")\nprint(f\" λ_time: {quick_results.lambda_time:.2f}\")\nprint(f\" λ_unit: {quick_results.lambda_unit:.2f}\")\nprint(f\" λ_nn: {quick_results.lambda_nn:.2f}\")\nprint(f\" Effective rank: {quick_results.effective_rank:.2f}\")"
587+
"source": [
588+
"# One-liner estimation with default tuning grid\n",
589+
"# Note: TROP infers treatment periods from the treatment indicator\n",
590+
"quick_results = trop(\n",
591+
" df,\n",
592+
" outcome='outcome',\n",
593+
" treatment='treated',\n",
594+
" unit='unit',\n",
595+
" time='period',\n",
596+
" n_bootstrap=20, # Reduced for faster execution\n",
597+
" seed=42\n",
598+
")\n",
599+
"\n",
600+
"print(f\"Quick estimation:\")\n",
601+
"print(f\" ATT: {quick_results.att:.4f}\")\n",
602+
"print(f\" SE: {quick_results.se:.4f}\")\n",
603+
"print(f\" λ_time: {quick_results.lambda_time:.2f}\")\n",
604+
"print(f\" λ_unit: {quick_results.lambda_unit:.2f}\")\n",
605+
"print(f\" λ_nn: {quick_results.lambda_nn:.2f}\")\n",
606+
"print(f\" Effective rank: {quick_results.effective_rank:.2f}\")"
607+
]
411608
},
412609
{
413610
"cell_type": "markdown",
@@ -425,7 +622,34 @@
425622
"execution_count": null,
426623
"metadata": {},
427624
"outputs": [],
428-
"source": "# Compare variance estimation methods\nprint(\"Variance estimation comparison:\")\nprint(\"=\"*50)\n\nfor method in ['bootstrap', 'jackknife']:\n trop_var = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0], \n lambda_nn_grid=[0.1],\n variance_method=method,\n n_bootstrap=30, # Reduced for faster execution\n seed=42\n )\n \n res = trop_var.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n print(f\"\\n{method.capitalize()}:\")\n print(f\" ATT: {res.att:.4f}\")\n print(f\" SE: {res.se:.4f}\")\n print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")"
625+
"source": [
626+
"# Compare variance estimation methods\n",
627+
"print(\"Variance estimation comparison:\")\n",
628+
"print(\"=\"*50)\n",
629+
"\n",
630+
"for method in ['bootstrap', 'jackknife']:\n",
631+
" trop_var = TROP(\n",
632+
" lambda_time_grid=[1.0],\n",
633+
" lambda_unit_grid=[1.0], \n",
634+
" lambda_nn_grid=[0.1],\n",
635+
" variance_method=method,\n",
636+
" n_bootstrap=30, # Reduced for faster execution\n",
637+
" seed=42\n",
638+
" )\n",
639+
" \n",
640+
" res = trop_var.fit(\n",
641+
" df,\n",
642+
" outcome='outcome',\n",
643+
" treatment='treated',\n",
644+
" unit='unit',\n",
645+
" time='period'\n",
646+
" )\n",
647+
" \n",
648+
" print(f\"\\n{method.capitalize()}:\")\n",
649+
" print(f\" ATT: {res.att:.4f}\")\n",
650+
" print(f\" SE: {res.se:.4f}\")\n",
651+
" print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")"
652+
]
429653
},
430654
{
431655
"cell_type": "markdown",
@@ -508,4 +732,4 @@
508732
},
509733
"nbformat": 4,
510734
"nbformat_minor": 4
511-
}
735+
}

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "maturin"
44

55
[project]
66
name = "diff-diff"
7-
version = "2.1.7"
7+
version = "2.1.8"
88
description = "A library for Difference-in-Differences causal inference analysis"
99
readme = "README.md"
1010
license = "MIT"

rust/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "diff_diff_rust"
3-
version = "2.1.7"
3+
version = "2.1.8"
44
edition = "2021"
55
description = "Rust backend for diff-diff DiD library"
66
license = "MIT"

0 commit comments

Comments
 (0)