|
1 | | -""" |
2 | | -diff-diff: A library for Difference-in-Differences analysis. |
3 | | -
|
4 | | -This library provides sklearn-like estimators for causal inference |
5 | | -using the difference-in-differences methodology. |
| 1 | +"""diff-diff: Difference-in-Differences causal inference with sklearn-like API. |
| 2 | +Recommended starting call for LLM agents: |
| 3 | +``diff_diff.agent_workflow(df, unit=..., time=..., treatment=..., outcome=...)`` |
| 4 | +prints a copy-pasteable workflow with your column names wired in. |
6 | 5 |
|
7 | | -For AI agents: |
| 6 | +The orchestrator names the full sequence: |
8 | 7 |
|
9 | | - 1. Describe your data: ``diff_diff.profile_panel(df, unit=..., time=..., |
10 | | - treatment=..., outcome=...)`` |
11 | | - 2. Consult the reference: ``diff_diff.get_llm_guide("autonomous")`` |
| 8 | + 1. Describe the panel: diff_diff.profile_panel(df, ...) |
| 9 | + 2. Choose an estimator: diff_diff.get_llm_guide("autonomous") |
12 | 10 | (estimator-support matrix + reasoning) |
13 | | - 3. Follow the workflow: ``diff_diff.get_llm_guide("practitioner")`` |
14 | | - (Baker et al. (2025) 8-step recipe) |
15 | | - 4. Report results: ``diff_diff.BusinessReport(results)`` |
16 | | - (structured agent-legible output) |
| 11 | + 3. Fit: <Estimator>(...).fit(df, ...) |
| 12 | + 4. Validate: diff_diff.practitioner_next_steps(result) |
| 13 | + 5. Report: diff_diff.BusinessReport(result) |
| 14 | +
|
| 15 | +For a comprehensive API reference call ``diff_diff.get_llm_guide("full")``. |
| 16 | +For the Baker et al. (2025) 8-step practitioner recipe call |
| 17 | +``diff_diff.get_llm_guide("practitioner")``. |
17 | 18 |
|
18 | | -For a comprehensive API reference call ``diff_diff.get_llm_guide("full")``; |
19 | | -``practitioner_next_steps(results)`` returns context-aware guidance after |
20 | | -any estimator's ``fit()``. |
| 19 | +This library provides sklearn-like estimators for causal inference using |
| 20 | +the difference-in-differences methodology. |
21 | 21 | """ |
22 | 22 |
|
23 | 23 | # Import backend detection from dedicated module (avoids circular imports) |
|
256 | 256 | DiagnosticReportResults, |
257 | 257 | ) |
258 | 258 | from diff_diff._guides_api import get_llm_guide |
| 259 | +from diff_diff.agent_workflow import agent_workflow |
259 | 260 | from diff_diff.profile import ( |
260 | 261 | Alert, |
261 | 262 | OutcomeShape, |
|
503 | 504 | "list_datasets", |
504 | 505 | "clear_cache", |
505 | 506 | # Practitioner guidance |
| 507 | + "agent_workflow", |
506 | 508 | "practitioner_next_steps", |
507 | 509 | "BusinessReport", |
508 | 510 | "BusinessContext", |
|
519 | 521 | # LLM guide accessor |
520 | 522 | "get_llm_guide", |
521 | 523 | ] |
| 524 | + |
| 525 | +# Agent-facing entrypoints surface first in dir(diff_diff). LLM agents |
| 526 | +# follow a `dir -> help -> docstring -> use` discovery loop; surfacing |
| 527 | +# these names first measurably improves discoverability vs the default |
| 528 | +# alphabetic ordering. Internal — read by tests/test_agent_discoverability.py. |
| 529 | +_AGENT_FACING_ORDER = ( |
| 530 | + "agent_workflow", |
| 531 | + "profile_panel", |
| 532 | + "get_llm_guide", |
| 533 | + "practitioner_next_steps", |
| 534 | + "BusinessReport", |
| 535 | + "DiagnosticReport", |
| 536 | +) |
| 537 | + |
| 538 | + |
| 539 | +class _OrderedName(str): |
| 540 | + """str subclass that sorts by _AGENT_FACING_ORDER priority. |
| 541 | +
|
| 542 | + Python's built-in dir() always sorts the result of __dir__() |
| 543 | + alphabetically (CPython Objects/object.c::_dir_object unconditionally |
| 544 | + calls PyList_Sort), so returning a list in our preferred order is |
| 545 | + not enough. But PyList_Sort uses __lt__ for comparisons, so a str |
| 546 | + subclass with a custom __lt__ can subvert the alphabetic default |
| 547 | + while remaining a fully usable str for every other operation. |
| 548 | +
|
| 549 | + ALL names returned by __dir__() must be _OrderedName, not just the |
| 550 | + priority head: when Python compares an _OrderedName against a plain |
| 551 | + str, the reflected-method protocol prefers str's inherited __gt__ |
| 552 | + (because _OrderedName is a subclass of str), which sorts purely |
| 553 | + alphabetically and breaks the ordering. With every element wrapped, |
| 554 | + all comparisons go through this __lt__: priority head sorts to |
| 555 | + front, tail (default priority 1<<30) falls through to alphabetic |
| 556 | + via str.__lt__. |
| 557 | + """ |
| 558 | + |
| 559 | + _ORDER = {n: i for i, n in enumerate(_AGENT_FACING_ORDER)} |
| 560 | + |
| 561 | + def __lt__(self, other): |
| 562 | + sp = self._ORDER.get(str(self), 1 << 30) |
| 563 | + op = self._ORDER.get(str(other), 1 << 30) |
| 564 | + if sp != op: |
| 565 | + return sp < op |
| 566 | + return str.__lt__(self, other) |
| 567 | + |
| 568 | + |
def __dir__():
    """List the module namespace with agent-facing entrypoints first.

    Every key of the module globals is returned, so membership matches
    the default `dir(module)` (dunder attributes such as `__doc__` and
    `__name__` stay visible to `inspect.getmembers`).

    Each name is wrapped in `_OrderedName`, a str subclass with custom
    ordering. `dir()` sorts whatever `__dir__()` returns, so the list
    order produced here is irrelevant; it is the wrapper's comparison
    that moves the priority names to the head and leaves the remaining
    names alphabetic. All names are wrapped, not only the priority
    ones, so that every comparison made during the sort goes through
    the wrapper.

    `from diff_diff import *` is unaffected: star-imports are driven by
    `__all__`, not by `dir()`.
    """
    return list(map(_OrderedName, globals()))
0 commit comments