84 changes: 53 additions & 31 deletions .github/workflows/deploy.yml
@@ -72,25 +72,28 @@ jobs:
# transition it back through provisioning; the catalog/app bindings
# need it AVAILABLE before the next bundle run touches them.
run: |
python -c "
import json, os, sys, time, subprocess
name = os.environ.get('LAKEBASE_NAME') or 'docintel-demo-state-v1'
deadline = time.time() + 600
while True:
out = subprocess.run(['databricks','api','get','/api/2.0/database/instances','--output','json'],
capture_output=True, text=True)
try:
d = json.loads(out.stdout)
except Exception:
d = {}
state = next((i.get('state') for i in d.get('database_instances',[]) if i.get('name')==name), 'UNKNOWN')
print(f'lakebase state: {state}')
if state == 'AVAILABLE':
sys.exit(0)
if time.time() >= deadline:
sys.exit(f'Lakebase {name} did not reach AVAILABLE within 600s (state={state})')
time.sleep(15)
"
python - <<'PY'
import json, os, sys, time, subprocess
name = os.environ.get('LAKEBASE_NAME') or 'docintel-demo-state-v1'
deadline = time.time() + 600
while True:
out = subprocess.run(
['databricks', 'api', 'get', '/api/2.0/database/instances', '--output', 'json'],
capture_output=True,
text=True,
)
try:
d = json.loads(out.stdout)
except Exception:
d = {}
state = next((i.get('state') for i in d.get('database_instances', []) if i.get('name') == name), 'UNKNOWN')
print(f'lakebase state: {state}')
if state == 'AVAILABLE':
sys.exit(0)
if time.time() >= deadline:
sys.exit(f'Lakebase {name} did not reach AVAILABLE within 600s (state={state})')
time.sleep(15)
PY
env:
LAKEBASE_NAME: ${{ vars.DOCINTEL_LAKEBASE_NAME || 'docintel-demo-state-v1' }}

@@ -104,7 +107,7 @@ while True:
databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" doc_intel_pipeline
python scripts/wait_for_kpis.py --min-rows 3 --timeout 900
databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" --var "agent_endpoint_name=$AGENT_ENDPOINT_NAME" index_refresh
python scripts/bootstrap_agent_bricks.py \
python -m agent.document_intelligence_agent \
--target demo \
--catalog "$DOCINTEL_CATALOG" \
--schema "$DOCINTEL_SCHEMA" \
@@ -130,19 +133,38 @@ while True:
# `bundle deploy` alone uploads code but doesn't apply config/restart.
run: databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" --var "agent_endpoint_name=$AGENT_ENDPOINT_NAME" analyst_app

- name: Verify OBO scopes survived deploy
# `bundle run` may wipe user_api_scopes (documented destructive-update
# behavior). Fail loudly because user-token passthrough is mandatory.
- name: Verify app auth mode and endpoint grants
run: |
databricks apps get doc-intel-analyst-demo --output json > /tmp/app.json
python -c "
import json
app = json.load(open('/tmp/app.json'))
scopes = set(app.get('user_api_scopes') or [])
required = {'serving.serving-endpoints', 'sql'}
missing = required - scopes
assert not missing, f'OBO scopes missing: {sorted(missing)} (got {sorted(scopes)})'
"
app_obo_required="$(python -c "import yaml; d=yaml.safe_load(open('databricks.yml')); default=d.get('variables',{}).get('app_obo_required',{}).get('default','true'); value=d.get('targets',{}).get('demo',{}).get('variables',{}).get('app_obo_required', default); print(str(value).lower())")"
lakebase_name="$(python -c "import yaml; d=yaml.safe_load(open('databricks.yml')); print(d.get('targets',{}).get('demo',{}).get('variables',{}).get('lakebase_instance','docintel-demo-state-v1'))")"
python -c "import json; app=json.load(open('/tmp/app.json')); vals=[str(app.get(k)) for k in ('service_principal_client_id','service_principal_name','service_principal_id') if app.get(k) is not None]; print('\n'.join(dict.fromkeys(v for v in vals if v)))" > /tmp/app-sp-candidates.txt
db_granted=0
while IFS= read -r principal; do
grant_json="$(python -c "import json, sys; print(json.dumps({'access_control_list':[{'service_principal_name':sys.argv[1],'permission_level':'CAN_USE'}]}))" "$principal")"
if databricks permissions update database-instances "$lakebase_name" --json "$grant_json"; then
db_granted=1
break
fi
done < /tmp/app-sp-candidates.txt
test "$db_granted" = "1"
if [ "$app_obo_required" = "true" ]; then
# `bundle run` may wipe user_api_scopes (documented destructive-update
# behavior). Fail loudly if required user scopes are missing.
python -c "import json; app=json.load(open('/tmp/app.json')); scopes=set(app.get('user_api_scopes') or []); required={'serving.serving-endpoints','sql'}; missing=required-scopes; assert not missing, f'OBO scopes missing: {sorted(missing)} (got {sorted(scopes)})'"
else
python -c "import json; app=json.load(open('/tmp/app.json')); scopes=app.get('user_api_scopes'); assert not scopes, f'demo App-SP mode expected no user_api_scopes, got {scopes}'"
endpoint_id="$(databricks serving-endpoints get "$AGENT_ENDPOINT_NAME" --output json | python -c "import json, sys; e=json.load(sys.stdin); print(e.get('id') or e.get('name'))")"
granted=0
while IFS= read -r principal; do
grant_json="$(python -c "import json, sys; print(json.dumps({'access_control_list':[{'service_principal_name':sys.argv[1],'permission_level':'CAN_QUERY'}]}))" "$principal")"
if databricks permissions update serving-endpoints "$endpoint_id" --json "$grant_json"; then
granted=1
break
fi
done < /tmp/app-sp-candidates.txt
test "$granted" = "1"
fi

- name: CLEARS evaluation gate
run: python evals/clears_eval.py --endpoint "$AGENT_ENDPOINT_NAME" --dataset evals/dataset.jsonl
15 changes: 9 additions & 6 deletions CLAUDE.md
@@ -16,15 +16,15 @@ For an end-to-end overview written for humans, read [`README.md`](./README.md).

The bundle has three chicken-and-egg dependencies that prevent a single `databricks bundle deploy -t demo` from succeeding on a fresh workspace:

1. **Databricks App resource binding** references the Agent Bricks Supervisor endpoint that `scripts/bootstrap_agent_bricks.py` creates after the Vector Search index exists.
1. **Databricks App config** needs the generated Agent Bricks Supervisor endpoint name from `agent/document_intelligence_agent.py`, which can only run after the Vector Search index exists.
2. **Lakehouse Monitor** (`resources/consumers/kpi_drift.yml`) attaches to `gold_filing_kpis`, which doesn't exist until the pipeline runs once.
3. **Lakebase database_catalog + Databricks App** race the `database_instance` provisioning.

**Canonical fix**: Run `./scripts/bootstrap-demo.sh` for fresh stand-ups; plain `databricks bundle deploy -t demo` for steady-state. The script does a **staged deploy** — `resources/` is split into `foundation/` (no data deps) and `consumers/` (need data). Stage 1 temporarily renames consumer YAMLs to `*.yml.skip` so the bundle glob skips them; stage 2 produces data and then runs full `bundle deploy`. Both deploys should succeed cleanly.
**Canonical fix**: Run `./scripts/bootstrap-demo.sh` for fresh stand-ups. For steady-state manual deploys, resolve the generated Supervisor endpoint and pass it as a bundle variable: `databricks bundle deploy -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)"`. The script does a **staged deploy** — `resources/` is split into `foundation/` (no data deps) and `consumers/` (need data). Stage 1 temporarily renames consumer YAMLs to `*.yml.skip` so the bundle glob skips them; stage 2 produces data and then runs full `bundle deploy`. Both deploys should succeed cleanly.
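For orientation, below is a condensed Python rendering of that staged flow. It is illustrative only: `scripts/bootstrap-demo.sh` (bash) is the real implementation, and the data-producing steps between the two deploys are elided here.

```python
# Illustrative sketch of the staged deploy, assuming the repo layout described
# in this file; scripts/bootstrap-demo.sh is authoritative.
import subprocess
from pathlib import Path

consumers = sorted(Path("resources/consumers").glob("*.yml"))

# Stage 1: rename consumer YAMLs so the bundle glob skips them, then deploy foundation.
for f in consumers:
    f.rename(f.with_suffix(".yml.skip"))
try:
    subprocess.run(["databricks", "bundle", "deploy", "-t", "demo"], check=True)
    # ...run the pipeline and wait for gold_filing_kpis rows here...
finally:
    # Stage 2: restore the consumer YAMLs whether or not stage 1 succeeded.
    for f in consumers:
        f.with_suffix(".yml.skip").rename(f)

# Full deploy now that the data the consumers depend on exists.
subprocess.run(["databricks", "bundle", "deploy", "-t", "demo"], check=True)
```

The `try/finally` is the point of the pattern: consumer YAMLs must be restored even when stage 1 fails, or later `bundle validate`/`deploy` runs silently skip them.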

**Do NOT try to "fix" these by:**
- Adding `depends_on` between heterogeneous DAB resource types — DAB doesn't reliably honor it across instance↔catalog↔app.
- Reintroducing a custom MLflow pyfunc serving endpoint. Agent Bricks Knowledge Assistant + Supervisor Agent is the production path.
- Bypassing Agent Bricks Knowledge Assistant + Supervisor Agent for the production path.
- Splitting monitors into a separate target overlay — adds complexity for a one-time concern.

Full breakdown lives in [`docs/runbook.md`](./docs/runbook.md) §"Known deploy ordering gaps".
@@ -33,13 +33,13 @@

```
pipelines/sql/ Lakeflow SDP — Bronze → Silver → Gold (SQL only, principle III)
agent/ Deterministic Agent Bricks tool glue only
agent/ Agent Bricks definition + deterministic tool glue
app/ Streamlit on Databricks Apps + Lakebase psycopg client
evals/ MLflow CLEARS gate (clears_eval.py + dataset.jsonl)
jobs/ Lakeflow Jobs Python tasks (retention, index_refresh)
resources/foundation/ DAB resources with no data deps: catalog/schema/volume, pipeline, retention job, Lakebase instance
resources/consumers/ DAB resources that depend on foundation data: monitor, index-refresh job, app, dashboard, Lakebase catalog
scripts/ Operational scripts (bootstrap-demo.sh, bootstrap_agent_bricks.py, wait_for_kpis.py)
scripts/ Operational scripts (bootstrap-demo.sh, wait_for_kpis.py)
samples/ Synthetic 10-K PDFs (regenerable via synthesize.py)
specs/001-… Spec-Kit artifacts (spec, plan, tasks, research, data-model, contracts, quickstart)
docs/runbook.md Day-2 ops + bring-up workflow
@@ -51,6 +51,7 @@ docs/runbook.md Day-2 ops + bring-up workflow
- Validate: `databricks bundle validate -t demo`
- Fresh stand-up: `./scripts/bootstrap-demo.sh` (requires `DOCINTEL_CATALOG`, `DOCINTEL_SCHEMA`, `DOCINTEL_WAREHOUSE_ID`)
- Steady-state deploy: `databricks bundle deploy -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)"`
- App config/restart: `databricks bundle run -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)" analyst_app`
- Run pipeline: `databricks bundle run -t demo doc_intel_pipeline`
- Run eval: `python evals/clears_eval.py --endpoint "$(./scripts/resolve-agent-endpoint.sh demo)" --dataset evals/dataset.jsonl`

@@ -69,7 +70,9 @@ These were discovered the painful way during the 2026-04-25 bring-up. Future ses
- **Section normalization**: `pipelines/sql/03_gold_classify_extract.sql` POSEXPLODES `parsed:sections[*]` and represents sectionless VARIANT output as one `full_document` row so we never lose a filing.
- **`lakebase_stopped: true` is rejected on instance creation**: the API doesn't allow creating a database_instance directly into stopped state. Default is `false`; flip to `true` only after the instance exists. Reference: `databricks.yml` variable description.
- **macOS doesn't ship `python`**: scripts must prefer `.venv/bin/python` then fall back to `python3`. Reference: `scripts/bootstrap-demo.sh`.
- **Agent Bricks resources are SDK-managed**: `scripts/bootstrap_agent_bricks.py` creates/updates the Knowledge Assistant, its Vector Search knowledge source, the UC KPI function, and the Supervisor Agent. DAB still manages the surrounding data/app/monitor resources.
- **Agent Bricks resources are SDK-managed**: `agent/document_intelligence_agent.py` creates/updates the Knowledge Assistant, its Vector Search knowledge source, the UC KPI function, and the Supervisor Agent. DAB still manages the surrounding data/app/monitor resources.
- **Agent Bricks generates endpoint names**: use `scripts/resolve-agent-endpoint.sh <target>` and pass the result as `--var agent_endpoint_name=...` for deploys and app runs.
- **Agent Bricks invocation uses the invocations path directly**: `app/agent_bricks_client.py` posts to `/serving-endpoints/{endpoint}/invocations` with the user's OBO token and an `X-Request-ID`. Do not swap this back to `WorkspaceClient.serving_endpoints.query()` without revalidating the Agent Bricks response shape; a sketch of this call pattern follows this list.
- **Streamlit on Databricks Apps requires CORS+XSRF off via env vars**: not flags. `STREAMLIT_SERVER_ENABLE_CORS=false` and `STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false` in `app/app.yaml`. Databricks Apps runtime config: https://docs.databricks.com/aws/en/dev-tools/databricks-apps/app-runtime.
- **`bundle deploy` doesn't apply app config / restart**: must follow with `databricks bundle run -t <target> analyst_app` (or use `databricks apps deploy`). Databricks Apps deploy docs: https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy.
- **`bundle run` may wipe `user_api_scopes`**: documented as a destructive-update behavior in the Databricks Apps deploy docs. Bootstrap step 5c re-asserts; CI verifies. If you change the App resource, double-check OBO scopes after.
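Because the invocations-path contract above is easy to regress, here is a minimal hedged sketch of the client call it describes. The payload and response shapes are assumptions; `app/agent_bricks_client.py` remains authoritative.

```python
# Hedged sketch of the direct-invocations call; payload/response schemas are assumed.
import uuid

import requests


def query_supervisor(host: str, endpoint: str, obo_token: str, question: str) -> dict:
    """POST to the endpoint's invocations path with the user's OBO token."""
    response = requests.post(
        f"https://{host}/serving-endpoints/{endpoint}/invocations",
        headers={
            "Authorization": f"Bearer {obo_token}",
            # Propagated so app logs can be correlated with endpoint logs.
            "X-Request-ID": str(uuid.uuid4()),
        },
        json={"input": [{"role": "user", "content": question}]},  # assumed payload shape
        timeout=60,
    )
    response.raise_for_status()
    return response.json()
```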
6 changes: 4 additions & 2 deletions PRODUCTION_READINESS.md
@@ -10,14 +10,16 @@ This project is open-sourced as a Databricks reference implementation. Treat it
| Pilot-ready | Real filings exercise document variability and cost/latency | Reference-ready plus a reviewed EDGAR pilot corpus |
| Production-ready | Analysts can use it under governed identity and SLOs | Pilot-ready plus end-to-end OBO, dashboards, alerts, rollback, and runbook evidence |

Current demo status as of 2026-04-26: the Agent Bricks bootstrap, Databricks App deploy, direct Supervisor endpoint smoke test, Lakebase OAuth credential handling, and Vector Search index-refresh smoke test all passed. The project is not reference-ready yet because the latest synthetic CLEARS run failed the configured quality/latency gate. Production readiness still requires user-token passthrough/OBO audit evidence. See [`VALIDATION.md`](./VALIDATION.md#latest-demo-snapshot).

## Reference-Ready Checklist

- `databricks bundle validate --strict -t demo` passes.
- `./scripts/bootstrap-demo.sh` succeeds in a clean demo workspace.
- Synthetic PDFs in `samples/` produce at least ACME/BETA/GAMMA KPI rows.
- Vector Search index sync completes and the Agent Bricks Supervisor endpoint answers a smoke question with citations.
- `python evals/clears_eval.py --endpoint "$(./scripts/resolve-agent-endpoint.sh demo)" --dataset evals/dataset.jsonl` passes.
- App starts via `databricks bundle run -t demo analyst_app`.
- App starts via `databricks bundle run -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)" analyst_app` in the configured demo auth mode.

## Pilot-Ready Checklist

@@ -31,7 +33,7 @@ This project is open-sourced as a Databricks reference implementation. Treat it
## Production-Ready Checklist

- Databricks Apps user-token passthrough is enabled in the workspace.
- `resources/consumers/analyst.app.yml:user_api_scopes` is declared and survives `bundle run`.
- The prod target's `user_api_scopes` in `databricks.yml` are declared and survive `bundle run`.
- Audit logs prove app requests, Agent Bricks, Knowledge Assistant, Vector Search, and structured KPI SQL calls execute under the invoking user where required.
- Service principal `run_as` is configured for prod via `--var service_principal_id=<sp-app-id>`.
- Analyst group grants include `USE_CATALOG`, `USE_SCHEMA`, `SELECT`, `EXECUTE`, `READ_VOLUME`, and `WRITE_VOLUME` as appropriate.