84 changes: 53 additions & 31 deletions .github/workflows/deploy.yml
@@ -72,25 +72,28 @@ jobs:
# transition it back through provisioning; the catalog/app bindings
# need it AVAILABLE before the next bundle run touches them.
run: |
python -c "
import json, os, sys, time, subprocess
name = os.environ.get('LAKEBASE_NAME') or 'docintel-demo-state-v1'
deadline = time.time() + 600
while True:
out = subprocess.run(['databricks','api','get','/api/2.0/database/instances','--output','json'],
capture_output=True, text=True)
try:
d = json.loads(out.stdout)
except Exception:
d = {}
state = next((i.get('state') for i in d.get('database_instances',[]) if i.get('name')==name), 'UNKNOWN')
print(f'lakebase state: {state}')
if state == 'AVAILABLE':
sys.exit(0)
if time.time() >= deadline:
sys.exit(f'Lakebase {name} did not reach AVAILABLE within 600s (state={state})')
time.sleep(15)
"
python - <<'PY'
import json, os, sys, time, subprocess
name = os.environ.get('LAKEBASE_NAME') or 'docintel-demo-state-v1'
deadline = time.time() + 600
while True:
out = subprocess.run(
['databricks', 'api', 'get', '/api/2.0/database/instances', '--output', 'json'],
capture_output=True,
text=True,
)
try:
d = json.loads(out.stdout)
except Exception:
d = {}
state = next((i.get('state') for i in d.get('database_instances', []) if i.get('name') == name), 'UNKNOWN')
print(f'lakebase state: {state}')
if state == 'AVAILABLE':
sys.exit(0)
if time.time() >= deadline:
sys.exit(f'Lakebase {name} did not reach AVAILABLE within 600s (state={state})')
time.sleep(15)
PY
env:
LAKEBASE_NAME: ${{ vars.DOCINTEL_LAKEBASE_NAME || 'docintel-demo-state-v1' }}

@@ -104,7 +107,7 @@ while True:
databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" doc_intel_pipeline
python scripts/wait_for_kpis.py --min-rows 3 --timeout 900
databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" --var "agent_endpoint_name=$AGENT_ENDPOINT_NAME" index_refresh
python scripts/bootstrap_agent_bricks.py \
python -m agent.document_intelligence_agent \
--target demo \
--catalog "$DOCINTEL_CATALOG" \
--schema "$DOCINTEL_SCHEMA" \
@@ -130,19 +133,38 @@ while True:
# `bundle deploy` alone uploads code but doesn't apply config/restart.
run: databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" --var "agent_endpoint_name=$AGENT_ENDPOINT_NAME" analyst_app

- name: Verify OBO scopes survived deploy
# `bundle run` may wipe user_api_scopes (documented destructive-update
# behavior). Fail loudly because user-token passthrough is mandatory.
- name: Verify app auth mode and endpoint grants
run: |
databricks apps get doc-intel-analyst-demo --output json > /tmp/app.json
python -c "
import json
app = json.load(open('/tmp/app.json'))
scopes = set(app.get('user_api_scopes') or [])
required = {'serving.serving-endpoints', 'sql'}
missing = required - scopes
assert not missing, f'OBO scopes missing: {sorted(missing)} (got {sorted(scopes)})'
"
app_obo_required="$(python -c "import yaml; d=yaml.safe_load(open('databricks.yml')); default=d.get('variables',{}).get('app_obo_required',{}).get('default','true'); value=d.get('targets',{}).get('demo',{}).get('variables',{}).get('app_obo_required', default); print(str(value).lower())")"
lakebase_name="$(python -c "import yaml; d=yaml.safe_load(open('databricks.yml')); print(d.get('targets',{}).get('demo',{}).get('variables',{}).get('lakebase_instance','docintel-demo-state-v1'))")"
python -c "import json; app=json.load(open('/tmp/app.json')); vals=[str(app.get(k)) for k in ('service_principal_client_id','service_principal_name','service_principal_id') if app.get(k) is not None]; print('\n'.join(dict.fromkeys(v for v in vals if v)))" > /tmp/app-sp-candidates.txt
db_granted=0
while IFS= read -r principal; do
grant_json="$(python -c "import json, sys; print(json.dumps({'access_control_list':[{'service_principal_name':sys.argv[1],'permission_level':'CAN_USE'}]}))" "$principal")"
if databricks permissions update database-instances "$lakebase_name" --json "$grant_json"; then
db_granted=1
break
fi
done < /tmp/app-sp-candidates.txt
test "$db_granted" = "1"
if [ "$app_obo_required" = "true" ]; then
# `bundle run` may wipe user_api_scopes (documented destructive-update
# behavior). Fail loudly if required user scopes are missing.
python -c "import json; app=json.load(open('/tmp/app.json')); scopes=set(app.get('user_api_scopes') or []); required={'serving.serving-endpoints','sql'}; missing=required-scopes; assert not missing, f'OBO scopes missing: {sorted(missing)} (got {sorted(scopes)})'"
else
python -c "import json; app=json.load(open('/tmp/app.json')); scopes=app.get('user_api_scopes'); assert not scopes, f'demo App-SP mode expected no user_api_scopes, got {scopes}'"
endpoint_id="$(databricks serving-endpoints get "$AGENT_ENDPOINT_NAME" --output json | python -c "import json, sys; e=json.load(sys.stdin); print(e.get('id') or e.get('name'))")"
granted=0
while IFS= read -r principal; do
grant_json="$(python -c "import json, sys; print(json.dumps({'access_control_list':[{'service_principal_name':sys.argv[1],'permission_level':'CAN_QUERY'}]}))" "$principal")"
if databricks permissions update serving-endpoints "$endpoint_id" --json "$grant_json"; then
granted=1
break
fi
done < /tmp/app-sp-candidates.txt
test "$granted" = "1"
fi

- name: CLEARS evaluation gate
run: python evals/clears_eval.py --endpoint "$AGENT_ENDPOINT_NAME" --dataset evals/dataset.jsonl
15 changes: 9 additions & 6 deletions CLAUDE.md
@@ -16,15 +16,15 @@ For an end-to-end overview written for humans, read [`README.md`](./README.md).

The bundle has three chicken-and-egg dependencies that prevent a single `databricks bundle deploy -t demo` from succeeding on a fresh workspace:

1. **Databricks App resource binding** references the Agent Bricks Supervisor endpoint that `scripts/bootstrap_agent_bricks.py` creates after the Vector Search index exists.
1. **Databricks App config** needs the generated Agent Bricks Supervisor endpoint name from `agent/document_intelligence_agent.py`, which can only run after the Vector Search index exists.
2. **Lakehouse Monitor** (`resources/consumers/kpi_drift.yml`) attaches to `gold_filing_kpis`, which doesn't exist until the pipeline runs once.
3. **Lakebase database_catalog + Databricks App** race the `database_instance` provisioning.

**Canonical fix**: Run `./scripts/bootstrap-demo.sh` for fresh stand-ups; plain `databricks bundle deploy -t demo` for steady-state. The script does a **staged deploy** — `resources/` is split into `foundation/` (no data deps) and `consumers/` (need data). Stage 1 temporarily renames consumer YAMLs to `*.yml.skip` so the bundle glob skips them; stage 2 produces data and then runs full `bundle deploy`. Both deploys should succeed cleanly.
**Canonical fix**: Run `./scripts/bootstrap-demo.sh` for fresh stand-ups. For steady-state manual deploys, resolve the generated Supervisor endpoint and pass it as a bundle variable: `databricks bundle deploy -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)"`. The script does a **staged deploy** — `resources/` is split into `foundation/` (no data deps) and `consumers/` (need data). Stage 1 temporarily renames consumer YAMLs to `*.yml.skip` so the bundle glob skips them; stage 2 produces data and then runs full `bundle deploy`. Both deploys should succeed cleanly.
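For orientation, below is a condensed Python rendering of that staged flow. It is illustrative only: `scripts/bootstrap-demo.sh` (bash) is the real implementation, and the data-producing steps between the two deploys are elided here.

```python
# Illustrative sketch of the staged deploy, assuming the repo layout described
# in this file; scripts/bootstrap-demo.sh is authoritative.
import subprocess
from pathlib import Path

consumers = sorted(Path("resources/consumers").glob("*.yml"))

# Stage 1: rename consumer YAMLs so the bundle glob skips them, then deploy foundation.
for f in consumers:
    f.rename(f.with_suffix(".yml.skip"))
try:
    subprocess.run(["databricks", "bundle", "deploy", "-t", "demo"], check=True)
    # ...run the pipeline and wait for gold_filing_kpis rows here...
finally:
    # Stage 2: restore the consumer YAMLs whether or not stage 1 succeeded.
    for f in consumers:
        f.with_suffix(".yml.skip").rename(f)

# Full deploy now that the data the consumers depend on exists.
subprocess.run(["databricks", "bundle", "deploy", "-t", "demo"], check=True)
```

The `try/finally` is the point of the pattern: consumer YAMLs must be restored even when stage 1 fails, or later `bundle validate`/`deploy` runs silently skip them.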

**Do NOT try to "fix" these by:**
- Adding `depends_on` between heterogeneous DAB resource types — DAB doesn't reliably honor it across instance↔catalog↔app.
- Reintroducing a custom MLflow pyfunc serving endpoint. Agent Bricks Knowledge Assistant + Supervisor Agent is the production path.
- Bypassing Agent Bricks Knowledge Assistant + Supervisor Agent for the production path.
- Splitting monitors into a separate target overlay — adds complexity for a one-time concern.

Full breakdown lives in [`docs/runbook.md`](./docs/runbook.md) §"Known deploy ordering gaps".
@@ -33,13 +33,13 @@

```
pipelines/sql/ Lakeflow SDP — Bronze → Silver → Gold (SQL only, principle III)
agent/ Deterministic Agent Bricks tool glue only
agent/ Agent Bricks definition + deterministic tool glue
app/ Streamlit on Databricks Apps + Lakebase psycopg client
evals/ MLflow CLEARS gate (clears_eval.py + dataset.jsonl)
jobs/ Lakeflow Jobs Python tasks (retention, index_refresh)
resources/foundation/ DAB resources with no data deps: catalog/schema/volume, pipeline, retention job, Lakebase instance
resources/consumers/ DAB resources that depend on foundation data: monitor, index-refresh job, app, dashboard, Lakebase catalog
scripts/ Operational scripts (bootstrap-demo.sh, bootstrap_agent_bricks.py, wait_for_kpis.py)
scripts/ Operational scripts (bootstrap-demo.sh, wait_for_kpis.py)
samples/ Synthetic 10-K PDFs (regenerable via synthesize.py)
specs/001-… Spec-Kit artifacts (spec, plan, tasks, research, data-model, contracts, quickstart)
docs/runbook.md Day-2 ops + bring-up workflow
@@ -51,6 +51,7 @@ docs/runbook.md Day-2 ops + bring-up workflow
- Validate: `databricks bundle validate -t demo`
- Fresh stand-up: `./scripts/bootstrap-demo.sh` (requires `DOCINTEL_CATALOG`, `DOCINTEL_SCHEMA`, `DOCINTEL_WAREHOUSE_ID`)
- Steady-state deploy: `databricks bundle deploy -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)"`
- App config/restart: `databricks bundle run -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)" analyst_app`
- Run pipeline: `databricks bundle run -t demo doc_intel_pipeline`
- Run eval: `python evals/clears_eval.py --endpoint "$(./scripts/resolve-agent-endpoint.sh demo)" --dataset evals/dataset.jsonl`

@@ -69,7 +70,9 @@ These were discovered the painful way during the 2026-04-25 bring-up. Future ses
- **Section normalization**: `pipelines/sql/03_gold_classify_extract.sql` POSEXPLODES `parsed:sections[*]` and represents sectionless VARIANT output as one `full_document` row so we never lose a filing.
- **`lakebase_stopped: true` is rejected on instance creation**: the API doesn't allow creating a database_instance directly into stopped state. Default is `false`; flip to `true` only after the instance exists. Reference: `databricks.yml` variable description.
- **macOS doesn't ship `python`**: scripts must prefer `.venv/bin/python` then fall back to `python3`. Reference: `scripts/bootstrap-demo.sh`.
- **Agent Bricks resources are SDK-managed**: `scripts/bootstrap_agent_bricks.py` creates/updates the Knowledge Assistant, its Vector Search knowledge source, the UC KPI function, and the Supervisor Agent. DAB still manages the surrounding data/app/monitor resources.
- **Agent Bricks resources are SDK-managed**: `agent/document_intelligence_agent.py` creates/updates the Knowledge Assistant, its Vector Search knowledge source, the UC KPI function, and the Supervisor Agent. DAB still manages the surrounding data/app/monitor resources.
- **Agent Bricks generates endpoint names**: use `scripts/resolve-agent-endpoint.sh <target>` and pass the result as `--var agent_endpoint_name=...` for deploys and app runs.
- **Agent Bricks invocation uses the invocations path directly**: `app/agent_bricks_client.py` posts to `/serving-endpoints/{endpoint}/invocations` with the user's OBO token and an `X-Request-ID`. Do not swap this back to `WorkspaceClient.serving_endpoints.query()` without revalidating the Agent Bricks response shape; a sketch of this call pattern follows this list.
- **Streamlit on Databricks Apps requires CORS+XSRF off via env vars**: not flags. `STREAMLIT_SERVER_ENABLE_CORS=false` and `STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false` in `app/app.yaml`. Databricks Apps runtime config: https://docs.databricks.com/aws/en/dev-tools/databricks-apps/app-runtime.
- **`bundle deploy` doesn't apply app config / restart**: must follow with `databricks bundle run -t <target> analyst_app` (or use `databricks apps deploy`). Databricks Apps deploy docs: https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy.
- **`bundle run` may wipe `user_api_scopes`**: documented as a destructive-update behavior in the Databricks Apps deploy docs. Bootstrap step 5c re-asserts; CI verifies. If you change the App resource, double-check OBO scopes after.
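Because the invocations-path contract above is easy to regress, here is a minimal hedged sketch of the client call it describes. The payload and response shapes are assumptions; `app/agent_bricks_client.py` remains authoritative.

```python
# Hedged sketch of the direct-invocations call; payload/response schemas are assumed.
import uuid

import requests


def query_supervisor(host: str, endpoint: str, obo_token: str, question: str) -> dict:
    """POST to the endpoint's invocations path with the user's OBO token."""
    response = requests.post(
        f"https://{host}/serving-endpoints/{endpoint}/invocations",
        headers={
            "Authorization": f"Bearer {obo_token}",
            # Propagated so app logs can be correlated with endpoint logs.
            "X-Request-ID": str(uuid.uuid4()),
        },
        json={"input": [{"role": "user", "content": question}]},  # assumed payload shape
        timeout=60,
    )
    response.raise_for_status()
    return response.json()
```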
6 changes: 4 additions & 2 deletions PRODUCTION_READINESS.md
@@ -10,14 +10,16 @@ This project is open-sourced as a Databricks reference implementation. Treat it
| Pilot-ready | Real filings exercise document variability and cost/latency | Reference-ready plus a reviewed EDGAR pilot corpus |
| Production-ready | Analysts can use it under governed identity and SLOs | Pilot-ready plus end-to-end OBO, dashboards, alerts, rollback, and runbook evidence |

Current demo status as of 2026-04-26: the Agent Bricks bootstrap, Databricks App deploy, direct Supervisor endpoint smoke test, Lakebase OAuth credential handling, and Vector Search index-refresh smoke test all passed. The project is not reference-ready yet because the latest synthetic CLEARS run failed the configured quality/latency gate. Production readiness still requires user-token passthrough/OBO audit evidence. See [`VALIDATION.md`](./VALIDATION.md#latest-demo-snapshot).

## Reference-Ready Checklist

- `databricks bundle validate --strict -t demo` passes.
- `./scripts/bootstrap-demo.sh` succeeds in a clean demo workspace.
- Synthetic PDFs in `samples/` produce at least ACME/BETA/GAMMA KPI rows.
- Vector Search index sync completes and the Agent Bricks Supervisor endpoint answers a smoke question with citations.
- `python evals/clears_eval.py --endpoint "$(./scripts/resolve-agent-endpoint.sh demo)" --dataset evals/dataset.jsonl` passes.
- App starts via `databricks bundle run -t demo analyst_app`.
- App starts via `databricks bundle run -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)" analyst_app` in the configured demo auth mode.

## Pilot-Ready Checklist

@@ -31,7 +33,7 @@ This project is open-sourced as a Databricks reference implementation. Treat it
## Production-Ready Checklist

- Databricks Apps user-token passthrough is enabled in the workspace.
- `resources/consumers/analyst.app.yml:user_api_scopes` is declared and survives `bundle run`.
- The prod target's `user_api_scopes` in `databricks.yml` are declared and survive `bundle run`.
- Audit logs prove app requests, Agent Bricks, Knowledge Assistant, Vector Search, and structured KPI SQL calls execute under the invoking user where required.
- Service principal `run_as` is configured for prod via `--var service_principal_id=<sp-app-id>`.
- Analyst group grants include `USE_CATALOG`, `USE_SCHEMA`, `SELECT`, `EXECUTE`, `READ_VOLUME`, and `WRITE_VOLUME` as appropriate.