45 changes: 28 additions & 17 deletions .github/workflows/deploy.yml
@@ -47,12 +47,25 @@ jobs:
- name: Install Python deps
run: pip install -r agent/requirements.txt -r evals/requirements.txt

- name: Resolve steady-state Agent Bricks endpoint
run: |
endpoint="$(./scripts/resolve-agent-endpoint.sh demo 2>/dev/null || true)"
if [ -z "$endpoint" ]; then
echo "::error::Agent Bricks supervisor for demo is missing. Run ./scripts/bootstrap-demo.sh once for first workspace bring-up, then rerun CI."
exit 1
fi
if ! databricks api get "/api/2.0/serving-endpoints/${endpoint}" >/dev/null 2>&1; then
echo "::error::Agent Bricks supervisor record points to endpoint ${endpoint}, but that serving endpoint is missing or not listable. Run ./scripts/bootstrap-demo.sh locally to repair workspace drift."
exit 1
fi
echo "AGENT_ENDPOINT_NAME=${endpoint}" >> "$GITHUB_ENV"

- name: Deploy bundle (full — consumers already exist in steady-state)
# Pin warehouse_id so the dashboard + serving env match what
# wait_for_kpis / log_and_register use. Without --var, the bundle
# Pin warehouse_id so the dashboard and Agent Bricks bootstrap match
# wait_for_kpis. Without --var, the bundle
# falls back to its `lookup: warehouse: Serverless Starter Warehouse`
# default and silently picks a different ID.
run: databricks bundle deploy -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID"
run: databricks bundle deploy -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" --var "agent_endpoint_name=$AGENT_ENDPOINT_NAME"

- name: Wait for Lakebase instance to be AVAILABLE
# Lakebase already exists in steady-state but a config change can
@@ -81,7 +94,7 @@ while True:
env:
LAKEBASE_NAME: ${{ vars.DOCINTEL_LAKEBASE_NAME || 'docintel-demo-state-v1' }}

- name: Refresh data — upload samples, run pipeline, register new model version
- name: Refresh data and Agent Bricks configuration
run: |
for f in samples/*_10K_*.pdf; do
databricks fs cp "$f" \
@@ -90,9 +103,12 @@
done
databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" doc_intel_pipeline
python scripts/wait_for_kpis.py --min-rows 3 --timeout 900
# --serving-endpoint repoints the existing endpoint to the new
# model version in-place (steady-state idempotent operation).
python agent/log_and_register.py --target demo --serving-endpoint analyst-agent-demo
databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" --var "agent_endpoint_name=$AGENT_ENDPOINT_NAME" index_refresh
python scripts/bootstrap_agent_bricks.py \
--target demo \
--catalog "$DOCINTEL_CATALOG" \
--schema "$DOCINTEL_SCHEMA" \
--warehouse-id "$DOCINTEL_WAREHOUSE_ID"

- name: Apply UC grants (catalog + schema; not DAB-supported)
# UC requires the full chain: USE_CATALOG → USE_SCHEMA → SELECT/EXECUTE.
@@ -112,26 +128,21 @@
# Databricks Apps deploy docs:
# https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy
# `bundle deploy` alone uploads code but doesn't apply config/restart.
run: databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" analyst_app
run: databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" --var "agent_endpoint_name=$AGENT_ENDPOINT_NAME" analyst_app

- name: Verify OBO scopes survived deploy
# `bundle run` may wipe user_api_scopes (documented destructive-update
# behavior). Fail loudly so we re-apply. Skipped when user_api_scopes
# are not declared (workspace feature off).
# behavior). Fail loudly because user-token passthrough is mandatory.
run: |
if grep -q '^ user_api_scopes:' resources/consumers/analyst.app.yml; then
databricks apps get doc-intel-analyst-demo --output json > /tmp/app.json
python -c "
databricks apps get doc-intel-analyst-demo --output json > /tmp/app.json
python -c "
import json
app = json.load(open('/tmp/app.json'))
scopes = set(app.get('user_api_scopes') or [])
required = {'serving.serving-endpoints', 'sql'}
missing = required - scopes
assert not missing, f'OBO scopes missing: {sorted(missing)} (got {sorted(scopes)})'
"
else
echo "user_api_scopes not declared (workspace feature off); skipping OBO scope check"
fi

- name: CLEARS evaluation gate
run: python evals/clears_eval.py --endpoint analyst-agent-demo --dataset evals/dataset.jsonl
run: python evals/clears_eval.py --endpoint "$AGENT_ENDPOINT_NAME" --dataset evals/dataset.jsonl
30 changes: 14 additions & 16 deletions CLAUDE.md
@@ -4,10 +4,10 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

## Repository status

**Databricks Document Intelligence Agent — Reference Implementation.**
**Databricks Document Intelligence Agent - Agent Bricks implementation.**
Active feature: **001-doc-intel-10k** — demonstrated on synthetic SEC 10-K filings.
Drives a Bronze→Silver→Gold pipeline (`ai_parse_document` / `ai_classify` / `ai_extract`),
Mosaic AI Vector Search index, agent endpoint behind AI Gateway, Streamlit App on Databricks Apps,
Mosaic AI Vector Search index, Agent Bricks Supervisor endpoint behind AI Gateway, Streamlit App on Databricks Apps,
Lakebase state, Lakehouse Monitoring, and an MLflow CLEARS eval gate — all in one DAB.

For an end-to-end overview written for humans, read [`README.md`](./README.md).
@@ -16,15 +16,15 @@ For an end-to-end overview written for humans, read [`README.md`](./README.md).

The bundle has three chicken-egg dependencies that prevent a single `databricks bundle deploy -t demo` from succeeding on a fresh workspace:

1. **Model Serving endpoint** references a registered model version that doesn't exist until `agent/log_and_register.py` runs.
1. **Databricks App resource binding** references the Agent Bricks Supervisor endpoint that `scripts/bootstrap_agent_bricks.py` creates after the Vector Search index exists.
2. **Lakehouse Monitor** (`resources/consumers/kpi_drift.yml`) attaches to `gold_filing_kpis`, which doesn't exist until the pipeline runs once.
3. **Lakebase database_catalog + Databricks App** race the `database_instance` provisioning.

**Canonical fix**: Run `./scripts/bootstrap-demo.sh` for fresh stand-ups; plain `databricks bundle deploy -t demo` for steady-state. The script does a **staged deploy** — `resources/` is split into `foundation/` (no data deps) and `consumers/` (need data). Stage 1 temporarily renames consumer YAMLs to `*.yml.skip` so the bundle glob skips them; stage 2 produces data and then runs full `bundle deploy`. **Both deploys succeed cleanly** — no "errors tolerated" hand-waving, no orphans to clean up on retry.
**Canonical fix**: Run `./scripts/bootstrap-demo.sh` for fresh stand-ups; plain `databricks bundle deploy -t demo` for steady-state. The script does a **staged deploy** — `resources/` is split into `foundation/` (no data deps) and `consumers/` (need data). Stage 1 temporarily renames consumer YAMLs to `*.yml.skip` so the bundle glob skips them; stage 2 produces data and then runs full `bundle deploy`. Both deploys should succeed cleanly. The staged flow is sketched below.
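
A minimal sketch of the staged flow, assuming the env vars the bootstrap script already requires (`DOCINTEL_CATALOG`, `DOCINTEL_SCHEMA`, `DOCINTEL_WAREHOUSE_ID`). The sample-upload destination variable is hypothetical, and the exact ordering of the index and Agent Bricks steps is the script's concern; `scripts/bootstrap-demo.sh` remains the source of truth.

```bash
# Stage 1: hide consumer resources so the bundle glob skips them, deploy foundation only.
for f in resources/consumers/*.yml; do mv "$f" "$f.skip"; done
databricks bundle deploy -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID"

# Stage 2: produce the data and Agent Bricks endpoint the consumers depend on.
for f in samples/*_10K_*.pdf; do
  databricks fs cp "$f" "$RAW_FILINGS_VOLUME_PATH"   # hypothetical destination variable
done
databricks bundle run -t demo --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" doc_intel_pipeline
python scripts/wait_for_kpis.py --min-rows 3 --timeout 900
python scripts/bootstrap_agent_bricks.py --target demo \
  --catalog "$DOCINTEL_CATALOG" --schema "$DOCINTEL_SCHEMA" --warehouse-id "$DOCINTEL_WAREHOUSE_ID"

# Stage 3: restore consumer YAMLs and run the full deploy with the resolved endpoint.
for f in resources/consumers/*.yml.skip; do mv "$f" "${f%.skip}"; done
databricks bundle deploy -t demo \
  --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" \
  --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)"
```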

**Do NOT try to "fix" these by:**
- Adding `depends_on` between heterogeneous DAB resource types — DAB doesn't reliably honor it across instance↔catalog↔app.
- Switching `resources/consumers/agent.serving.yml` to UC alias syntax (`@demo`) — DAB serving config may reject alias syntax; that's why `_promote_serving_endpoint` exists in `agent/log_and_register.py`.
- Reintroducing a custom MLflow pyfunc serving endpoint. Agent Bricks Knowledge Assistant + Supervisor Agent is the production path.
- Splitting monitors into a separate target overlay — adds complexity for a one-time concern.

Full breakdown lives in [`docs/runbook.md`](./docs/runbook.md) §"Known deploy ordering gaps".
@@ -33,13 +33,13 @@

```
pipelines/sql/ Lakeflow SDP — Bronze → Silver → Gold (SQL only, principle III)
agent/ Mosaic AI Agent Framework: pyfunc, retrieval, supervisor, UC tools, registration, OBO helpers
agent/ Deterministic Agent Bricks tool glue only
app/ Streamlit on Databricks Apps + Lakebase psycopg client
evals/ MLflow CLEARS gate (clears_eval.py + dataset.jsonl)
jobs/ Lakeflow Jobs Python tasks (retention, index_refresh)
resources/foundation/ DAB resources with no data deps: catalog/schema/volume, pipeline, retention job, Lakebase instance
resources/consumers/ DAB resources that depend on foundation data: serving endpoint, monitor, index-refresh job, app, dashboard, Lakebase catalog
scripts/ Operational scripts (bootstrap-demo.sh, wait_for_kpis.py)
resources/consumers/ DAB resources that depend on foundation data: monitor, index-refresh job, app, dashboard, Lakebase catalog
scripts/ Operational scripts (bootstrap-demo.sh, bootstrap_agent_bricks.py, wait_for_kpis.py)
samples/ Synthetic 10-K PDFs (regenerable via synthesize.py)
specs/001-… Spec-Kit artifacts (spec, plan, tasks, research, data-model, contracts, quickstart)
docs/runbook.md Day-2 ops + bring-up workflow
@@ -50,13 +50,13 @@ docs/runbook.md Day-2 ops + bring-up workflow

- Validate: `databricks bundle validate -t demo`
- Fresh stand-up: `./scripts/bootstrap-demo.sh` (requires `DOCINTEL_CATALOG`, `DOCINTEL_SCHEMA`, `DOCINTEL_WAREHOUSE_ID`)
- Steady-state deploy: `databricks bundle deploy -t demo`
- Steady-state deploy: `databricks bundle deploy -t demo --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh demo)"`
- Run pipeline: `databricks bundle run -t demo doc_intel_pipeline`
- Run eval: `python evals/clears_eval.py --endpoint analyst-agent-demo --dataset evals/dataset.jsonl`
- Run eval: `python evals/clears_eval.py --endpoint "$(./scripts/resolve-agent-endpoint.sh demo)" --dataset evals/dataset.jsonl`

## Tests & validation

- `pytest agent/tests/` — unit tests for retrieval, agent routing, supervisor
- `pytest agent/tests/` — unit tests for deterministic Agent Bricks tool glue
- `databricks bundle validate -t demo` and `-t prod` — schema check both targets before merging
- The CLEARS eval is the deploy gate; principle V says no agent ships without it passing

@@ -66,19 +66,17 @@

- **SDP streaming chains require explicit `STREAM(...)`**: a temp view that reads from `STREAM(upstream_table)` is itself a streaming view, and downstream references must wrap it in `STREAM(...)` again. Reference: `pipelines/sql/02_silver_parse.sql:23` (`FROM STREAM(silver_with_parsed)`).
- **One Auto Loader source per path**: split downstream tables off a single `STREAM read_files(...)` via a temp streaming view. Reference: `pipelines/sql/01_bronze.sql` (`raw_pdf_arrivals` view); Auto Loader docs: https://docs.databricks.com/aws/en/ingestion/cloud-object-storage/auto-loader/.
- **Section explosion fallback**: `pipelines/sql/03_gold_classify_extract.sql` POSEXPLODES `parsed:sections[*]` and falls back to a single `full_document` row when the VARIANT lacks `$.sections` so we never lose a filing.
- **MLflow + UC requires both inputs AND outputs in signatures**: an inputs-only signature is rejected at registration. For variable-shape fields like `citations` (array of dicts), use `mlflow.types.schema.AnyType()` to avoid serving-time truncation. Reference: `agent/log_and_register.py:_signature`.
- **Section normalization**: `pipelines/sql/03_gold_classify_extract.sql` POSEXPLODES `parsed:sections[*]` and represents sectionless VARIANT output as one `full_document` row so we never lose a filing.
- **`lakebase_stopped: true` is rejected on instance creation**: the API doesn't allow creating a database_instance directly into stopped state. Default is `false`; flip to `true` only after the instance exists. Reference: `databricks.yml` variable description.
- **macOS doesn't ship `python`**: scripts must prefer `.venv/bin/python` then fall back to `python3`. Reference: `scripts/bootstrap-demo.sh`.
- **`agent/log_and_register.py` needs `PYTHONPATH`**: it imports the `agent` package; run with `PYTHONPATH=$REPO_ROOT` or use the bootstrap script which exports it.
- **Serving endpoint version drifts from YAML**: `resources/consumers/agent.serving.yml` pins `entity_version: "1"` as the bootstrap value. Steady-state CI re-registers new versions and uses `_promote_serving_endpoint` to update the served entity in-place. The YAML and the live endpoint diverge over time — that's intentional, not drift.
- **Agent Bricks resources are SDK-managed**: `scripts/bootstrap_agent_bricks.py` creates/updates the Knowledge Assistant, its Vector Search knowledge source, the UC KPI function, and the Supervisor Agent. DAB still manages the surrounding data/app/monitor resources.
- **Streamlit on Databricks Apps requires CORS+XSRF off via env vars**: not flags. `STREAMLIT_SERVER_ENABLE_CORS=false` and `STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false` in `app/app.yaml`. Databricks Apps runtime config: https://docs.databricks.com/aws/en/dev-tools/databricks-apps/app-runtime.
- **`bundle deploy` doesn't apply app config / restart**: must follow with `databricks bundle run -t <target> analyst_app` (or use `databricks apps deploy`). Databricks Apps deploy docs: https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy.
- **`bundle run` may wipe `user_api_scopes`**: documented as a destructive-update behavior in the Databricks Apps deploy docs. Bootstrap step 5c re-asserts; CI verifies. If you change the App resource, double-check OBO scopes after. A re-assert/verify sketch follows this list.
- **OBO token never refreshes on Streamlit**: captured at HTTP request, then WebSocket. Long sessions need a page reload to re-acquire.
- **Lakebase init runs at startup under whatever creds the app process has**: in deployed mode that's the App SP (per resource binding); in local dev, set `DATABRICKS_CLIENT_ID/SECRET` to the App SP or tables get user-owned and break the deployed App. `lakebase_client.init_schema` warns on identity mismatch. See `app/README.md`.
- **Prod `bundle validate` fails without `service_principal_id`**: that's the safety. Pass `--var service_principal_id=<sp-app-id>` for any prod operation.
- **Prod `run_as` rejected by app/monitor/serving when validated as a user**: DAB requires `run_as == owner`, and these three resource types set their owner to the deploying identity. Local `bundle validate -t prod --var service_principal_id=` as a *user* fails with three errors; CI authenticated as the *SP* (matching `service_principal_id`) validates and deploys cleanly. This is correct enforcement, not a bug.
- **Prod `run_as` rejected by app/monitor resources when validated as a user**: DAB requires `run_as == owner`, and these resource types set their owner to the deploying identity. Local `bundle validate -t prod --var service_principal_id=...` as a user can fail; CI authenticated as the SP matching `service_principal_id` is the production validation path.
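
A minimal re-assert/verify sketch for the `user_api_scopes` bullet above. The verification mirrors the CI step; the `databricks apps update --json` call and the request field name are assumptions, so confirm them against the Apps API before relying on this.

```bash
APP_NAME=doc-intel-analyst-demo

# Re-assert OBO scopes after a `bundle run` (request shape is an assumption).
databricks apps update "$APP_NAME" --json '{
  "user_api_scopes": ["serving.serving-endpoints", "sql"]
}'

# Verify, mirroring the CI gate.
databricks apps get "$APP_NAME" --output json | python3 -c '
import json, sys
app = json.load(sys.stdin)
scopes = set(app.get("user_api_scopes") or [])
missing = {"serving.serving-endpoints", "sql"} - scopes
assert not missing, f"OBO scopes missing: {sorted(missing)} (got {sorted(scopes)})"
'
```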

## Spec-Kit cycle

24 changes: 12 additions & 12 deletions PRODUCTION_READINESS.md
@@ -6,37 +6,37 @@ This project is open-sourced as a Databricks reference implementation. Treat it

| Level | Bar | Evidence |
|---|---|---|
| Reference-ready | Synthetic corpus demonstrates the full architecture | Dev bundle validates, staged bootstrap succeeds, synthetic CLEARS passes |
| Reference-ready | Synthetic corpus demonstrates the full architecture | Demo bundle validates, staged bootstrap succeeds, synthetic CLEARS passes |
| Pilot-ready | Real filings exercise document variability and cost/latency | Reference-ready plus a reviewed EDGAR pilot corpus |
| Production-ready | Analysts can use it under governed identity and SLOs | Pilot-ready plus end-to-end OBO, dashboards, alerts, rollback, and runbook evidence |

## Reference-Ready Checklist

- `databricks bundle validate --strict -t dev` passes.
- `./scripts/bootstrap-dev.sh` succeeds in a clean dev workspace.
- `databricks bundle validate --strict -t demo` passes.
- `./scripts/bootstrap-demo.sh` succeeds in a clean demo workspace.
- Synthetic PDFs in `samples/` produce at least ACME/BETA/GAMMA KPI rows.
- Vector Search index sync completes and the endpoint answers a smoke question with citations.
- `python evals/clears_eval.py --endpoint analyst-agent-dev --dataset evals/dataset.jsonl` passes.
- App starts via `databricks bundle run -t dev analyst_app`.
- Vector Search index sync completes and the Agent Bricks Supervisor endpoint answers a smoke question with citations.
- `python evals/clears_eval.py --endpoint "$(./scripts/resolve-agent-endpoint.sh demo)" --dataset evals/dataset.jsonl` passes.
- App starts via `databricks bundle run -t demo analyst_app`.

## Pilot-Ready Checklist

- At least 5 representative public SEC 10-K filings are uploaded from EDGAR and processed.
- Section explosion produces meaningful section labels, not only `full_document` fallbacks.
- Section explosion produces meaningful section labels, not only `full_document` normalized rows.
- KPI extraction is manually reviewed for revenue, EBITDA, segment revenue, and top risks.
- Quality rubric distribution is reviewed; low-quality filings are retained in Gold but excluded from `gold_filing_sections_indexable`.
- Latency p95 is measured for single-filing and cross-company prompts.
- Estimated AI Functions, Vector Search, Model Serving, Lakebase, and Apps costs are documented.
- Estimated AI Functions, Vector Search, Agent Bricks, AI Gateway, Lakebase, and Apps costs are documented.

## Production-Ready Checklist

- Databricks Apps user-token passthrough is enabled in the workspace.
- `resources/consumers/analyst.app.yml:user_api_scopes` is uncommented and survives `bundle run`.
- Audit logs prove app requests, agent SQL, Vector Search, and downstream serving calls execute under the invoking user where required.
- `resources/consumers/analyst.app.yml:user_api_scopes` is declared and survives `bundle run`.
- Audit logs prove app requests, Agent Bricks, Knowledge Assistant, Vector Search, and structured KPI SQL calls execute under the invoking user where required.
- Service principal `run_as` is configured for prod via `--var service_principal_id=<sp-app-id>`.
- Analyst group grants include `USE_CATALOG`, `USE_SCHEMA`, `SELECT`, `EXECUTE`, `READ_VOLUME`, and `WRITE_VOLUME` as appropriate.
- CLEARS passes against the pilot corpus and synthetic regression corpus.
- Rollback is tested by re-pointing the UC model alias or served model version to a prior working version.
- Rollback is tested by reverting Agent Bricks configuration and redeploying the previous known-good bundle; a sketch follows this checklist.
- Dashboards and monitors are deployed and reviewed by an owner.
- Alerting exists for pipeline failures, index-refresh failures, endpoint errors, app startup failures, CLEARS failures, and Lakebase write failures.
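
One way to exercise the rollback item, as a sketch only: the tag name, the `prod` argument to `resolve-agent-endpoint.sh`, the `PROD_SP_APP_ID` variable, and re-running `bootstrap_agent_bricks.py` against the older config are assumptions, not a tested procedure.

```bash
# Check out the last known-good revision of the bundle and Agent Bricks config.
git checkout tags/prod-known-good   # hypothetical tag

# Redeploy under the prod service principal.
databricks bundle deploy -t prod \
  --var "service_principal_id=$PROD_SP_APP_ID" \
  --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" \
  --var "agent_endpoint_name=$(./scripts/resolve-agent-endpoint.sh prod)"

# Re-apply the previous Agent Bricks configuration.
python scripts/bootstrap_agent_bricks.py --target prod \
  --catalog "$DOCINTEL_CATALOG" --schema "$DOCINTEL_SCHEMA" --warehouse-id "$DOCINTEL_WAREHOUSE_ID"
```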

@@ -45,4 +45,4 @@ This project is open-sourced as a Databricks reference implementation. Treat it
- It is not a managed product.
- It does not include a legal/compliance review for SEC filing usage.
- It does not guarantee support for every 10-K layout or scanned PDF quality.
- It does not make SP fallback acceptable for production row-level-security use cases.
- It does not permit broad service-principal reads for production document Q&A.