13 changes: 9 additions & 4 deletions .github/workflows/deploy.yml
@@ -17,7 +17,8 @@ jobs:
env:
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
run: databricks bundle validate --strict -t dev
DOCINTEL_WAREHOUSE_ID: ${{ vars.DOCINTEL_WAREHOUSE_ID }}
run: databricks bundle validate --strict -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID"

deploy-dev:
# CI assumes steady-state: the first-ever bring-up of a workspace must be
@@ -47,7 +48,11 @@ jobs:
run: pip install -r agent/requirements.txt -r evals/requirements.txt

- name: Deploy bundle (full — consumers already exist in steady-state)
run: databricks bundle deploy -t dev
# Pin warehouse_id so the dashboard + serving env match what
# wait_for_kpis / log_and_register use. Without --var, the bundle
# falls back to its `lookup: warehouse: Serverless Starter Warehouse`
# default and silently picks a different ID.
run: databricks bundle deploy -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID"

- name: Wait for Lakebase instance to be AVAILABLE
# Lakebase already exists in steady-state but a config change can
@@ -83,7 +88,7 @@ while True:
"dbfs:/Volumes/${DOCINTEL_CATALOG}/${DOCINTEL_SCHEMA}/raw_filings/" \
--overwrite
done
databricks bundle run -t dev doc_intel_pipeline
databricks bundle run -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" doc_intel_pipeline
python scripts/wait_for_kpis.py --min-rows 3 --timeout 900
# --serving-endpoint repoints the existing endpoint to the new
# model version in-place (steady-state idempotent operation).
@@ -107,7 +112,7 @@ while True:
# Databricks Apps deploy docs:
# https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy
# `bundle deploy` alone uploads code but doesn't apply config/restart.
run: databricks bundle run -t dev analyst_app
run: databricks bundle run -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" analyst_app

- name: Verify OBO scopes survived deploy
# `bundle run` may wipe user_api_scopes (documented destructive-update
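The workflow comments above hinge on the pinned warehouse ID actually being valid. A minimal pre-flight sketch, assuming only the standard `databricks warehouses get <id>` CLI command (not something this PR adds), that a CI step could run before the deploy to fail fast on a mis-set `DOCINTEL_WAREHOUSE_ID`:

```bash
# Hypothetical pre-flight check (not part of this PR): fail fast if the pinned
# ID does not resolve to a real SQL warehouse, instead of letting the bundle
# silently fall back to its name-based `lookup` default.
if ! databricks warehouses get "$DOCINTEL_WAREHOUSE_ID" >/dev/null 2>&1; then
  echo "DOCINTEL_WAREHOUSE_ID=$DOCINTEL_WAREHOUSE_ID does not match any SQL warehouse" >&2
  exit 1
fi
```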
10 changes: 6 additions & 4 deletions README.md
@@ -149,7 +149,7 @@ databricks auth profiles # verify the DEFAULT profile is configured
git clone https://github.com/<your-fork>/databricks-document-intelligence-agent.git
cd databricks-document-intelligence-agent
python -m venv .venv
.venv/bin/pip install -r agent/requirements.txt -r evals/requirements.txt
.venv/bin/pip install -r agent/requirements.txt -r evals/requirements.txt pytest
```

### 2. Discover your workspace IDs
@@ -213,8 +213,10 @@ databricks bundle run -t dev analyst_app # apply app config

# Agent code changes (agent/*.py): register a new model version
# and repoint the existing serving endpoint in-place.
DOCINTEL_CATALOG=workspace DOCINTEL_SCHEMA=docintel_10k_dev \
python agent/log_and_register.py --target dev --serving-endpoint analyst-agent-dev
DOCINTEL_CATALOG=workspace \
DOCINTEL_SCHEMA=docintel_10k_dev \
DOCINTEL_WAREHOUSE_ID=<from-step-2> \
.venv/bin/python agent/log_and_register.py --target dev --serving-endpoint analyst-agent-dev

# Pipeline SQL changes that need to re-process existing filings
databricks bundle run -t dev doc_intel_pipeline
@@ -596,7 +598,7 @@ Override via `--var name=value` on any `bundle` command.
|---|---|---|
| `DOCINTEL_CATALOG` | yes | Bootstrap, CI, eval |
| `DOCINTEL_SCHEMA` | yes | Same |
| `DOCINTEL_WAREHOUSE_ID` | yes | Bootstrap kpi-poll, eval slicer |
| `DOCINTEL_WAREHOUSE_ID` | yes | Bootstrap (passed to bundle as `--var warehouse_id`, used by kpi-poll + smoke); `agent/log_and_register.py` (auth-policy SQL warehouse resource); `agent/tools.py` UC Function tool |
| `DOCINTEL_TARGET` | no (default `dev`) | Bootstrap |
| `DOCINTEL_ANALYST_GROUP` | no (default `account users`) | UC grants in bootstrap + CI |
| `DOCINTEL_WAIT_SECONDS` | no (default 600) | Bootstrap KPI-table poll timeout |
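The `DOCINTEL_WAREHOUSE_ID=<from-step-2>` placeholder in the snippet above is filled from the workspace-discovery step. A hedged sketch of doing that by hand; the catalog and schema values are the ones the snippet already uses, and the exact `databricks warehouses list` output format depends on your CLI version:

```bash
# Pick the SQL warehouse to pin, then export the env vars the
# log_and_register.py invocation above expects.
databricks warehouses list            # copy the ID of the warehouse you want
export DOCINTEL_CATALOG=workspace
export DOCINTEL_SCHEMA=docintel_10k_dev
export DOCINTEL_WAREHOUSE_ID=<id-from-the-list-above>
```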
28 changes: 22 additions & 6 deletions scripts/bootstrap-dev.sh
@@ -66,6 +66,13 @@ if [[ "${DOCINTEL_FORCE_LOCK:-0}" == "1" ]]; then
DEPLOY_FLAGS+=(--force-lock)
fi

# Pin the bundle's `warehouse_id` variable to the user-selected ID so the
# dashboard + serving-endpoint env match wait_for_kpis / log_and_register.
# Without this, the bundle falls back to its `lookup: warehouse: Serverless
# Starter Warehouse` default — which fails validation in workspaces lacking
# that named warehouse, and silently picks a different ID otherwise.
VAR_FLAGS=(--var "warehouse_id=$DOCINTEL_WAREHOUSE_ID")

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$REPO_ROOT"
export PYTHONPATH="$REPO_ROOT${PYTHONPATH:+:$PYTHONPATH}"
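One design note on the `VAR_FLAGS` addition in the hunk above: because it is a bash array, every later `"${VAR_FLAGS[@]}"` expansion hands the CLI `--var` and its value as two intact arguments. A standalone illustration (the ID is made up):

```bash
# Array expansion keeps "warehouse_id=<value>" as a single word; an unquoted
# plain-string variable would be split on whitespace instead.
VAR_FLAGS=(--var "warehouse_id=abc123def456")
printf '[%s] ' "${VAR_FLAGS[@]}"; echo   # prints: [--var] [warehouse_id=abc123def456]
```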
@@ -209,15 +216,15 @@ if [[ "$MODE" == "first" ]]; then
mv "$f" "$f.skip"
done

databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \
databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \
die "stage-1 deploy failed (foundation should be self-contained — investigate)"

restore_consumers
trap - EXIT INT TERM

log "step 2/6: producing data"
upload_samples
databricks bundle run -t "$TARGET" "$PIPELINE_KEY" || \
databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" "$PIPELINE_KEY" || \
die "pipeline run failed — inspect SDP UI before retrying"
"$PYTHON" scripts/wait_for_kpis.py --min-rows 1 --timeout "$WAIT_SECONDS" || \
die "timed out waiting for $KPI_TABLE"
@@ -226,18 +233,27 @@ if [[ "$MODE" == "first" ]]; then
wait_for_lakebase_available

log "step 3/6: stage-2 deploy (full bundle — consumers join the foundation)"
databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \
databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \
die "stage-2 deploy failed; check logs"

# The index_refresh job is created by stage-2 deploy and is `table_update`-
# triggered. Triggers do not fire retroactively on the rows the pipeline
# produced in step 2, so we have to materialize the Vector Search index
# explicitly the first time. sync_index.py is create-if-missing/sync-if-
# exists, so this is idempotent on subsequent runs.
log "step 3.5/6: triggering initial Vector Search index materialization"
databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" index_refresh || \
log " warn: index_refresh failed; the table_update trigger will retry on the next pipeline run"

else
# ─── Steady-state path: single full deploy + in-place data refresh ────────
log "step 1/6: full bundle deploy (steady-state — consumers already exist)"
databricks bundle deploy -t "$TARGET" "${DEPLOY_FLAGS[@]}" || \
databricks bundle deploy -t "$TARGET" "${VAR_FLAGS[@]}" "${DEPLOY_FLAGS[@]}" || \
die "bundle deploy failed; if a prior deploy was interrupted, set DOCINTEL_FORCE_LOCK=1 and retry"

log "step 2/6: refreshing data + repointing serving endpoint"
upload_samples
databricks bundle run -t "$TARGET" "$PIPELINE_KEY" || \
databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" "$PIPELINE_KEY" || \
die "pipeline run failed — inspect SDP UI before retrying"
"$PYTHON" scripts/wait_for_kpis.py --min-rows 1 --timeout "$WAIT_SECONDS" || \
die "timed out waiting for $KPI_TABLE"
@@ -250,7 +266,7 @@ fi

# ─── Step 4: app run (both paths) ────────────────────────────────────────────
log "step 4/6: applying app config + restart"
databricks bundle run -t "$TARGET" analyst_app || \
databricks bundle run -t "$TARGET" "${VAR_FLAGS[@]}" analyst_app || \
log " warn: analyst_app run failed; retry manually with 'databricks bundle run -t $TARGET analyst_app'"

# ─── Step 5: UC grants (idempotent) ──────────────────────────────────────────
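If the step-3.5 warning fires during a first bring-up, the materialization can be re-run by hand once the pipeline has produced rows. This is the same command the script issues (shown with the default `dev` target), and the create-if-missing / sync-if-exists behavior of `sync_index.py` makes repeating it safe:

```bash
# Manual retry of the one-off Vector Search index materialization (idempotent).
databricks bundle run -t dev --var "warehouse_id=$DOCINTEL_WAREHOUSE_ID" index_refresh
```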