Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions .github/workflows/eval-e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# End-to-end lifecycle evaluation workflow: builds azd from source and signs
# in to Azure before the evaluation steps run.
name: "Eval: E2E Lifecycle"

on:
  # Allows manual runs in addition to the schedule.
  workflow_dispatch:
  schedule:
    # Mondays at 06:00 UTC.
    - cron: "0 6 * * 1"

permissions:
  contents: read
  # Needed so the job can request an OIDC token for the Azure login step.
  id-token: write

jobs:
  e2e-lifecycle:
    runs-on: ubuntu-latest
    env:
      # Environment name is derived from the workflow run id.
      AZURE_ENV_NAME: eval-e2e-${{ github.run_id }}
    steps:
      - uses: actions/checkout@v4

      # The Go version is taken from the azd module file.
      - uses: actions/setup-go@v5
        with:
          go-version-file: cli/azd/go.mod

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Build azd
        run: go build -o ./azd .
        working-directory: cli/azd

      # Puts the directory holding the freshly built binary on PATH for the
      # steps that follow.
      - name: Add azd to PATH
        run: echo "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"

      - name: Azure Login (OIDC)
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
Comment on lines +36 to +41
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move this from a GitHub Action to the internal Azure DevOps (AzDO) pipeline? Is there a hard dependency on this being a GitHub Action?

It has been hard in the past to get secrets like these added to our public repo, so the strategy is to use the internal AzDO pipelines.

@danieljurek FYI

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do run some github actions on a set of 1ES runners. I'm checking with @weshaggard about how to handle permissions for GH Actions.


- name: Install Waza CLI
  run: npm install -g waza

- name: Install eval dependencies
  working-directory: cli/azd/test/eval
  run: npm ci

# continue-on-error keeps the job from failing when evaluations fail; the
# results are still published by the upload step below.
- name: Run lifecycle evaluations
  working-directory: cli/azd/test/eval
  continue-on-error: true
  env:
    COPILOT_CLI_TOKEN: ${{ secrets.COPILOT_CLI_TOKEN }}
    AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
  run: waza run --executor copilot-sdk --filter "tasks/lifecycle/"

- name: Upload E2E results
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: e2e-results-${{ github.run_id }}
    path: cli/azd/test/eval/reports/
    retention-days: 30

# Best-effort teardown that runs even when earlier steps failed.
# The script changes to /tmp itself, so no working-directory is set on the
# step (the previous value was overridden by the `cd` and had no effect).
# NOTE(review): `azd down` presumably has to run from the directory of the azd
# project created during the evaluation — confirm that /tmp is that directory.
- name: Cleanup Azure resources
  if: always()
  env:
    AZURE_ENV_NAME: eval-e2e-${{ github.run_id }}
  run: |
    cd /tmp
    # Keep azd's error output and raise a warning annotation on failure
    # instead of discarding it, so leftover Azure resources get noticed.
    # The step itself still succeeds.
    if ! azd down --purge --force --no-prompt; then
      echo "::warning::azd down failed for ${AZURE_ENV_NAME}; Azure resources may need manual cleanup"
    fi
62 changes: 62 additions & 0 deletions .github/workflows/eval-report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Weekly aggregation workflow: pulls artifacts from earlier Waza and E2E
# evaluation runs and republishes them together as one artifact.
name: "Eval: Weekly Report"

on:
  workflow_dispatch:
  schedule:
    # Mondays at 08:00 UTC, intended to run after the E2E workflow.
    - cron: "0 8 * * 1"

permissions:
  # Read access to workflow runs and their artifacts.
  actions: read
  contents: read

jobs:
  generate-report:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Install eval dependencies
        run: npm ci
        working-directory: cli/azd/test/eval

      # Picks the first run in the API listing whose conclusion is "success".
      # A missing run or missing artifacts only prints a message; the step
      # does not fail.
      - name: Download recent Waza artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          mkdir -p cli/azd/test/eval/reports/waza
          RUN_ID=$(gh api repos/${{ github.repository }}/actions/workflows/eval-waza.yml/runs \
            --jq '.workflow_runs | map(select(.conclusion == "success")) | .[0].id // empty' 2>/dev/null)
          if [ -n "$RUN_ID" ]; then
            gh run download "$RUN_ID" -D cli/azd/test/eval/reports/waza 2>/dev/null || echo "No waza artifacts found"
          else
            echo "No successful waza runs found, skipping"
          fi

      # Same lookup as the previous step, against the E2E workflow.
      - name: Download recent E2E artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          mkdir -p cli/azd/test/eval/reports/e2e
          RUN_ID=$(gh api repos/${{ github.repository }}/actions/workflows/eval-e2e.yml/runs \
            --jq '.workflow_runs | map(select(.conclusion == "success")) | .[0].id // empty' 2>/dev/null)
          if [ -n "$RUN_ID" ]; then
            gh run download "$RUN_ID" -D cli/azd/test/eval/reports/e2e 2>/dev/null || echo "No e2e artifacts found"
          else
            echo "No successful e2e runs found, skipping"
          fi

      # TODO: Implement report generation script (scripts/generate-report.ts)
      # that diffs Waza result JSON files and produces regression-issues.json.
      # Once implemented, add a step to create GitHub issues from regressions.

      # Publishes everything collected under reports/ as a single artifact.
      - name: Upload aggregated artifacts
        uses: actions/upload-artifact@v4
        with:
          name: eval-weekly-report-${{ github.run_id }}
          path: cli/azd/test/eval/reports/
          retention-days: 90
51 changes: 51 additions & 0 deletions .github/workflows/eval-unit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Pull-request checks for the eval suite: unit tests plus Waza task validation.
name: "Eval: Unit Tests"

on:
  pull_request:
    # Only runs when the eval suite or the listed azd sources change.
    paths:
      - "cli/azd/test/eval/**"
      - "cli/azd/internal/mcp/**"
      - "cli/azd/cmd/mcp.go"
      - "cli/azd/cmd/root.go"

permissions:
  contents: read

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # The Go version is taken from the azd module file.
      - uses: actions/setup-go@v5
        with:
          go-version-file: cli/azd/go.mod

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Build azd
        run: go build -o ./azd .
        working-directory: cli/azd

      - name: Install eval dependencies
        run: npm ci
        working-directory: cli/azd/test/eval

      - name: Run unit tests
        run: npm run test:unit -- --ci
        working-directory: cli/azd/test/eval

      # Validation problems are reported but do not fail the job.
      - name: Validate Waza task YAML
        continue-on-error: true
        run: npm run waza:validate
        working-directory: cli/azd/test/eval

      # Runs even when an earlier step failed.
      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-unit-results
          path: cli/azd/test/eval/reports/
          retention-days: 30
53 changes: 53 additions & 0 deletions .github/workflows/eval-waza.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Scheduled Waza evaluation runs against an azd binary built from source.
name: "Eval: Waza Runs"

on:
  workflow_dispatch:
  schedule:
    # 05:00, 12:00 and 20:00 UTC, Tuesday through Saturday.
    - cron: "0 5,12,20 * * 2-6"

permissions:
  contents: read

jobs:
  waza-run:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # The Go version is taken from the azd module file.
      - uses: actions/setup-go@v5
        with:
          go-version-file: cli/azd/go.mod

      - uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Build azd
        run: go build -o ./azd .
        working-directory: cli/azd

      # Puts the directory holding the freshly built binary on PATH for the
      # steps that follow.
      - name: Add azd to PATH
        run: echo "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"

      - name: Install Waza CLI
        run: npm install -g waza

      - name: Install eval dependencies
        run: npm ci
        working-directory: cli/azd/test/eval

      # continue-on-error keeps the job from failing when evaluations fail.
      - name: Run Waza evaluations
        continue-on-error: true
        env:
          COPILOT_CLI_TOKEN: ${{ secrets.COPILOT_CLI_TOKEN }}
        run: waza run --executor copilot-sdk
        working-directory: cli/azd/test/eval

      # Runs even when an earlier step failed.
      - name: Upload Waza results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: waza-results-${{ github.run_id }}
          path: cli/azd/test/eval/reports/
          retention-days: 30
22 changes: 22 additions & 0 deletions cli/azd/.vscode/cspell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,28 @@ overrides:
- filename: extensions/azure.ai.models/internal/cmd/custom_create.go
words:
- Qwen
# Extra accepted words for the eval suite README.
- filename: test/eval/README.md
words:
- Waza
- waza
- urlopen
# Extra accepted words for the Python graders.
- filename: "test/eval/graders/*.py"
words:
- Waza
- waza
- hdrs
- mysite
- mydb
# Extra accepted words for the eval task definitions.
# NOTE(review): these look like deliberately truncated word stems — confirm.
- filename: "test/eval/tasks/**/*.yaml"
words:
- authenticat
- idempoten
# Extra accepted words for the human-scenario tests.
# NOTE(review): "provison" looks like an intentional misspelling used as test
# input — confirm before correcting it.
- filename: "test/eval/tests/human/*.test.ts"
words:
- compdef
- badcfg
- provison
- notacommand
ignorePaths:
- "**/*_test.go"
- "**/mock*.go"
Expand Down
6 changes: 6 additions & 0 deletions cli/azd/test/eval/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Installed dependencies and build output.
node_modules/
dist/
# Report files written into reports/.
reports/*.json
reports/*.md
reports/junit.xml
# Never ignore the placeholder file in reports/.
!reports/.gitkeep
Loading
Loading