Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 12 additions & 61 deletions .github/workflows/sql-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,18 @@ jobs:
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
--opt remote-data-dir=${{ matrix.remote_storage }} \
${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }}

# Run scripts/capture-file-sizes.py over vortex-bench/data and append its
# output (sizes.json) to results.json. Skipped when the matrix entry sets
# remote_storage.
- name: Capture file sizes
  if: matrix.remote_storage == null
  shell: bash
  run: |
    # Quote the expanded expressions so each one reaches the script as a
    # single argument, matching the quoting used by the other steps.
    uv run --no-project scripts/capture-file-sizes.py \
      vortex-bench/data \
      --benchmark "${{ matrix.subcommand }}" \
      --commit "${{ inputs.mode == 'pr' && github.event.pull_request.head.sha || github.sha }}" \
      -o sizes.json
    cat sizes.json >> results.json

- name: Compare results
if: inputs.mode == 'pr'
shell: bash
Expand Down Expand Up @@ -435,56 +447,6 @@ jobs:
# unique benchmark configuration must have a unique comment-tag.
comment-tag: bench-pr-comment-${{ matrix.id }}

# Build sizes-comment.md: a Markdown report comparing this PR's file sizes
# with the baseline records for the latest successful develop run, then
# append the report to the step summary.
- name: Compare file sizes
  if: inputs.mode == 'pr' && matrix.remote_storage == null
  shell: bash
  env:
    # Hand the token over through the environment instead of interpolating
    # it into the script text.
    GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -Eeu -o pipefail -x

    # Capture HEAD file sizes (vortex formats only)
    uv run --no-project scripts/capture-file-sizes.py \
      vortex-bench/data \
      --benchmark "${{ matrix.subcommand }}" \
      --commit "${{ github.event.pull_request.head.sha }}" \
      -o head-sizes.json

    # Get base commit SHA (same as benchmark comparison)
    base_commit_sha=$(\
      curl -L \
        -H "Accept: application/vnd.github+json" \
        -H "Authorization: Bearer ${GITHUB_API_TOKEN}" \
        https://api.github.com/repos/vortex-data/vortex/actions/workflows/bench.yml/runs\?branch\=develop\&status\=success\&per_page\=1 \
        | jq -r '.workflow_runs[].head_sha' \
    )

    # Download file sizes baseline (per-benchmark file)
    python3 scripts/s3-download.py s3://vortex-ci-benchmark-results/file-sizes-${{ matrix.id }}.json.gz file-sizes.json.gz --no-sign-request || true

    # Generate comparison report
    echo '# File Sizes: ${{ matrix.name }}' > sizes-comment.md
    echo '' >> sizes-comment.md

    if [ -f file-sizes.json.gz ]; then
      # Start from an empty baseline; an empty SHA must not be used as a
      # pattern because it would match every record in the history file.
      : > base-sizes.json
      if [ -n "$base_commit_sha" ]; then
        # -F treats the SHA as a fixed string; "|| true" tolerates no match.
        gzip -d -c file-sizes.json.gz | grep -F -- "$base_commit_sha" > base-sizes.json || true
      fi
      if [ -s base-sizes.json ]; then
        uv run --no-project scripts/compare-file-sizes.py base-sizes.json head-sizes.json \
          >> sizes-comment.md
      else
        echo '_No baseline file sizes found for base commit._' >> sizes-comment.md
      fi
    else
      echo '_No baseline file sizes available yet._' >> sizes-comment.md
    fi

    cat sizes-comment.md >> "$GITHUB_STEP_SUMMARY"

# Post sizes-comment.md as a pull-request comment. Runs only in 'pr' mode,
# when the matrix entry has no remote_storage, and when the PR head repo is
# not a fork.
- name: Comment PR with file sizes
  uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3
  if: >-
    inputs.mode == 'pr'
    && matrix.remote_storage == null
    && github.event.pull_request.head.repo.fork == false
  with:
    comment-tag: file-sizes-${{ matrix.id }}
    file-path: sizes-comment.md

- name: Comment PR on failure
if: failure() && inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3
Expand Down Expand Up @@ -513,17 +475,6 @@ jobs:
--benchmark-id "${{ matrix.id }}" \
--repo-url "${{ github.server_url }}/${{ github.repository }}"

# In 'develop' mode, capture file sizes for the current commit into
# sizes.json and pass it to scripts/cat-s3.sh together with the bucket and
# the per-benchmark object name.
# NOTE(review): cat-s3.sh presumably appends sizes.json to that S3 object —
# confirm against the script.
- name: Upload File Sizes
  if: inputs.mode == 'develop' && matrix.remote_storage == null
  shell: bash
  run: |
    # Quote the expanded expressions so each one is passed as one argument.
    uv run --no-project scripts/capture-file-sizes.py \
      vortex-bench/data \
      --benchmark "${{ matrix.subcommand }}" \
      --commit "${{ github.sha }}" \
      -o sizes.json
    bash scripts/cat-s3.sh vortex-ci-benchmark-results "file-sizes-${{ matrix.id }}.json.gz" sizes.json

- name: Alert incident.io
if: failure() && inputs.mode == 'develop'
uses: ./.github/actions/alert-incident-io
Expand Down
23 changes: 17 additions & 6 deletions scripts/capture-file-sizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,28 @@ def main():

records.append(
{
"metric": "file_size",
"unit": "bytes",
"value": size_bytes,
"commit_id": args.commit,
"benchmark": args.benchmark,
"scale_factor": scale_factor,
"format": format_name,
"file": str(relative_path),
"size_bytes": size_bytes,
"file_size": {
"benchmark": args.benchmark,
"scale_factor": scale_factor,
"format": format_name,
"file": str(relative_path),
},
}
)

# Sort for deterministic output
records.sort(key=lambda r: (r["benchmark"], r["scale_factor"], r["format"], r["file"]))
records.sort(
key=lambda r: (
r["file_size"]["benchmark"],
r["file_size"]["scale_factor"],
r["file_size"]["format"],
r["file_size"]["file"],
)
)

# Write JSONL output
with open(args.output, "w") as f:
Expand Down
Loading
Loading