6 changes: 2 additions & 4 deletions .github/workflows/bench-pr.yml
@@ -115,10 +115,8 @@ jobs:
| jq -r '.workflow_runs[].head_sha' \
)

aws s3 cp s3://vortex-benchmark-results-database/data.json.gz - --no-sign-request \
| gzip -d \
| grep $base_commit_sha \
> base.json
python3 scripts/s3-download.py s3://vortex-benchmark-results-database/data.json.gz data.json.gz --no-sign-request
gzip -d -c data.json.gz | grep $base_commit_sha > base.json

echo '# Benchmarks: ${{ matrix.benchmark.name }}' > comment.md
echo '' >> comment.md
2 changes: 1 addition & 1 deletion .github/workflows/fuzz-coverage.yml
@@ -64,7 +64,7 @@ jobs:
CORPUS_KEY="${{ matrix.fuzz_target }}_corpus.tar.zst"
CORPUS_DIR="fuzz/corpus/${{ matrix.fuzz_target }}"

if aws s3 cp "s3://vortex-fuzz-corpus/$CORPUS_KEY" . 2>/dev/null; then
if python3 scripts/s3-download.py "s3://vortex-fuzz-corpus/$CORPUS_KEY" "$CORPUS_KEY"; then
echo "Downloaded corpus successfully"
tar -xf "$CORPUS_KEY"
else
4 changes: 2 additions & 2 deletions .github/workflows/minimize_fuzz_corpus_workflow.yml
@@ -73,7 +73,7 @@ jobs:
CORPUS_KEY="${{ inputs.fuzz_target }}_corpus.tar.zst"
CORPUS_DIR="fuzz/corpus/${{ inputs.fuzz_target }}"
if aws s3 cp "s3://vortex-fuzz-corpus/$CORPUS_KEY" . 2>/dev/null; then
if python3 scripts/s3-download.py "s3://vortex-fuzz-corpus/$CORPUS_KEY" "$CORPUS_KEY"; then
echo "Downloaded corpus successfully"
tar -xf "$CORPUS_KEY"
else
@@ -108,4 +108,4 @@ jobs:
CORPUS_KEY="${{ inputs.fuzz_target }}_corpus.tar.zst"
CORPUS_DIR="fuzz/corpus/${{ inputs.fuzz_target }}"
tar -acf "$CORPUS_KEY" "$CORPUS_DIR"
aws s3api put-object --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --body "$CORPUS_KEY" --checksum-algorithm CRC32
python3 scripts/s3-upload.py --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --body "$CORPUS_KEY" --checksum-algorithm CRC32
24 changes: 2 additions & 22 deletions .github/workflows/run-fuzzer.yml
@@ -92,21 +92,8 @@ jobs:
CORPUS_KEY="${{ inputs.fuzz_target }}_corpus.tar.zst"
CORPUS_DIR="fuzz/corpus/${{ inputs.fuzz_target }}"

# Try to get ETag for optimistic locking on upload
if aws s3api head-object --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --query ETag --output text > current_etag 2>/dev/null; then
echo "Found existing corpus at s3://vortex-fuzz-corpus/$CORPUS_KEY"
else
echo ""
echo "=========================================="
echo "WARNING: No existing corpus found for ${{ inputs.fuzz_target }}"
echo "This is expected for new fuzzers. Starting with empty corpus."
echo "=========================================="
echo ""
echo '""' > current_etag
fi

# Try to download corpus
if aws s3 cp "s3://vortex-fuzz-corpus/$CORPUS_KEY" . 2>/dev/null; then
if python3 scripts/s3-download.py "s3://vortex-fuzz-corpus/$CORPUS_KEY" "$CORPUS_KEY"; then
echo "Downloaded corpus successfully"
tar -xf "$CORPUS_KEY"
else
@@ -183,14 +170,7 @@ jobs:

tar -acf "$CORPUS_KEY" "$CORPUS_DIR"

ETAG=$(cat current_etag)
if [ "$ETAG" = '""' ] || [ -z "$ETAG" ]; then
# New corpus, no ETag check needed
aws s3api put-object --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --body "$CORPUS_KEY" --checksum-algorithm CRC32
else
# Existing corpus, use optimistic locking
aws s3api put-object --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --body "$CORPUS_KEY" --checksum-algorithm CRC32 --if-match "$ETAG"
fi
python3 scripts/s3-upload.py --bucket vortex-fuzz-corpus --key "$CORPUS_KEY" --body "$CORPUS_KEY" --checksum-algorithm CRC32 --optimistic-lock

- name: Fail job if fuzz run found a bug
if: steps.check.outputs.crashes_found == 'true'
6 changes: 2 additions & 4 deletions .github/workflows/sql-benchmarks.yml
@@ -231,10 +231,8 @@ jobs:
| jq -r '.workflow_runs[].head_sha' \
)

aws s3 cp s3://vortex-benchmark-results-database/data.json.gz - --no-sign-request \
| gzip -d \
| grep $base_commit_sha \
> base.json
python3 scripts/s3-download.py s3://vortex-benchmark-results-database/data.json.gz data.json.gz --no-sign-request
gzip -d -c data.json.gz | grep $base_commit_sha > base.json

echo '# Benchmarks: ${{ matrix.name }}' > comment.md
echo '' >> comment.md
52 changes: 52 additions & 0 deletions scripts/s3-download.py
@@ -0,0 +1,52 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors

"""Download a file from S3 with exponential backoff retry."""

import argparse
import subprocess
import sys
import time


def main():
parser = argparse.ArgumentParser(description="Download a file from S3 with retry")
parser.add_argument("s3_url", help="S3 URL to download (e.g. s3://bucket/key)")
parser.add_argument("output", help="Local output file path")
parser.add_argument(
"--no-sign-request",
action="store_true",
help="Do not sign the request (for public buckets)",
)
parser.add_argument("--max-retries", type=int, default=5, help="Maximum number of retries")
args = parser.parse_args()

cmd = ["aws", "s3", "cp", args.s3_url, args.output]
if args.no_sign_request:
cmd.append("--no-sign-request")

for attempt in range(1, args.max_retries + 1):
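# Each attempt shells out to the AWS CLI; failed attempts back off
# exponentially (2s, 4s, 8s, ... capped at 30s) before retrying.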
result = subprocess.run(cmd)
if result.returncode == 0:
return

if attempt == args.max_retries:
break

delay = min(2**attempt, 30)
print(
f"S3 download failed (attempt {attempt}/{args.max_retries}), retrying in {delay}s...",
file=sys.stderr,
)
time.sleep(delay)

print(
f"S3 download failed after {args.max_retries} attempts",
file=sys.stderr,
)
sys.exit(1)


if __name__ == "__main__":
main()
111 changes: 111 additions & 0 deletions scripts/s3-upload.py
@@ -0,0 +1,111 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors

"""Upload a file to S3 with exponential backoff retry and optional optimistic locking."""

import argparse
import subprocess
import sys
import time


def head_etag(bucket: str, key: str) -> str | None:
"""Fetch the current ETag for an object, or None if it doesn't exist."""
result = subprocess.run(
[
"aws",
"s3api",
"head-object",
"--bucket",
bucket,
"--key",
key,
"--query",
"ETag",
"--output",
"text",
],
capture_output=True,
text=True,
)
if result.returncode != 0:
return None
etag = result.stdout.strip()
if not etag or etag == "null":
return None
return etag


def put_object(
bucket: str,
key: str,
body: str,
checksum_algorithm: str | None,
if_match: str | None,
) -> bool:
"""Upload an object, returning True on success."""
cmd = [
"aws",
"s3api",
"put-object",
"--bucket",
bucket,
"--key",
key,
"--body",
body,
]
if checksum_algorithm:
cmd.extend(["--checksum-algorithm", checksum_algorithm])
if if_match:
cmd.extend(["--if-match", if_match])

result = subprocess.run(cmd)
return result.returncode == 0


def main():
parser = argparse.ArgumentParser(description="Upload a file to S3 with retry and optional optimistic locking")
parser.add_argument("--bucket", required=True, help="S3 bucket name")
parser.add_argument("--key", required=True, help="S3 object key")
parser.add_argument("--body", required=True, help="Local file to upload")
parser.add_argument("--checksum-algorithm", help="Checksum algorithm (e.g. CRC32)")
parser.add_argument(
"--optimistic-lock",
action="store_true",
help="Use ETag-based optimistic locking (re-fetches ETag on each retry)",
)
parser.add_argument("--max-retries", type=int, default=5, help="Maximum number of retries")
args = parser.parse_args()

for attempt in range(1, args.max_retries + 1):
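# When --optimistic-lock is set, re-read the ETag on every attempt so a
# concurrent upload surfaces as a failed put-object rather than a silent overwrite.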
if_match = None
if args.optimistic_lock:
if_match = head_etag(args.bucket, args.key)
# New object, no ETag to match — just upload without locking
# (this handles the first-ever upload case)

if put_object(args.bucket, args.key, args.body, args.checksum_algorithm, if_match):
print("Upload successful.")
return

if attempt == args.max_retries:
break

delay = min(2**attempt, 30)
print(
f"S3 upload failed (attempt {attempt}/{args.max_retries}), retrying in {delay}s...",
file=sys.stderr,
)
time.sleep(delay)

print(
f"S3 upload failed after {args.max_retries} attempts",
file=sys.stderr,
)
sys.exit(1)


if __name__ == "__main__":
main()