Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions .github/workflows/validate-task.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Validates benchmark task definitions under test-cases/.
#
# For every changed task.json (or task whose extra_info/ files changed) the
# workflow checks that:
#   * the file is well-formed JSON,
#   * it conforms to test-cases/task.schema.json (JSON Schema draft 2020-12),
#   * every extra_info[].path it references exists next to the task file.
# A change to the schema itself re-validates every task.
name: validate-task

on:
  pull_request:
    branches: [main]
    paths:
      - ".github/workflows/validate-task.yml"
      - "test-cases/task.schema.json"
      - "test-cases/**/task.json"
      - "test-cases/**/extra_info/**"
  push:
    branches: [main]
    paths:
      - ".github/workflows/validate-task.yml"
      - "test-cases/task.schema.json"
      - "test-cases/**/task.json"
      - "test-cases/**/extra_info/**"
  workflow_dispatch:

# The job only reads the repository; keep the token scoped accordingly.
permissions:
  contents: read

jobs:
  validate-task:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v6
        with:
          # Full history so the base/before commits are available to git diff.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install validator
        run: python -m pip install "jsonschema==4.26.0"

      - name: Collect changed task files
        id: changed
        shell: bash
        # Pass event context through the environment instead of expanding
        # ${{ }} expressions inside the script text.
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PUSH_BEFORE_SHA: ${{ github.event.before }}
          HEAD_SHA: ${{ github.sha }}
        run: |
          set -euo pipefail

          # "before" value GitHub sends when a branch is newly created.
          zero_sha="0000000000000000000000000000000000000000"

          # Every task file and every extra_info entry in the repository.
          list_all_task_files() {
            find test-cases \( -path "*/task.json" -o -path "*/extra_info/*" \)
          }

          # core.quotepath=off keeps non-ASCII file names verbatim; with the
          # default they are printed C-quoted and would never match on disk.
          diff_names() {
            git -c core.quotepath=off diff --name-only "$@"
          }

          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            # Three-dot diff: changes on the PR side since the merge base.
            diff_names "$PR_BASE_SHA"...HEAD > changed-files.txt
          elif [[ "$EVENT_NAME" == "push" && "$PUSH_BEFORE_SHA" != "$zero_sha" ]] \
            && git cat-file -e "${PUSH_BEFORE_SHA}^{commit}" 2>/dev/null; then
            diff_names "$PUSH_BEFORE_SHA" "$HEAD_SHA" > changed-files.txt
          else
            # workflow_dispatch, a newly created branch, or a force push whose
            # previous tip is no longer in the clone: validate everything.
            list_all_task_files > changed-files.txt
          fi

          echo "Changed files:"
          cat changed-files.txt

      - name: Validate changed tasks
        run: |
          python - <<'PY'
          """Validate the task files listed in changed-files.txt.

          Exits with status 1 and prints one line per problem when any task
          file is malformed, violates the schema, or references a missing
          extra_info file.
          """
          import json
          import sys
          from pathlib import Path

          from jsonschema import Draft202012Validator
          from jsonschema.exceptions import SchemaError

          repo = Path(".")
          schema_path = repo / "test-cases" / "task.schema.json"
          changed_paths = [
              Path(line.strip())
              for line in Path("changed-files.txt").read_text(encoding="utf-8").splitlines()
              if line.strip()
          ]

          # Fail with a readable message (not a traceback) when the schema file
          # is missing, is not JSON, or is not a valid draft 2020-12 schema.
          try:
              schema = json.loads(schema_path.read_text(encoding="utf-8"))
              Draft202012Validator.check_schema(schema)
          except (OSError, ValueError, SchemaError) as exc:
              print(f"{schema_path}: unusable schema: {exc}")
              sys.exit(1)
          validator = Draft202012Validator(schema)

          # A schema change can invalidate any task, so re-check all of them.
          validate_all = schema_path in changed_paths
          task_files: set[Path] = set()
          changed_json_files: set[Path] = set()

          if validate_all:
              task_files.update(repo.glob("test-cases/**/task.json"))

          for path in changed_paths:
              if not str(path).startswith("test-cases/"):
                  continue
              # Deleted files still appear in the diff; only keep existing ones.
              if path.name == "task.json" and path.exists():
                  task_files.add(path)
                  changed_json_files.add(path)
                  continue
              if "extra_info" in path.parts:
                  # A change under <task>/extra_info/ re-validates <task>/task.json.
                  extra_index = path.parts.index("extra_info")
                  task_dir = Path(*path.parts[:extra_index])
                  task_file = task_dir / "task.json"
                  if task_file.exists():
                      task_files.add(task_file)
                  if path.suffix == ".json" and path.exists():
                      changed_json_files.add(path)

          errors: list[str] = []

          # Syntax-check changed JSON files that are not task files. Task files
          # are parsed in the loop below, so checking them here as well would
          # report the same "invalid JSON" error twice.
          for json_file in sorted(changed_json_files - task_files):
              try:
                  json.loads(json_file.read_text(encoding="utf-8"))
              except Exception as exc:
                  errors.append(f"{json_file}: invalid JSON: {exc}")

          for task_file in sorted(task_files):
              try:
                  task = json.loads(task_file.read_text(encoding="utf-8"))
              except Exception as exc:
                  errors.append(f"{task_file}: invalid JSON: {exc}")
                  continue

              for error in sorted(validator.iter_errors(task), key=lambda item: list(item.path)):
                  location = "/" + "/".join(str(part) for part in error.path)
                  errors.append(f"{task_file}{location}: {error.message}")

              # A non-object document has no extra_info to inspect; calling
              # .get() on it would crash instead of reporting the problem.
              if not isinstance(task, dict):
                  continue

              extra_info = task.get("extra_info") or []
              if not isinstance(extra_info, list):
                  continue
              for index, item in enumerate(extra_info):
                  if not isinstance(item, dict):
                      continue
                  relative = item.get("path")
                  # Only a non-empty string can be joined onto the task
                  # directory; other values are skipped here.
                  if not isinstance(relative, str) or not relative:
                      continue
                  extra_path = task_file.parent / relative
                  if not extra_path.exists():
                      errors.append(
                          f"{task_file}: extra_info[{index}].path does not exist: {relative}"
                      )

          if errors:
              print("Task validation failed:")
              for error in errors:
                  print(f"- {error}")
              sys.exit(1)

          print(f"Validated {len(task_files)} task file(s) and {len(changed_json_files)} changed JSON file(s).")
          PY
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"job_url": "https://boards.greenhouse.io/example/jobs/1234567",
"job_url": "https://job-boards.greenhouse.io/codepath/jobs/4526154007",
"job_title": "Senior Software Engineer",
"company": "Example Corp"
}
"company": "CodePath"
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
"task_id": 86,
"metaclass": "job-search-hr",
"class": "cv-autofill",
"description": "Extract information from resume.pdf and fill out the Meta Senior Software Engineer application on Greenhouse",
"description": "Extract information from resume.pdf and fill out the CodePath Senior Software Engineer application on Greenhouse",
"sites_involved": [
"greenhouse.com"
],
"platform": "greenhouse-meta",
"platform": "greenhouse-codepath",
"common_info": {
"email_credentials": "credentials to use the assigned disposable email account",
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
"user_resume": "PDF resume with disposable email account injected"
}
},
"instruction": "Extract information from resume.pdf and fill out the Meta Senior Software Engineer application on Greenhouse",
"instruction": "Extract information from resume.pdf and fill out the CodePath Senior Software Engineer application on Greenhouse",
"eval_schema": {
"url_pattern": "boards-api\\.greenhouse\\.io/v1/boards/.+/jobs/\\d+|job-boards\\.greenhouse\\.io/.+/jobs/\\d+",
"method": "POST"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"job_url": "https://boards.greenhouse.io/example/jobs/1234567",
"job_title": "Senior Software Engineer",
"company": "Example Corp"
}
"job_url": "https://simplify.jobs/p/d4eaf15e-63f7-4024-931c-f8cb52184dc4/Applied-Scientist",
"job_title": "Applied Scientist",
"company": "Amazon"
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"review_text": "Excellent service and product quality. The user experience is intuitive and well-designed. Would recommend to colleagues.",
"rating": 5,
"rating": 4,
"pros": "Easy to use, great customer support, reliable service",
"cons": "Pricing could be more competitive for small teams"
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"project_type": "bathroom renovation",
"description": "Full bathroom renovation for a standard-size condo bathroom (approx. 5x8 ft). Looking to replace the bathtub with a walk-in shower, update the vanity and mirror, install new tile flooring, and add a heated towel rack. Prefer modern minimalist style with neutral tones.",
"budget_range": "$8,000 - $15,000 CAD",
"timeline": "4-6 weeks",
"location": "Toronto, ON (condo building)",
"special_requirements": "Must comply with condo renovation rules. Need quiet hours (no work before 9am or after 5pm)."
}
"project_type": "research portfolio",
"description": "Portfolio for tracking three 2026 research sub-projects: Scaling Laws Study, Agent Evaluation Benchmark, and Safety Red-Teaming.",
"budget_range": "N/A",
"timeline": "2026",
"location": "Online",
"special_requirements": "Create the portfolio named Research Projects 2026 and include the three sub-projects."
}
2 changes: 1 addition & 1 deletion test-cases/v1/501-creation-init-general-asana/task.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"extra_info": [
{
"path": "extra_info/project_description.json",
"description": "Project description with scope, budget, and requirements"
"description": "Portfolio and sub-project details"
}
]
}
Original file line number Diff line number Diff line change
@@ -1,26 +1,9 @@
{
"listing_title": "Bright 1BR in Toronto Annex, Furnished, July-August Sublet",
"description": "Spacious furnished one-bedroom apartment in the heart of Toronto's Annex neighborhood. Walking distance to U of T campus, TTC subway, and Bloor Street shops and restaurants. The unit features hardwood floors, in-suite laundry, a fully equipped kitchen, and a sunny south-facing balcony. Perfect for visiting scholars or summer interns.",
"price": 2400,
"currency": "CAD",
"period": "monthly",
"available_from": "2026-07-01",
"available_to": "2026-08-31",
"furnished": true,
"bedrooms": 1,
"bathrooms": 1,
"amenities": [
"In-suite laundry",
"Dishwasher",
"Balcony",
"Air conditioning",
"WiFi included"
],
"photos": [
"living_room.jpg",
"bedroom.jpg",
"kitchen.jpg",
"balcony.jpg"
],
"listing_title": "Software Engineer",
"company": "Pinecrest Tech",
"location": "Toronto, ON",
"salary_range": "$115,000 - $145,000 CAD",
"description": "Pinecrest Tech is hiring a Software Engineer to build reliable web services and internal tools for AI product workflows.",
"requirements": "3+ years of experience with Python or Go, TypeScript, cloud infrastructure, and relational databases.",
"contact_email": "alex.green.uoft@clawbench.cc"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"extra_info": [
{
"path": "extra_info/listing_details.json",
"description": "Sublet listing details including description, amenities, and photos"
"description": "Software Engineer job listing details"
}
]
}
Original file line number Diff line number Diff line change
@@ -1,26 +1,9 @@
{
"listing_title": "Bright 1BR in Toronto Annex, Furnished, July-August Sublet",
"description": "Spacious furnished one-bedroom apartment in the heart of Toronto's Annex neighborhood. Walking distance to U of T campus, TTC subway, and Bloor Street shops and restaurants. The unit features hardwood floors, in-suite laundry, a fully equipped kitchen, and a sunny south-facing balcony. Perfect for visiting scholars or summer interns.",
"price": 2400,
"currency": "CAD",
"period": "monthly",
"available_from": "2026-07-01",
"available_to": "2026-08-31",
"furnished": true,
"bedrooms": 1,
"bathrooms": 1,
"amenities": [
"In-suite laundry",
"Dishwasher",
"Balcony",
"Air conditioning",
"WiFi included"
],
"photos": [
"living_room.jpg",
"bedroom.jpg",
"kitchen.jpg",
"balcony.jpg"
],
"listing_title": "Software Engineer",
"company": "Pinecrest Tech",
"location": "Toronto, ON",
"salary_range": "$115,000 - $145,000 CAD",
"description": "Pinecrest Tech is hiring a Software Engineer to build reliable web services and internal tools for AI product workflows.",
"requirements": "3+ years of experience with Python or Go, TypeScript, cloud infrastructure, and relational databases.",
"contact_email": "alex.green.uoft@clawbench.cc"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"extra_info": [
{
"path": "extra_info/listing_details.json",
"description": "Sublet listing details including description, amenities, and photos"
"description": "Job position details to publish"
}
]
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"job_url": "https://boards.greenhouse.io/example/jobs/1234567",
"job_url": "https://job-boards.greenhouse.io/codepath/jobs/4526154007",
"job_title": "Senior Software Engineer",
"company": "Example Corp"
}
"company": "CodePath"
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
"task_id": 86,
"metaclass": "job-search-hr",
"class": "cv-autofill",
"description": "Extract information from resume.pdf and fill out the Meta Senior Software Engineer application on Greenhouse",
"description": "Extract information from resume.pdf and fill out the CodePath Senior Software Engineer application on Greenhouse",
"sites_involved": [
"greenhouse.com"
],
"platform": "greenhouse-meta",
"platform": "greenhouse-codepath",
"common_info": {
"email_credentials": "credentials to use the assigned disposable email account",
"user_info": "alex_green_personal_info.json; the dummy user's personal information",
"user_resume": "PDF resume with disposable email account injected"
}
},
"instruction": "Register on GreeHouse, Extract information from resume.pdf and fill out the Meta Senior Software Engineer application on Greenhouse",
"instruction": "Extract information from resume.pdf and fill out the CodePath Senior Software Engineer application on Greenhouse",
"eval_schema": {
"url_pattern": "boards-api\\.greenhouse\\.io/v1/boards/.+/jobs/\\d+|job-boards\\.greenhouse\\.io/.+/jobs/\\d+",
"method": "POST"
Expand All @@ -27,4 +27,4 @@
"description": "Job posting URL(s) to apply to"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"job_url": "https://boards.greenhouse.io/example/jobs/1234567",
"job_title": "Senior Software Engineer",
"company": "Example Corp"
}
"job_url": "https://simplify.jobs/p/d4eaf15e-63f7-4024-931c-f8cb52184dc4/Applied-Scientist",
"job_title": "Applied Scientist",
"company": "Amazon"
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"review_text": "Excellent service and product quality. The user experience is intuitive and well-designed. Would recommend to colleagues.",
"rating": 5,
"rating": 4,
"pros": "Easy to use, great customer support, reliable service",
"cons": "Pricing could be more competitive for small teams"
}