Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions .github/workflows/agents_validate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Runs `make agents_validate` whenever AGENTS.md, CLAUDE.md, the validator
# script, or this workflow file changes (on push, on pull requests, or manually).
name: Validate AGENTS.md

on:
  push:
    branches:
      - '**'
    paths:
      - 'AGENTS.md'
      - 'CLAUDE.md'
      - 'scripts/validate_agents_md.py'
      - '.github/workflows/agents_validate.yaml'
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - 'AGENTS.md'
      - 'CLAUDE.md'
      - 'scripts/validate_agents_md.py'
      - '.github/workflows/agents_validate.yaml'
  workflow_dispatch:

# Every step only reads the repository, so restrict the job token to read-only
# instead of inheriting the repository default.
permissions:
  contents: read

jobs:
  agents_validate:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # A recursive checkout already initialises and updates all
          # submodules, so no separate `git submodule init/update` step is needed.
          submodules: 'recursive'
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Read Python version
        # Export the version pinned in .python-version for the setup step below.
        run: echo "PYTHON_VERSION=$(tr -d '\n' < .python-version)" >> "$GITHUB_ENV"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # NOTE(review): this installs whatever uv release is current at run time;
      # consider pinning a version if reproducibility matters.
      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      - name: Install dependencies
        run: |
          uv sync

      - name: Validate AGENTS.md
        run: |
          make agents_validate
49 changes: 49 additions & 0 deletions .github/workflows/nightly_tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Nightly run of the slow and nondeterministic test suites, followed by the
# AGENTS.md validation. Can also be started manually.
name: Nightly Tests

on:
  schedule:
    # Daily at 02:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 2 * * *"
  workflow_dispatch:

# Every step only reads the repository, so restrict the job token to read-only
# instead of inheriting the repository default.
permissions:
  contents: read

jobs:
  nightly_tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # A recursive checkout already initialises and updates all
          # submodules, so no separate `git submodule init/update` step is needed.
          submodules: 'recursive'
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Read Python version
        # Export the version pinned in .python-version for the setup step below.
        run: echo "PYTHON_VERSION=$(tr -d '\n' < .python-version)" >> "$GITHUB_ENV"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # NOTE(review): this installs whatever uv release is current at run time;
      # consider pinning a version if reproducibility matters.
      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      - name: Install dependencies
        run: |
          uv sync

      # Secrets are passed through step-level `env:` rather than being expanded
      # into shell text and appended to $GITHUB_ENV. That avoids breakage or
      # injection when a value contains quotes, `$` or newlines, and keeps the
      # keys out of steps that do not need them.
      - name: Run slow tests
        env:
          DEV_ENV: ${{ secrets.DEV_ENV }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
          make test_slow

      - name: Run nondeterministic tests
        env:
          DEV_ENV: ${{ secrets.DEV_ENV }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
          make test_nondeterministic

      - name: Validate AGENTS.md
        run: |
          make agents_validate
11 changes: 4 additions & 7 deletions .github/workflows/test_target_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
submodules: 'recursive'
token: ${{ secrets.GITHUB_TOKEN }}
Expand All @@ -40,7 +40,7 @@ jobs:
echo "Log Level: ${{ github.event.inputs.log_level }}"
echo "Environment: ${{ github.event.inputs.environment }}"
- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install uv
Expand All @@ -56,12 +56,9 @@ jobs:
echo "GROQ_API_KEY=${{ secrets.GROQ_API_KEY }}" >> $GITHUB_ENV
echo "PERPLEXITY_API_KEY=${{ secrets.PERPLEXITY_API_KEY }}" >> $GITHUB_ENV
echo "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" >> $GITHUB_ENV
- name: Run tests
- name: Run fast tests
run: |
make test
make test_fast
- name: Run flaky test detection
run: |
make test_flaky
- name: Validate AGENTS.md
run: |
make agents_validate
4 changes: 4 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ make all # Run main.py with setup

# Testing
make test # Run pytest on tests/
make test_fast # Run fast tests (no slow/nondeterministic)
make test_flaky # Repeat fast tests to detect flakiness
make test_slow # Run slow tests only
make test_nondeterministic # Run nondeterministic tests only

# Code Quality (run after major changes)
make fmt # Run black formatter + JSON formatting
Expand Down
31 changes: 28 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,39 @@ docs: ## Run docs with bun
TEST_TARGETS = tests/

### Testing
# Runs the whole suite under $(TEST_TARGETS) with no marker filter.
# Only the current header is kept; the superseded "Run pytest tests" header
# was residue from the diff rendering.
test: check_uv ## Run all pytest tests
	@echo "$(GREEN)🧪Running Target Tests...$(RESET)"
	$(TEST) $(TEST_TARGETS)
	@echo "$(GREEN)✅Target Tests Passed.$(RESET)"

test_flaky: check_uv ## Run tests twice to detect flaky tests
# Fast suite: everything under $(TEST_TARGETS) except tests marked
# `slow` or `nondeterministic`.
test_fast: check_uv ## Run fast tests (exclude slow/nondeterministic)
	@echo "$(GREEN)🧪Running Fast Tests...$(RESET)"
	$(TEST) -m "not slow and not nondeterministic" $(TEST_TARGETS)
	@echo "$(GREEN)✅Fast Tests Passed.$(RESET)"

# Runs only the tests marked `slow`.
# Exit status 5 (pytest: no tests collected) is downgraded to a warning so the
# target still succeeds when nothing carries the marker. Any other status is
# propagated unchanged.
test_slow: check_uv ## Run slow tests only
	@echo "$(GREEN)🧪Running Slow Tests...$(RESET)"
	@$(TEST) -m "slow" $(TEST_TARGETS); \
	status=$$?; \
	if [ $$status -eq 5 ]; then \
		echo "$(YELLOW)⚠️ No slow tests collected.$(RESET)"; \
		exit 0; \
	fi; \
	if [ $$status -eq 0 ]; then \
		echo "$(GREEN)✅Slow Tests Passed.$(RESET)"; \
	fi; \
	exit $$status

# Runs only the tests marked `nondeterministic`.
# Exit status 5 (pytest: no tests collected) is downgraded to a warning so the
# target still succeeds when nothing carries the marker. Any other status is
# propagated unchanged.
test_nondeterministic: check_uv ## Run nondeterministic tests only
	@echo "$(GREEN)🧪Running Nondeterministic Tests...$(RESET)"
	@$(TEST) -m "nondeterministic" $(TEST_TARGETS); \
	status=$$?; \
	if [ $$status -eq 5 ]; then \
		echo "$(YELLOW)⚠️ No nondeterministic tests collected.$(RESET)"; \
		exit 0; \
	fi; \
	if [ $$status -eq 0 ]; then \
		echo "$(GREEN)✅Nondeterministic Tests Passed.$(RESET)"; \
	fi; \
	exit $$status

# Repeats the fast suite (same marker filter as test_fast) with `--count 2`
# to surface flaky tests.
# Only the current invocation is kept: the superseded `-m "not slow"` command
# would run the suite a second time and include nondeterministic tests.
# NOTE(review): `--count` is presumably provided by the pytest-repeat plugin — confirm it is installed.
test_flaky: check_uv ## Repeat fast tests to detect flaky tests
	@echo "$(GREEN)🧪Running Flaky Test Detection...$(RESET)"
	$(TEST) --count 2 -m "not slow and not nondeterministic" $(TEST_TARGETS)
	@echo "$(GREEN)✅Flaky Test Detection Passed.$(RESET)"


Expand Down