Get update from RAG-201 into test-RAG-162 #55

Workflow file for this run

.github/workflows/deepeval-tests.yml at 7184a3e

	name: DeepEval RAG System Tests

	# Trigger: pull requests only, and only when they are opened, receive new
	# commits (synchronize), or are reopened.
	on:
	  pull_request:
	    types: [opened, synchronize, reopened]
	    # Path filter: skip the run unless the PR touches application code,
	    # tests, or this workflow file itself.
	    paths:
	      - 'src/**'
	      - 'tests/**'
	      - '.github/workflows/deepeval-tests.yml'

	jobs:
	  # Runs the DeepEval pytest suite, publishes the generated report as a PR
	  # comment, and finally gates the job on the recorded pass rate.
	  deepeval-tests:
	    runs-on: ubuntu-latest
	    timeout-minutes: 40

	    # Least-privilege token: read the repository for checkout, write to
	    # pull requests so the report comment can be created or updated.
	    permissions:
	      contents: read
	      pull-requests: write

	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v4

	      - name: Set up Python
	        uses: actions/setup-python@v5
	        with:
	          python-version-file: '.python-version'

	      - name: Set up uv
	        uses: astral-sh/setup-uv@v6

	      # --frozen installs exactly what the lockfile specifies.
	      - name: Install dependencies (locked)
	        run: uv sync --frozen

	      # The step id lets the final gating step read this step's outcome.
	      - name: Run DeepEval tests
	        id: run_tests
	        env:
	          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	        run: uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short

	      # always(): still produce a report when the test step failed.
	      # Run through uv so the script uses the same environment as the tests.
	      - name: Generate evaluation report
	        if: always()
	        run: uv run python tests/deepeval_tests/report_generator.py

	      # Posts test_report.md as a PR comment, updating the previous report
	      # comment in place when one exists so reruns do not pile up comments.
	      - name: Comment PR with test results
	        if: always() && github.event_name == 'pull_request'
	        uses: actions/github-script@v7
	        with:
	          script: |
	            const fs = require('fs');

	            // Heading text used to recognise a report comment from an earlier run.
	            const REPORT_MARKER = 'RAG System Evaluation Report';

	            try {
	              const reportContent = fs.readFileSync('test_report.md', 'utf8');

	              // Paginate: the list endpoint returns a single page per call, so
	              // an earlier report on a busy PR would otherwise be missed.
	              const comments = await github.paginate(
	                github.rest.issues.listComments,
	                {
	                  owner: context.repo.owner,
	                  repo: context.repo.repo,
	                  issue_number: context.issue.number,
	                  per_page: 100
	                }
	              );

	              // Optional chaining guards comments whose author or body is null.
	              const existingComment = comments.find(
	                comment => comment.user?.login === 'github-actions[bot]' &&
	                  comment.body?.includes(REPORT_MARKER)
	              );

	              if (existingComment) {
	                await github.rest.issues.updateComment({
	                  owner: context.repo.owner,
	                  repo: context.repo.repo,
	                  comment_id: existingComment.id,
	                  body: reportContent
	                });
	              } else {
	                await github.rest.issues.createComment({
	                  owner: context.repo.owner,
	                  repo: context.repo.repo,
	                  issue_number: context.issue.number,
	                  body: reportContent
	                });
	              }

	            } catch (error) {
	              console.error('Failed to post test results:', error);

	              // Fallback: tell the PR author that the report is missing.
	              // If this call also fails, the step fails with that error.
	              await github.rest.issues.createComment({
	                issue_number: context.issue.number,
	                owner: context.repo.owner,
	                repo: context.repo.repo,
	                body: `## RAG System Evaluation Report\n\nError generating test report\n\nFailed to read or post test results. Check workflow logs for details.\n\nError: ${error.message}`
	              });
	            }

	      # Final gate: fails the job when no results were recorded or when the
	      # pass rate in pytest_captured_results.json is below the threshold.
	      - name: Check test results and fail if needed
	        if: always()
	        env:
	          # Passed through the environment instead of being spliced into
	          # the script text.
	          TEST_OUTCOME: ${{ steps.run_tests.outcome }}
	          # Minimum acceptable pass rate, in percent.
	          PASS_RATE_THRESHOLD: '70'
	        run: |
	          # A failed pytest step is informational here; the pass rate decides.
	          if [ "$TEST_OUTCOME" = "failure" ]; then
	            echo "Tests ran but failed - this is expected if RAG performance is below threshold"
	          fi

	          if [ ! -f "pytest_captured_results.json" ]; then
	            echo "ERROR: No test results file found"
	            exit 1
	          fi

	          # Missing keys fall back to 0 via jq's alternative operator.
	          total_tests=$(jq '.total_tests // 0' pytest_captured_results.json)
	          passed_tests=$(jq '.passed_tests // 0' pytest_captured_results.json)

	          # Also guards the division below against a zero denominator.
	          if [ "$total_tests" -eq 0 ]; then
	            echo "ERROR: No tests were executed"
	            exit 1
	          fi

	          pass_rate=$(awk -v passed="$passed_tests" -v total="$total_tests" \
	            'BEGIN { print passed / total * 100 }')

	          echo "DeepEval Test Results:"
	          echo "Total Tests: $total_tests"
	          echo "Passed Tests: $passed_tests"
	          echo "Pass Rate: $pass_rate%"

	          # Floating-point comparison done in awk; exits 0 when below threshold.
	          if awk -v rate="$pass_rate" -v min="$PASS_RATE_THRESHOLD" \
	            'BEGIN { exit !(rate < min) }'; then
	            echo "TEST FAILURE: Pass rate $pass_rate% is below threshold ${PASS_RATE_THRESHOLD}%"
	            echo "RAG system performance is below acceptable standards."
	            exit 1
	          else
	            echo "TEST SUCCESS: Pass rate $pass_rate% meets threshold ${PASS_RATE_THRESHOLD}%"
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Get update from RAG-201 into test-RAG-162 #55

Workflow file

Get update from RAG-201 into test-RAG-162 #55

Uh oh!

Workflow file for this run