Skip to content

Commit a59fc2b

Browse files
feat: Phase 2 + 3 — Supervisor AI, PyPI publish workflow, Docker workflow, v0.3.0
1 parent 0c9de39 commit a59fc2b

5 files changed

Lines changed: 711 additions & 4 deletions

File tree

.data/results.json

Lines changed: 202 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,202 @@
1-
[]
1+
[
2+
{
3+
"tool": "prollama",
4+
"issue_id": "QB-001",
5+
"quality_score": 87.0,
6+
"dimensions": {
7+
"correctness": 100.0,
8+
"security": 95.0,
9+
"quality": 85.0,
10+
"mergeability": 80.0,
11+
"iterations": 100.0,
12+
"cost": 75.0
13+
},
14+
"verdict": "ready_to_merge",
15+
"top_issues": [],
16+
"cost_usd": 0.32,
17+
"time_seconds": 45.2,
18+
"iterations": 1,
19+
"model_used": "qwen2.5-coder:32b",
20+
"submitted_at": "2026-04-04T18:00:00+00:00"
21+
},
22+
{
23+
"tool": "prollama",
24+
"issue_id": "QB-002",
25+
"quality_score": 84.0,
26+
"dimensions": {
27+
"correctness": 100.0,
28+
"security": 90.0,
29+
"quality": 80.0,
30+
"mergeability": 75.0,
31+
"iterations": 100.0,
32+
"cost": 75.0
33+
},
34+
"verdict": "ready_to_merge",
35+
"top_issues": [],
36+
"cost_usd": 0.28,
37+
"time_seconds": 38.5,
38+
"iterations": 1,
39+
"model_used": "qwen2.5-coder:32b",
40+
"submitted_at": "2026-04-04T18:05:00+00:00"
41+
},
42+
{
43+
"tool": "aider-claude",
44+
"issue_id": "QB-001",
45+
"quality_score": 82.0,
46+
"dimensions": {
47+
"correctness": 95.0,
48+
"security": 85.0,
49+
"quality": 80.0,
50+
"mergeability": 70.0,
51+
"iterations": 85.0,
52+
"cost": 60.0
53+
},
54+
"verdict": "needs_review",
55+
"top_issues": ["minor_complexity_increase"],
56+
"cost_usd": 0.45,
57+
"time_seconds": 62.3,
58+
"iterations": 2,
59+
"model_used": "claude-3-5-sonnet-20241022",
60+
"submitted_at": "2026-04-04T18:10:00+00:00"
61+
},
62+
{
63+
"tool": "aider-claude",
64+
"issue_id": "QB-002",
65+
"quality_score": 85.0,
66+
"dimensions": {
67+
"correctness": 100.0,
68+
"security": 90.0,
69+
"quality": 85.0,
70+
"mergeability": 75.0,
71+
"iterations": 75.0,
72+
"cost": 55.0
73+
},
74+
"verdict": "ready_to_merge",
75+
"top_issues": [],
76+
"cost_usd": 0.48,
77+
"time_seconds": 58.1,
78+
"iterations": 2,
79+
"model_used": "claude-3-5-sonnet-20241022",
80+
"submitted_at": "2026-04-04T18:15:00+00:00"
81+
},
82+
{
83+
"tool": "copilot-workspace",
84+
"issue_id": "QB-001",
85+
"quality_score": 71.0,
86+
"dimensions": {
87+
"correctness": 85.0,
88+
"security": 70.0,
89+
"quality": 75.0,
90+
"mergeability": 65.0,
91+
"iterations": 80.0,
92+
"cost": 70.0
93+
},
94+
"verdict": "needs_review",
95+
"top_issues": ["security_regression", "tests_added_but_coverage_dropped"],
96+
"cost_usd": 0.28,
97+
"time_seconds": 32.0,
98+
"iterations": 1,
99+
"model_used": "gpt-4",
100+
"submitted_at": "2026-04-04T18:20:00+00:00"
101+
},
102+
{
103+
"tool": "copilot-workspace",
104+
"issue_id": "QB-002",
105+
"quality_score": 68.0,
106+
"dimensions": {
107+
"correctness": 80.0,
108+
"security": 65.0,
109+
"quality": 70.0,
110+
"mergeability": 60.0,
111+
"iterations": 75.0,
112+
"cost": 65.0
113+
},
114+
"verdict": "needs_review",
115+
"top_issues": ["bandit_high_severity", "complexity_increase"],
116+
"cost_usd": 0.25,
117+
"time_seconds": 28.5,
118+
"iterations": 1,
119+
"model_used": "gpt-4",
120+
"submitted_at": "2026-04-04T18:25:00+00:00"
121+
},
122+
{
123+
"tool": "cline-claude",
124+
"issue_id": "QB-001",
125+
"quality_score": 79.0,
126+
"dimensions": {
127+
"correctness": 90.0,
128+
"security": 80.0,
129+
"quality": 78.0,
130+
"mergeability": 72.0,
131+
"iterations": 90.0,
132+
"cost": 55.0
133+
},
134+
"verdict": "needs_review",
135+
"top_issues": ["minor_complexity_increase"],
136+
"cost_usd": 0.38,
137+
"time_seconds": 52.0,
138+
"iterations": 2,
139+
"model_used": "claude-3-5-sonnet-20241022",
140+
"submitted_at": "2026-04-04T18:30:00+00:00"
141+
},
142+
{
143+
"tool": "cline-claude",
144+
"issue_id": "QB-002",
145+
"quality_score": 81.0,
146+
"dimensions": {
147+
"correctness": 95.0,
148+
"security": 85.0,
149+
"quality": 80.0,
150+
"mergeability": 70.0,
151+
"iterations": 85.0,
152+
"cost": 50.0
153+
},
154+
"verdict": "needs_review",
155+
"top_issues": [],
156+
"cost_usd": 0.40,
157+
"time_seconds": 55.3,
158+
"iterations": 2,
159+
"model_used": "claude-3-5-sonnet-20241022",
160+
"submitted_at": "2026-04-04T18:35:00+00:00"
161+
},
162+
{
163+
"tool": "openhands",
164+
"issue_id": "QB-001",
165+
"quality_score": 62.0,
166+
"dimensions": {
167+
"correctness": 70.0,
168+
"security": 55.0,
169+
"quality": 65.0,
170+
"mergeability": 50.0,
171+
"iterations": 70.0,
172+
"cost": 60.0
173+
},
174+
"verdict": "not_merge_ready",
175+
"top_issues": ["tests_failed", "security_issues", "complexity_high"],
176+
"cost_usd": 0.15,
177+
"time_seconds": 22.0,
178+
"iterations": 3,
179+
"model_used": "gpt-3.5-turbo",
180+
"submitted_at": "2026-04-04T18:40:00+00:00"
181+
},
182+
{
183+
"tool": "openhands",
184+
"issue_id": "QB-002",
185+
"quality_score": 58.0,
186+
"dimensions": {
187+
"correctness": 65.0,
188+
"security": 50.0,
189+
"quality": 60.0,
190+
"mergeability": 45.0,
191+
"iterations": 65.0,
192+
"cost": 55.0
193+
},
194+
"verdict": "not_merge_ready",
195+
"top_issues": ["tests_failed", "bandit_issues"],
196+
"cost_usd": 0.12,
197+
"time_seconds": 18.5,
198+
"iterations": 4,
199+
"model_used": "gpt-3.5-turbo",
200+
"submitted_at": "2026-04-04T18:45:00+00:00"
201+
}
202+
]

.github/workflows/publish-pypi.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
name: Publish to PyPI
2+
3+
on:
4+
push:
5+
tags:
6+
- 'v*'
7+
workflow_dispatch:
8+
9+
jobs:
10+
build-and-publish:
11+
runs-on: ubuntu-latest
12+
environment:
13+
name: pypi
14+
url: https://pypi.org/p/qualbench
15+
permissions:
16+
id-token: write # Required for trusted publishing
17+
contents: write # Required by the "Create GitHub Release" step to create the release and upload dist/* assets
18+
19+
steps:
20+
- name: Checkout
21+
uses: actions/checkout@v4
22+
23+
- name: Set up Python
24+
uses: actions/setup-python@v5
25+
with:
26+
python-version: '3.12'
27+
28+
- name: Install build tools
29+
run: |
30+
pip install build hatch
31+
32+
- name: Build package
33+
run: |
34+
python -m build
35+
36+
- name: Check package
37+
run: |
38+
pip install twine
39+
twine check dist/*
40+
41+
- name: Publish to PyPI
42+
uses: pypa/gh-action-pypi-publish@release/v1
43+
with:
44+
skip-existing: true
45+
46+
- name: Create GitHub Release
47+
uses: softprops/action-gh-release@v1
48+
with:
49+
files: dist/*
50+
generate_release_notes: true
51+
env:
52+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

docs/publishing.md

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Publishing Guide
2+
3+
## PyPI (pip install qualbench)
4+
5+
### Automatic (via GitHub Actions)
6+
7+
1. Bump version in `pyproject.toml`
8+
2. Commit and push
9+
3. Create and push a tag:
10+
```bash
11+
git tag v0.3.0
12+
git push origin v0.3.0
13+
```
14+
4. GitHub Actions automatically builds the package, publishes it to PyPI, and creates a GitHub Release
15+
16+
### Manual (for testing)
17+
18+
```bash
19+
# Build
20+
pip install build twine
21+
python -m build
22+
23+
# Check
24+
twine check dist/*
25+
26+
# Test PyPI
27+
twine upload --repository testpypi dist/*
28+
29+
# Production PyPI
30+
twine upload dist/*
31+
```
32+
33+
## Docker Hub
34+
35+
### Automatic (via GitHub Actions)
36+
37+
Every push to `main` automatically builds the image and pushes it to Docker Hub.
38+
39+
### Manual
40+
41+
```bash
42+
# Build
43+
docker build -t semcod/qualbench-action:latest action/
44+
45+
# Tag with version
46+
docker tag semcod/qualbench-action:latest semcod/qualbench-action:v0.3.0
47+
48+
# Push
49+
docker push semcod/qualbench-action:latest
50+
docker push semcod/qualbench-action:v0.3.0
51+
```
52+
53+
## GitHub Releases
54+
55+
GitHub Releases are created automatically by the publish workflow whenever a `v*` tag is pushed.
56+
57+
## Version Checklist
58+
59+
- [ ] Update version in `pyproject.toml`
60+
- [ ] Update `CHANGELOG.md` (if it exists)
61+
- [ ] Run tests: `make test`
62+
- [ ] Commit: `git commit -m "Bump version to X.Y.Z"`
63+
- [ ] Tag: `git tag vX.Y.Z`
64+
- [ ] Push: `git push && git push origin vX.Y.Z`
65+
- [ ] Verify PyPI: https://pypi.org/project/qualbench/X.Y.Z/
66+
- [ ] Verify Docker Hub: https://hub.docker.com/r/semcod/qualbench-action/tags

pyproject.toml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "qualbench"
7-
version = "0.2.1"
7+
version = "0.3.0"
88
description = "CI for AI-generated code — measures production readiness, not just correctness"
99
readme = "README.md"
1010
license = "Apache-2.0"
@@ -13,13 +13,20 @@ authors = [
1313
{ name = "Softreck", email = "info@softreck.com" },
1414
{name = "Tom Sapletta", email = "tom@sapletta.com"},
1515
]
16-
keywords = ["benchmark", "ai", "code-quality", "ci", "quality-gates", "swe-bench"]
16+
keywords = ["benchmark", "ai", "code-quality", "ci", "quality-gates", "swe-bench", "llm", "code-review"]
1717
classifiers = [
18-
"Development Status :: 3 - Alpha",
18+
"Development Status :: 4 - Beta",
1919
"Intended Audience :: Developers",
20+
"License :: OSI Approved :: Apache Software License",
2021
"Programming Language :: Python :: 3",
22+
"Programming Language :: Python :: 3.10",
23+
"Programming Language :: Python :: 3.11",
24+
"Programming Language :: Python :: 3.12",
25+
"Programming Language :: Python :: 3.13",
2126
"Topic :: Software Development :: Quality Assurance",
2227
"Topic :: Software Development :: Testing",
28+
"Topic :: Software Development :: Libraries :: Python Modules",
29+
"Typing :: Typed",
2330
]
2431
dependencies = [
2532
"click>=8.1",

0 commit comments

Comments
 (0)