feat: Phase 2 + 3 — Supervisor AI, PyPI publish workflow, Docker workflow, v0.3.0

tom-sapletta-com · tom-sapletta-com · commit a59fc2b2126e · 2026-04-04T19:41:48.000+02:00
diff --git a/.data/results.json b/.data/results.json
@@ -1 +1,202 @@
-[]
+[
+  {
+    "tool": "prollama",
+    "issue_id": "QB-001",
+    "quality_score": 87.0,
+    "dimensions": {
+      "correctness": 100.0,
+      "security": 95.0,
+      "quality": 85.0,
+      "mergeability": 80.0,
+      "iterations": 100.0,
+      "cost": 75.0
+    },
+    "verdict": "ready_to_merge",
+    "top_issues": [],
+    "cost_usd": 0.32,
+    "time_seconds": 45.2,
+    "iterations": 1,
+    "model_used": "qwen2.5-coder:32b",
+    "submitted_at": "2026-04-04T18:00:00+00:00"
+  },
+  {
+    "tool": "prollama",
+    "issue_id": "QB-002",
+    "quality_score": 84.0,
+    "dimensions": {
+      "correctness": 100.0,
+      "security": 90.0,
+      "quality": 80.0,
+      "mergeability": 75.0,
+      "iterations": 100.0,
+      "cost": 75.0
+    },
+    "verdict": "ready_to_merge",
+    "top_issues": [],
+    "cost_usd": 0.28,
+    "time_seconds": 38.5,
+    "iterations": 1,
+    "model_used": "qwen2.5-coder:32b",
+    "submitted_at": "2026-04-04T18:05:00+00:00"
+  },
+  {
+    "tool": "aider-claude",
+    "issue_id": "QB-001",
+    "quality_score": 82.0,
+    "dimensions": {
+      "correctness": 95.0,
+      "security": 85.0,
+      "quality": 80.0,
+      "mergeability": 70.0,
+      "iterations": 85.0,
+      "cost": 60.0
+    },
+    "verdict": "needs_review",
+    "top_issues": ["minor_complexity_increase"],
+    "cost_usd": 0.45,
+    "time_seconds": 62.3,
+    "iterations": 2,
+    "model_used": "claude-3-5-sonnet-20241022",
+    "submitted_at": "2026-04-04T18:10:00+00:00"
+  },
+  {
+    "tool": "aider-claude",
+    "issue_id": "QB-002",
+    "quality_score": 85.0,
+    "dimensions": {
+      "correctness": 100.0,
+      "security": 90.0,
+      "quality": 85.0,
+      "mergeability": 75.0,
+      "iterations": 75.0,
+      "cost": 55.0
+    },
+    "verdict": "ready_to_merge",
+    "top_issues": [],
+    "cost_usd": 0.48,
+    "time_seconds": 58.1,
+    "iterations": 2,
+    "model_used": "claude-3-5-sonnet-20241022",
+    "submitted_at": "2026-04-04T18:15:00+00:00"
+  },
+  {
+    "tool": "copilot-workspace",
+    "issue_id": "QB-001",
+    "quality_score": 71.0,
+    "dimensions": {
+      "correctness": 85.0,
+      "security": 70.0,
+      "quality": 75.0,
+      "mergeability": 65.0,
+      "iterations": 80.0,
+      "cost": 70.0
+    },
+    "verdict": "needs_review",
+    "top_issues": ["security_regression", "tests_added_but_coverage_dropped"],
+    "cost_usd": 0.28,
+    "time_seconds": 32.0,
+    "iterations": 1,
+    "model_used": "gpt-4",
+    "submitted_at": "2026-04-04T18:20:00+00:00"
+  },
+  {
+    "tool": "copilot-workspace",
+    "issue_id": "QB-002",
+    "quality_score": 68.0,
+    "dimensions": {
+      "correctness": 80.0,
+      "security": 65.0,
+      "quality": 70.0,
+      "mergeability": 60.0,
+      "iterations": 75.0,
+      "cost": 65.0
+    },
+    "verdict": "needs_review",
+    "top_issues": ["bandit_high_severity", "complexity_increase"],
+    "cost_usd": 0.25,
+    "time_seconds": 28.5,
+    "iterations": 1,
+    "model_used": "gpt-4",
+    "submitted_at": "2026-04-04T18:25:00+00:00"
+  },
+  {
+    "tool": "cline-claude",
+    "issue_id": "QB-001",
+    "quality_score": 79.0,
+    "dimensions": {
+      "correctness": 90.0,
+      "security": 80.0,
+      "quality": 78.0,
+      "mergeability": 72.0,
+      "iterations": 90.0,
+      "cost": 55.0
+    },
+    "verdict": "needs_review",
+    "top_issues": ["minor_complexity_increase"],
+    "cost_usd": 0.38,
+    "time_seconds": 52.0,
+    "iterations": 2,
+    "model_used": "claude-3-5-sonnet-20241022",
+    "submitted_at": "2026-04-04T18:30:00+00:00"
+  },
+  {
+    "tool": "cline-claude",
+    "issue_id": "QB-002",
+    "quality_score": 81.0,
+    "dimensions": {
+      "correctness": 95.0,
+      "security": 85.0,
+      "quality": 80.0,
+      "mergeability": 70.0,
+      "iterations": 85.0,
+      "cost": 50.0
+    },
+    "verdict": "needs_review",
+    "top_issues": [],
+    "cost_usd": 0.40,
+    "time_seconds": 55.3,
+    "iterations": 2,
+    "model_used": "claude-3-5-sonnet-20241022",
+    "submitted_at": "2026-04-04T18:35:00+00:00"
+  },
+  {
+    "tool": "openhands",
+    "issue_id": "QB-001",
+    "quality_score": 62.0,
+    "dimensions": {
+      "correctness": 70.0,
+      "security": 55.0,
+      "quality": 65.0,
+      "mergeability": 50.0,
+      "iterations": 70.0,
+      "cost": 60.0
+    },
+    "verdict": "not_merge_ready",
+    "top_issues": ["tests_failed", "security_issues", "complexity_high"],
+    "cost_usd": 0.15,
+    "time_seconds": 22.0,
+    "iterations": 3,
+    "model_used": "gpt-3.5-turbo",
+    "submitted_at": "2026-04-04T18:40:00+00:00"
+  },
+  {
+    "tool": "openhands",
+    "issue_id": "QB-002",
+    "quality_score": 58.0,
+    "dimensions": {
+      "correctness": 65.0,
+      "security": 50.0,
+      "quality": 60.0,
+      "mergeability": 45.0,
+      "iterations": 65.0,
+      "cost": 55.0
+    },
+    "verdict": "not_merge_ready",
+    "top_issues": ["tests_failed", "bandit_issues"],
+    "cost_usd": 0.12,
+    "time_seconds": 18.5,
+    "iterations": 4,
+    "model_used": "gpt-3.5-turbo",
+    "submitted_at": "2026-04-04T18:45:00+00:00"
+  }
+]
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
@@ -0,0 +1,57 @@
+# Builds the package, publishes it to PyPI via trusted publishing, and
+# attaches the built artifacts to a GitHub Release for the pushed tag.
+name: Publish to PyPI
+
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/qualbench
+    permissions:
+      id-token: write  # Required for trusted publishing
+      contents: write  # Required by the release step to create the GitHub Release
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install build tools
+        run: |
+          pip install build hatch
+
+      - name: Build package
+        run: |
+          python -m build
+
+      - name: Check package
+        run: |
+          pip install twine
+          twine check dist/*
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          skip-existing: true
+
+      # A release needs a tag; skip this step on manual (workflow_dispatch)
+      # runs started from a branch.
+      - name: Create GitHub Release
+        if: startsWith(github.ref, 'refs/tags/')
+        uses: softprops/action-gh-release@v1
+        with:
+          files: dist/*
+          generate_release_notes: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/docs/publishing.md b/docs/publishing.md
@@ -0,0 +1,66 @@
+# Publishing Guide
+
+## PyPI (pip install qualbench)
+
+### Automatic (via GitHub Actions)
+
+1. Bump version in `pyproject.toml`
+2. Commit and push
+3. Create and push a tag:
+   ```bash
+   git tag v0.3.0
+   git push origin v0.3.0
+   ```
+4. GitHub Actions automatically builds and publishes
+
+### Manual (for testing)
+
+```bash
+# Build
+pip install build twine
+python -m build
+
+# Check
+twine check dist/*
+
+# Test PyPI
+twine upload --repository testpypi dist/*
+
+# Production PyPI
+twine upload dist/*
+```
+
+## Docker Hub
+
+### Automatic (via GitHub Actions)
+
+Every push to `main` automatically builds the image and pushes it to Docker Hub.
+
+### Manual
+
+```bash
+# Build
+docker build -t semcod/qualbench-action:latest action/
+
+# Tag with version
+docker tag semcod/qualbench-action:latest semcod/qualbench-action:v0.3.0
+
+# Push
+docker push semcod/qualbench-action:latest
+docker push semcod/qualbench-action:v0.3.0
+```
+
+## GitHub Releases
+
+Created automatically when tags are pushed.
+
+## Version Checklist
+
+- [ ] Update version in `pyproject.toml`
+- [ ] Update `CHANGELOG.md` (if it exists)
+- [ ] Run tests: `make test`
+- [ ] Commit: `git commit -m "Bump version to X.Y.Z"`
+- [ ] Tag: `git tag vX.Y.Z`
+- [ ] Push: `git push && git push origin vX.Y.Z`
+- [ ] Verify PyPI: https://pypi.org/project/qualbench/X.Y.Z/
+- [ ] Verify Docker Hub: https://hub.docker.com/r/semcod/qualbench-action/tags
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "qualbench"
-version = "0.2.1"
+version = "0.3.0"
 description = "CI for AI-generated code — measures production readiness, not just correctness"
 readme = "README.md"
 license = "Apache-2.0"
@@ -13,13 +13,20 @@ authors = [
     { name = "Softreck", email = "info@softreck.com" },
     {name = "Tom Sapletta", email = "tom@sapletta.com"},
 ]
-keywords = ["benchmark", "ai", "code-quality", "ci", "quality-gates", "swe-bench"]
+keywords = ["benchmark", "ai", "code-quality", "ci", "quality-gates", "swe-bench", "llm", "code-review"]
 classifiers = [
-    "Development Status :: 3 - Alpha",
+    "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
+    # No "License ::" classifier: the SPDX expression in `license` already declares it (PEP 639).
     "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "Topic :: Software Development :: Quality Assurance",
     "Topic :: Software Development :: Testing",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Typing :: Typed",
 ]
 dependencies = [
     "click>=8.1",
diff --git a/qualbench/supervisor.py b/qualbench/supervisor.py