paiml · noahgift · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/.github/workflows/qwen-story-daily.yml b/.github/workflows/qwen-story-daily.yml
@@ -0,0 +1,143 @@
+# Nightly Qwen-story regression cron + pmat bug-hunt manifest.
+#
+# Contract: contracts/qwen-story-v1.yaml § FALSIFY-QWEN-STORY-006.
+# Runs `scripts/qwen-story.sh` against the canonical Qwen model registry,
+# extracts the pmat manifest of high-risk untested code in each beat's command
+# modules from the story log, and opens (or comments on) a tracking issue when
+# the story exits non-zero or the manifest grows by more than 5 lines relative
+# to the last successful run.
+#
+# Self-hosted runners only — the story exercises ~30 GB of model files that
+# don't fit on GitHub-hosted runners. Per memory rule
+# `feedback_self_hosted_only`.
+
+name: qwen-story-daily
+
+# Two ways in: the nightly schedule, or a manual dispatch with two optional
+# knobs. Scheduled runs carry no inputs, so consumers of these inputs must
+# supply their own fallbacks.
+on:
+  workflow_dispatch:
+    inputs:
+      file_issue:
+        default: "true"
+        required: false
+        description: "File a tracking issue if manifest grows"
+      pmat_hunt:
+        default: "1"
+        required: false
+        description: "Run pmat bug-hunt manifest (1 = yes, 0 = skip for speed)"
+  schedule:
+    # Every day at 04:17 (GitHub evaluates cron in UTC) — an off-peak slot
+    # for the self-hosted fleet.
+    - cron: "17 4 * * *"
+
+# Token scopes granted to the workflow's GITHUB_TOKEN.
+permissions:
+  # Write access lets the run open or comment on the tracking issue.
+  issues: write
+  contents: read
+
+# All runs share one concurrency group, and a newer run never cancels the one
+# already in progress.
+concurrency:
+  cancel-in-progress: false
+  group: qwen-story-daily
+
+jobs:
+  # Single job: build `apr` from HEAD, run the story end to end, compare the
+  # pmat manifest with the previous successful run, publish artifacts, and
+  # file/update a tracking issue when the story fails or the manifest grows.
+  story:
+    name: Qwen story (apr-cli E2E)
+    runs-on: [self-hosted, gpu]
+    timeout-minutes: 30
+    # Job-level token scopes. `actions: read` is what allows the
+    # `gh run list` / `gh run download` baseline lookup in the manifest step;
+    # `issues: write` is for the tracking issue and its labels.
+    permissions:
+      actions: read
+      contents: read
+      issues: write
+    env:
+      MODELS_DIR: /home/runner/models
+      # Scheduled runs have no dispatch inputs, so default to running the hunt.
+      PMAT_HUNT: ${{ github.event.inputs.pmat_hunt || '1' }}
+      RUST_BACKTRACE: '1'
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Clear scratch files from earlier runs
+        run: |
+          # A self-hosted runner may keep /tmp between jobs. Without this, a run
+          # that fails before the story step would upload the previous run's
+          # log and manifest through the `always()` upload steps.
+          rm -rf /tmp/story.log /tmp/issue-body.md /tmp/qwen-story-artifacts
+
+      - name: Install apr from HEAD
+        run: |
+          # The default `run` shell is `bash -e` without pipefail, so the
+          # pipeline would report `tail`'s status: a failed build would pass and
+          # a stale `apr` already on PATH would be tested instead.
+          set -o pipefail
+          cargo install --path crates/apr-cli --force --features cuda 2>&1 | tail -5
+          apr --version
+
+      - name: Run scripts/qwen-story.sh
+        id: story
+        continue-on-error: true
+        run: |
+          # Do not abort on failure: the exit code is recorded as an output and
+          # acted on by the issue-filing and final fail steps.
+          set +e
+          bash scripts/qwen-story.sh 2>&1 | tee /tmp/story.log
+          # PIPESTATUS[0] is the story script's status; `$?` here would be
+          # tee's, which is 0 even when the story fails.
+          echo "exit_code=${PIPESTATUS[0]}" >> "$GITHUB_OUTPUT"
+
+      - name: Extract pmat bug-hunt manifest
+        id: manifest
+        env:
+          # `gh` needs a token to query and download from previous runs.
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Pull the pmat hunt lines out of the log into a structured artifact.
+          # `|| true`: grep exits 1 when nothing matches, which is a valid
+          # (empty) manifest rather than an error.
+          mkdir -p /tmp/qwen-story-artifacts
+          grep -E '^(    gap|    churn|    fault)' /tmp/story.log \
+            > /tmp/qwen-story-artifacts/pmat-manifest.txt || true
+          # Not named LINES: that is a variable bash itself may manage.
+          CUR_LINES=$(wc -l < /tmp/qwen-story-artifacts/pmat-manifest.txt)
+          echo "manifest_lines=$CUR_LINES" >> "$GITHUB_OUTPUT"
+
+          # Baseline: the manifest artifact of the last successful run of this
+          # workflow. Best effort — if it cannot be fetched, fall back to 0
+          # lines (so growth equals the current size) and say so in the log.
+          PREV_LINES=0
+          # Remove any earlier download; leftover files can break the download.
+          rm -rf /tmp/prev
+          PREV_RUN=$(gh run list --workflow=qwen-story-daily.yml --status=success --limit=1 \
+            --json databaseId --jq '.[0].databaseId // empty' || true)
+          if [ -n "$PREV_RUN" ] \
+            && gh run download "$PREV_RUN" -n pmat-manifest -D /tmp/prev \
+            && [ -f /tmp/prev/pmat-manifest.txt ]; then
+            PREV_LINES=$(wc -l < /tmp/prev/pmat-manifest.txt)
+          else
+            echo "::warning::No previous pmat manifest could be fetched; using 0 lines as the baseline."
+          fi
+
+          # The issue-filing step flags growth of more than 5 lines.
+          GROWTH=$((CUR_LINES - PREV_LINES))
+          echo "growth=$GROWTH" >> "$GITHUB_OUTPUT"
+          echo "Previous manifest: $PREV_LINES lines, current: $CUR_LINES lines, growth: $GROWTH"
+
+      - name: Upload pmat manifest artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: pmat-manifest
+          path: /tmp/qwen-story-artifacts/pmat-manifest.txt
+          retention-days: 90
+
+      - name: Upload story log artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: story-log
+          path: /tmp/story.log
+          retention-days: 30
+
+      # Runs when the story exited non-zero or the manifest grew by more than
+      # 5 lines, unless a manual dispatch set file_issue to 'false'.
+      - name: File / update tracking issue on failure
+        if: |
+          (steps.story.outputs.exit_code != '0' ||
+           fromJson(steps.manifest.outputs.growth || '0') > 5) &&
+          (github.event.inputs.file_issue != 'false')
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          TITLE="qwen-story-daily: story exit=${{ steps.story.outputs.exit_code }} manifest_growth=${{ steps.manifest.outputs.growth }}"
+          # Look for an existing open issue with the qwen-story-daily label.
+          EXISTING=$(gh issue list --label qwen-story-daily --state open --limit 1 \
+            --json number --jq '.[0].number // empty')
+          {
+            echo "## Qwen story daily run — $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+            echo ""
+            echo "- Story exit code: \`${{ steps.story.outputs.exit_code }}\`"
+            echo "- Pmat manifest lines: \`${{ steps.manifest.outputs.manifest_lines }}\` (growth: ${{ steps.manifest.outputs.growth }})"
+            echo "- Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+            echo ""
+            echo "### Story log (tail)"
+            echo '```'
+            tail -50 /tmp/story.log
+            echo '```'
+            echo ""
+            echo "### Manifest (top 20 lines)"
+            echo '```'
+            # `head` succeeds on an empty file, so test for content explicitly.
+            if [ -s /tmp/qwen-story-artifacts/pmat-manifest.txt ]; then
+              head -20 /tmp/qwen-story-artifacts/pmat-manifest.txt
+            else
+              echo "(empty)"
+            fi
+            echo '```'
+          } > /tmp/issue-body.md
+          if [ -n "$EXISTING" ]; then
+            echo "Commenting on existing issue #$EXISTING"
+            gh issue comment "$EXISTING" --body-file /tmp/issue-body.md
+          else
+            echo "Filing new tracking issue"
+            # `gh issue create --label` rejects labels that do not exist yet.
+            # Creating one that already exists fails, hence the `|| true`.
+            gh label create qwen-story-daily 2>/dev/null || true
+            gh label create regression 2>/dev/null || true
+            gh issue create --title "$TITLE" \
+              --label qwen-story-daily,regression \
+              --body-file /tmp/issue-body.md
+          fi
+
+      # The story step is continue-on-error so the artifact and issue steps
+      # still run; this step turns a story failure into a failed job.
+      - name: Fail the job if the story failed
+        if: steps.story.outputs.exit_code != '0'
+        run: |
+          echo "::error::Qwen story exited with code ${{ steps.story.outputs.exit_code }}. See artifact 'story-log' for details."
+          exit 1
diff --git a/README.md b/README.md
@@ -93,38 +93,110 @@ cargo install aprender    # installs the `apr` binary
 apr --version
 ```
 
-## CLI examples
+## A Qwen story
+
+Eight beats, one narrative, every core command group. Anchored on the Qwen
+series so the story scales from a 494-MB safetensors model to a 30 B-parameter
+MoE GGUF. Every beat is a falsifier in
+[`contracts/qwen-story-v1.yaml`](contracts/qwen-story-v1.yaml); the runnable
+form is [`scripts/qwen-story.sh`](scripts/qwen-story.sh); nightly cron is
+[`.github/workflows/qwen-story-daily.yml`](.github/workflows/qwen-story-daily.yml);
+the dogfood gate is `/dogfood` Gate 18.
+
+```bash
+# Reproduce locally (uses ~/models cache; ~3-5 min on RTX 4090):
+bash scripts/qwen-story.sh
+```
+
+### Beat 1 — Discover (Registry)
+
+```bash
+apr pull hf://Qwen/Qwen2.5-Coder-0.5B-Instruct      # 494 MB safetensors
+apr list                                            # confirm cached
+```
+
+### Beat 2 — Trust (QA gates)
+
+```bash
+apr qa qwen2.5-coder-1.5b-instruct-q4k              # 12 falsifiable gates
+apr validate qwen2.5-coder-1.5b-instruct-q4k --quality   # 100-pt structural audit
+apr lint qwen2.5-coder-1.5b-instruct-q4k            # best-practice signals
+```
+
+### Beat 3 — Explore (Inspection)
+
+```bash
+apr inspect --json qwen2.5-coder-1.5b-instruct-q4k  # arch, params, tensors
+apr tensors --json qwen2.5-coder-1.5b-instruct-q4k  # 339 tensors with shapes
+apr tree qwen2.5-coder-1.5b-instruct-q4k            # layer architecture
+```
+
+### Beat 4 — Adapt (Model ops)
 
 ```bash
-# Run inference (local or HF)
-apr run hf://Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF "Explain quicksort"
-apr chat hf://meta-llama/Llama-3-8B-Instruct-GGUF
+apr export qwen2.5-coder-1.5b-instruct-q4k --format gguf -o roundtrip.gguf
+apr diff qwen2.5-coder-1.5b-instruct-q4k roundtrip.gguf  # tensor-by-tensor delta
+apr convert model.safetensors --quantize q4_k -o quantized.apr
+```
 
-# Serve
-apr serve model.gguf --port 8080
+### Beat 5 — Use (Inference)
 
-# Inspect
-apr inspect model.gguf
-apr validate model.apr --quality --strict
-apr tensors model.gguf | head -20
+```bash
+apr run qwen2.5-coder-1.5b-instruct-q4k "fn sum(a: i32, b: i32) -> i32 {" --max-tokens 16
+apr chat qwen2.5-coder-1.5b-instruct-q4k            # interactive REPL
+apr code -p "review this Python function" --max-turns 1   # agent mode (PMAT-182)
+```
 
-# Fine-tune with LoRA
-apr finetune model.gguf --adapter lora --rank 64 --data train.jsonl
+### Beat 6 — Serve (REST API)
 
-# Convert formats
-apr convert model.safetensors --quantize q4_k -o model.gguf
-apr export model.apr --format gguf -o model.gguf
+```bash
+apr serve run qwen2.5-coder-1.5b-instruct-q4k --port 8080
+curl -s localhost:8080/v1/chat/completions \
+  -H 'Content-Type: application/json' \
+  -d '{"model":"qwen","messages":[{"role":"user","content":"What is 2+2?"}],"max_tokens":8}'
+# → {"choices":[{"message":{"content":"2 + 2 equals 4."}}],...}
+```
+
+### Beat 7 — Operate (Profiling)
+
+```bash
+apr profile qwen2.5-coder-7b-instruct-q4_k_m        # Roofline analysis
+apr gpu --json                                      # VRAM, sm_*, cuda version
+apr serve plan qwen2.5-coder-7b-instruct-q4_k_m     # capacity plan before run
+```
 
-# Profile
-apr profile model.gguf --roofline
-apr bench model.gguf --assert-tps 100
+### Beat 8 — Scale (MoE introspection)
 
-# Publish to HuggingFace Hub (see SPEC-HF-PUBLISH-001 for the full 12-file pipeline)
+```bash
+apr inspect --json Qwen3-Coder-30B-A3B-Instruct     # arch=qwen3moe, 30 B params
+apr tensors --json Qwen3-Coder-30B-A3B-Instruct     # 579 tensors (MoE expert layout)
+```
+
+### Publish (separate flow)
+
+```bash
+# Publish a derived model to HuggingFace Hub (see SPEC-HF-PUBLISH-001 for the 12-file pipeline)
 apr stamp ckpt.apr --tokenizer /path/to/qwen-tokenizer --license Apache-2.0 -o staging/model.apr
 apr export staging/model.apr --format gguf --quantize int4 -o staging/model-q4k.gguf
 apr publish staging/ paiml/my-model-v1 --library-name aprender --license Apache-2.0
 ```
 
+> **The bug-hunt layer.** When run with `PMAT_HUNT=1` (default), each beat
+> emits a manifest of high-risk untested code in the command modules it just
+> exercised:
+>
+> ```
+> -- pmat bug-hunt manifest (run chat code) --
+>     gap   crates/apr-cli/src/commands/run.rs:resolve_model_alias (impact=42.3)
+>     churn crates/apr-cli/src/commands/code.rs:dispatch_agent (commits=11)
+>     fault crates/aprender-serve/src/api/cuda_chat_backend.rs:try_qwen3_moe (unwrap,panic)
+> ```
+>
+> The nightly cron opens (or comments on) a tracking issue when this manifest
+> grows by more than five lines between runs, so untested branches in command
+> handlers can't accumulate quietly. See
+> [`contracts/qwen-story-v1.yaml`](contracts/qwen-story-v1.yaml) §
+> `pmat_audit_per_beat`.
+
 > **Publishing a model? Read [SPEC-HF-PUBLISH-001](docs/specifications/aprender-train/model-hf-publish-pipeline-spec.md).**
 > It documents the 12-file minimum (.apr/.gguf/.safetensors/model.safetensors alias/config/tokenizer/LICENSE/etc.), the YAML front-matter schema, the three-path verification protocol, and the HF API gotchas (NDJSON commits, LFS batch for 5MB-5GB, empty `model-index` rejected, Q4_K K%256==0). First applied 2026-05-18 for [paiml/albor-370m-v1](https://huggingface.co/paiml/albor-370m-v1).