Skip to content

Commit 92f7be4

Browse files
Fix retained x_attn capture without enabling Qwen QKV Freivalds
- Populate RetainedLayerState.x_attn_i8 with the captured GPU x_attn (it was always None — the prover used the captured x_attn for QKV accumulators, but the verifier fell back to bridge-derived x_attn, causing a mismatch)
- Keep Qwen supports_qkv_freivalds=false until GPU validation confirms the fix makes Freivalds pass
- Add a skipped field to V4VerifyReport for explicit unsupported reporting
- Expose skipped in the Python verify dict and the Display impl
- Update roadmap: QKV Freivalds gated by profile
1 parent f397405 commit 92f7be4

2 files changed

Lines changed: 10 additions & 6 deletions

File tree

crates/verilm-core/src/types.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ impl VerificationProfile {
9191
max_validated_context: 1164,
9292
requires_score_anchoring: false,
9393
score_anchor_threshold: None, // anchor gap ~14, too loose for strong tier
94-
supports_qkv_freivalds: false, // bridge replay can't match GPU INT8 GEMM for Qwen
94+
supports_qkv_freivalds: false, // pending: prover now populates x_attn_i8 in retained state, needs GPU validation
9595
}
9696
}
9797

crates/verilm-prover/src/lib.rs

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -148,16 +148,20 @@ pub fn build_retained_from_captures(
148148
let a_dim = entry.a_i8.len() / batch_size;
149149
let a = entry.a_i8[b * a_dim..(b + 1) * a_dim].to_vec();
150150

151-
if let Some(ref xa) = entry.x_attn_i8 {
151+
let (retained_xa, retained_scale_xa) = if let Some(ref xa) = entry.x_attn_i8 {
152152
let x_dim = xa.len() / batch_size;
153-
token_x_attn.push(xa[b * x_dim..(b + 1) * x_dim].to_vec());
154-
}
153+
let slice = xa[b * x_dim..(b + 1) * x_dim].to_vec();
154+
token_x_attn.push(slice.clone());
155+
(Some(slice), Some(entry.scale_x_attn[b]))
156+
} else {
157+
(None, None)
158+
};
155159

156160
layers.push(RetainedLayerState {
157161
a,
158162
scale_a: entry.scale_a[b],
159-
x_attn_i8: None,
160-
scale_x_attn: None,
163+
x_attn_i8: retained_xa,
164+
scale_x_attn: retained_scale_xa,
161165
});
162166
token_scales.push(CapturedLayerScales {
163167
scale_x_attn: entry.scale_x_attn[b],

0 commit comments

Comments (0)