structured-world · polaz · Apr 6, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026
diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh
@@ -17,7 +17,7 @@ BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)"
 trap 'rm -f "$BENCH_RAW_FILE"' EXIT
 
 export STRUCTURED_ZSTD_EMIT_REPORT=1
-cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee "$BENCH_RAW_FILE"
+cargo bench --bench compare_ffi -p structured-zstd --features dict_builder -- --output-format bencher | tee "$BENCH_RAW_FILE"
 
 echo "Parsing results..." >&2
 
@@ -38,6 +38,9 @@ MEM_RE = re.compile(
 DICT_RE = re.compile(
     r'^REPORT_DICT scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$'
 )
+DICT_TRAIN_RE = re.compile(
+    r'^REPORT_DICT_TRAIN scenario=(\S+) label="((?:[^"\\]|\\.)+)" training_bytes=(\d+) dict_bytes_requested=(\d+) rust_train_ms=([0-9.]+) ffi_train_ms=([0-9.]+) rust_dict_bytes=(\d+) ffi_dict_bytes=(\d+) rust_fastcover_score=(\d+)$'
+)
 
 def unescape_report_label(value):
     output = []
@@ -71,8 +74,10 @@ timings = []
 ratios = []
 memory_rows = []
 dictionary_rows = []
+dictionary_training_rows = []
 timing_rows = []
 scenario_input_bytes = {}
+scenario_training_bytes = {}
 raw_path = os.environ["BENCH_RAW_FILE"]
 
 DELTA_LOW = 0.99
@@ -104,6 +109,14 @@ def parse_benchmark_name(name):
             "source": None,
             "implementation": parts[4],
         }
+    if len(parts) == 5 and parts[0] == "dict-train" and parts[3] == "matrix":
+        return {
+            "stage": "dict-train",
+            "level": parts[1],
+            "scenario": parts[2],
+            "source": None,
+            "implementation": parts[4],
+        }
     raise ValueError(f"Unsupported benchmark name format: {name} (parts={parts})")
 
 def canonical_key(stage, scenario, level, source):
@@ -227,6 +240,41 @@ with open(raw_path) as f:
                 "ffi_no_dict_ratio": float(ffi_no_dict_ratio),
                 "ffi_with_dict_ratio": float(ffi_with_dict_ratio),
             })
+            continue
+
+        dict_train_match = DICT_TRAIN_RE.match(line)
+        if dict_train_match:
+            (
+                scenario,
+                label,
+                training_bytes,
+                dict_bytes_requested,
+                rust_train_ms,
+                ffi_train_ms,
+                rust_dict_bytes,
+                ffi_dict_bytes,
+                rust_fastcover_score,
+            ) = dict_train_match.groups()
+            label = unescape_report_label(label)
+            delta = None
+            rust_train_ms_float = float(rust_train_ms)
+            ffi_train_ms_float = float(ffi_train_ms)
+            if rust_train_ms_float > 0.0:
+                delta = ffi_train_ms_float / rust_train_ms_float
+            dictionary_training_rows.append({
+                "scenario": scenario,
+                "label": label,
+                "training_bytes": int(training_bytes),
+                "dict_bytes_requested": int(dict_bytes_requested),
+                "rust_train_ms": rust_train_ms_float,
+                "ffi_train_ms": ffi_train_ms_float,
+                "rust_dict_bytes": int(rust_dict_bytes),
+                "ffi_dict_bytes": int(ffi_dict_bytes),
+                "rust_fastcover_score": int(rust_fastcover_score),
+                "delta_ffi_over_rust": delta,
+                "status": classify_speed_delta(delta),
+            })
+            scenario_training_bytes[scenario] = int(training_bytes)
 
 if not benchmark_results:
     print("ERROR: No benchmark results parsed!", file=sys.stderr)
@@ -246,6 +294,13 @@ if not memory_rows:
 if not dictionary_rows:
     print("WARN: No REPORT_DICT lines parsed; dictionary section will be empty.", file=sys.stderr)
 
+if not dictionary_training_rows:
+    print(
+        "ERROR: No REPORT_DICT_TRAIN lines parsed; dictionary training section would be empty.",
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
 with open("benchmark-results.json", "w") as f:
     json.dump(benchmark_results, f, indent=2)
 
@@ -302,7 +357,10 @@ for key in all_keys:
     scenario = meta["scenario"] if meta else key.split(" + ")[0]
     level = meta["level"] if meta else "unknown"
     source = meta["source"] if meta else None
-    input_bytes = scenario_input_bytes.get(scenario)
+    if stage == "dict-train":
+        input_bytes = scenario_training_bytes.get(scenario)
+    else:
+        input_bytes = scenario_input_bytes.get(scenario)
 
     speed_series = {}
     for impl_name, impl_row in speed_index.get(key, {}).items():
@@ -325,7 +383,11 @@ for key in all_keys:
     speed_delta = (
         rust_bps / ffi_bps
         if (rust_bps is not None and ffi_bps is not None and ffi_bps > 0.0)
-        else None
+        else (
+            ffi_ms / rust_ms
+            if (rust_ms is not None and ffi_ms is not None and rust_ms > 0.0)
+            else None
+        )
     )
 
     has_comparable_ratio = (
@@ -368,7 +430,7 @@ for key in all_keys:
                     "delta_low": DELTA_LOW,
                     "delta_high": DELTA_HIGH,
                 },
-                "interpretation": "delta>1 means Rust faster than FFI; delta<1 means slower",
+                "interpretation": "delta>1 means Rust faster than FFI; throughput ratio uses rust_bytes_per_sec/ffi_bytes_per_sec when available, otherwise fallback is ffi_ms_per_iter/rust_ms_per_iter",
             },
         }
     )
@@ -421,6 +483,22 @@ for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], item["lev
         f'| {row["scenario"]} | {label} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |'
     )
 
+lines.extend([
+    "",
+    "## Dictionary Training (Rust FastCOVER vs C FFI)",
+    "",
+    "| Scenario | Label | Dict bytes (requested) | Rust train ms | C train ms | Rust dict bytes | C dict bytes | Rust FastCOVER score | Delta (C/Rust) | Status |",
+    "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |",
+])
+
+for row in sorted(dictionary_training_rows, key=lambda item: item["scenario"]):
+    label = markdown_table_escape(row["label"])
+    delta = row["delta_ffi_over_rust"]
+    delta_cell = f"{delta:.4f}" if delta is not None else "n/a"
+    lines.append(
+        f'| {row["scenario"]} | {label} | {row["dict_bytes_requested"]} | {row["rust_train_ms"]:.3f} | {row["ffi_train_ms"]:.3f} | {row["rust_dict_bytes"]} | {row["ffi_dict_bytes"]} | {row["rust_fastcover_score"]} | {delta_cell} | {row["status"]} |'
+    )
+
 lines.extend([
     "",
     "## Timing Metrics",
@@ -502,7 +580,7 @@ delta_lines.extend(
         "",
         "## Speed pack",
         "",
-        "Interpretation: higher speed is better (`rust_bytes_per_sec / ffi_bytes_per_sec`).",
+        "Interpretation: higher speed is better; delta uses `rust_bytes_per_sec / ffi_bytes_per_sec` when throughput exists, otherwise fallback is `ffi_ms_per_iter / rust_ms_per_iter`.",
         "",
         "### Rust speed",
         "",
@@ -564,6 +642,7 @@ print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json"
 print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr)
 print(f"Wrote {len(memory_rows)} memory rows to benchmark-report.md", file=sys.stderr)
 print(f"Wrote {len(dictionary_rows)} dictionary rows to benchmark-report.md", file=sys.stderr)
+print(f"Wrote {len(dictionary_training_rows)} dictionary training rows to benchmark-report.md", file=sys.stderr)
 print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.json", file=sys.stderr)
 print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.md", file=sys.stderr)
 PYEOF
diff --git a/BENCHMARKS.md b/BENCHMARKS.md
@@ -38,9 +38,10 @@ encoder:
 - `structured-zstd::Better` vs `zstd` level `7`
 - `structured-zstd::Best` vs `zstd` level `11`
 
-Dictionary benchmarks are tracked separately with C FFI `with_dict` vs `without_dict` runs, using a
-dictionary trained from scenario samples. Pure Rust dictionary compression is still pending and is
-therefore not part of the pure-Rust-vs-C timing matrix yet.
+Dictionary benchmarks currently include:
+
+- C FFI `with_dict` vs `without_dict` compression runs
+- dictionary training timing comparison (`dict-train`) between the Rust FastCOVER trainer and the C FFI trainer
 
 ## Issue #24 Acceptance Mapping
 
@@ -55,7 +56,7 @@ therefore not part of the pure-Rust-vs-C timing matrix yet.
 Run the full Criterion matrix:
 
 ```bash
-cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher
+cargo bench --bench compare_ffi -p structured-zstd --features dict_builder -- --output-format bencher
 ```
 
 Generate the CI-style JSON and markdown report locally:
@@ -85,6 +86,7 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/rust_st
   - compression ratio tables (`REPORT`)
   - input+output buffer size estimate tables (`REPORT_MEM`)
   - dictionary compression tables (`REPORT_DICT`)
+  - dictionary training comparison tables (`REPORT_DICT_TRAIN`)
   - timing rows for all benchmark functions
 - `benchmark-delta.json` with canonical `(scenario + params)` rows including:
   - raw Rust/FFI ratio values and `rust/ffi` ratio delta
@@ -96,7 +98,9 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/rust_st
 Delta interpretation (direct same-run comparison on the same environment):
 
 - **Ratio delta** (`rust_ratio / ffi_ratio`): lower is better for Rust
-- **Speed delta** (`rust_bytes_per_sec / ffi_bytes_per_sec`): higher is better for Rust
+- **Speed delta**: higher is better for Rust
+  - throughput form: `rust_bytes_per_sec / ffi_bytes_per_sec`
+  - fallback form (when throughput is unavailable): `ffi_ms_per_iter / rust_ms_per_iter`
 
 Status labels in `benchmark-delta` are derived directly from the same-run deltas (no environment
 calibration/pre-test coefficients):

diff --git a/README.md b/README.md
@@ -54,7 +54,12 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe
 
 ### Dictionary Generation
 
-When the `dict_builder` feature is enabled, the `dictionary` module can create raw content dictionaries. Within 0.2% of the official implementation on the `github-users` sample set.
+When the `dict_builder` feature is enabled, the `dictionary` module can:
+- build raw dictionaries with COVER (`create_raw_dict_from_source`)
+- build raw dictionaries with FastCOVER (`create_fastcover_raw_dict_from_source`)
+- finalize raw content into full zstd dictionary format (`finalize_raw_dict`)
+- train+finalize in one pure-Rust flow (`create_fastcover_dict_from_source`)
+- propagate I/O failures from dictionary-building APIs via `io::Result` return values
 
 ## Benchmarking
 

diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml
@@ -52,8 +52,14 @@ harness = false
 [[bench]]
 name = "compare_ffi"
 harness = false
+required-features = ["dict_builder"]
 
 [[bench]]
 name = "bitstream"
 harness = false
 required-features = ["bench_internals"]
+
+[[bench]]
+name = "dict_builder_fastcover"
+harness = false
+required-features = ["dict_builder"]