Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 84 additions & 5 deletions .github/scripts/run-benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)"
trap 'rm -f "$BENCH_RAW_FILE"' EXIT

export STRUCTURED_ZSTD_EMIT_REPORT=1
cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee "$BENCH_RAW_FILE"
cargo bench --bench compare_ffi -p structured-zstd --features dict_builder -- --output-format bencher | tee "$BENCH_RAW_FILE"

echo "Parsing results..." >&2

Expand All @@ -38,6 +38,9 @@ MEM_RE = re.compile(
DICT_RE = re.compile(
r'^REPORT_DICT scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$'
)
DICT_TRAIN_RE = re.compile(
r'^REPORT_DICT_TRAIN scenario=(\S+) label="((?:[^"\\]|\\.)+)" training_bytes=(\d+) dict_bytes_requested=(\d+) rust_train_ms=([0-9.]+) ffi_train_ms=([0-9.]+) rust_dict_bytes=(\d+) ffi_dict_bytes=(\d+) rust_fastcover_score=(\d+)$'
)

def unescape_report_label(value):
output = []
Expand Down Expand Up @@ -71,8 +74,10 @@ timings = []
ratios = []
memory_rows = []
dictionary_rows = []
dictionary_training_rows = []
timing_rows = []
scenario_input_bytes = {}
scenario_training_bytes = {}
raw_path = os.environ["BENCH_RAW_FILE"]

DELTA_LOW = 0.99
Expand Down Expand Up @@ -104,6 +109,14 @@ def parse_benchmark_name(name):
"source": None,
"implementation": parts[4],
}
if len(parts) == 5 and parts[0] == "dict-train" and parts[3] == "matrix":
return {
"stage": "dict-train",
"level": parts[1],
"scenario": parts[2],
"source": None,
"implementation": parts[4],
}
raise ValueError(f"Unsupported benchmark name format: {name} (parts={parts})")

def canonical_key(stage, scenario, level, source):
Expand Down Expand Up @@ -227,6 +240,41 @@ with open(raw_path) as f:
"ffi_no_dict_ratio": float(ffi_no_dict_ratio),
"ffi_with_dict_ratio": float(ffi_with_dict_ratio),
})
continue

dict_train_match = DICT_TRAIN_RE.match(line)
if dict_train_match:
(
scenario,
label,
training_bytes,
dict_bytes_requested,
rust_train_ms,
ffi_train_ms,
rust_dict_bytes,
ffi_dict_bytes,
rust_fastcover_score,
) = dict_train_match.groups()
label = unescape_report_label(label)
delta = None
rust_train_ms_float = float(rust_train_ms)
ffi_train_ms_float = float(ffi_train_ms)
if rust_train_ms_float > 0.0:
delta = ffi_train_ms_float / rust_train_ms_float
dictionary_training_rows.append({
"scenario": scenario,
"label": label,
"training_bytes": int(training_bytes),
"dict_bytes_requested": int(dict_bytes_requested),
"rust_train_ms": rust_train_ms_float,
"ffi_train_ms": ffi_train_ms_float,
"rust_dict_bytes": int(rust_dict_bytes),
"ffi_dict_bytes": int(ffi_dict_bytes),
"rust_fastcover_score": int(rust_fastcover_score),
"delta_ffi_over_rust": delta,
"status": classify_speed_delta(delta),
})
scenario_training_bytes[scenario] = int(training_bytes)

if not benchmark_results:
print("ERROR: No benchmark results parsed!", file=sys.stderr)
Expand All @@ -246,6 +294,13 @@ if not memory_rows:
if not dictionary_rows:
print("WARN: No REPORT_DICT lines parsed; dictionary section will be empty.", file=sys.stderr)

if not dictionary_training_rows:
print(
"ERROR: No REPORT_DICT_TRAIN lines parsed; dictionary training section would be empty.",
file=sys.stderr,
)
sys.exit(1)

with open("benchmark-results.json", "w") as f:
json.dump(benchmark_results, f, indent=2)

Expand Down Expand Up @@ -302,7 +357,10 @@ for key in all_keys:
scenario = meta["scenario"] if meta else key.split(" + ")[0]
level = meta["level"] if meta else "unknown"
source = meta["source"] if meta else None
input_bytes = scenario_input_bytes.get(scenario)
if stage == "dict-train":
input_bytes = scenario_training_bytes.get(scenario)
else:
input_bytes = scenario_input_bytes.get(scenario)

speed_series = {}
for impl_name, impl_row in speed_index.get(key, {}).items():
Expand All @@ -325,7 +383,11 @@ for key in all_keys:
speed_delta = (
rust_bps / ffi_bps
if (rust_bps is not None and ffi_bps is not None and ffi_bps > 0.0)
else None
else (
ffi_ms / rust_ms
if (rust_ms is not None and ffi_ms is not None and rust_ms > 0.0)
else None
)
)

has_comparable_ratio = (
Expand Down Expand Up @@ -368,7 +430,7 @@ for key in all_keys:
"delta_low": DELTA_LOW,
"delta_high": DELTA_HIGH,
},
"interpretation": "delta>1 means Rust faster than FFI; delta<1 means slower",
"interpretation": "delta>1 means Rust faster than FFI; throughput ratio uses rust_bytes_per_sec/ffi_bytes_per_sec when available, otherwise fallback is ffi_ms_per_iter/rust_ms_per_iter",
},
}
)
Expand Down Expand Up @@ -421,6 +483,22 @@ for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], item["lev
f'| {row["scenario"]} | {label} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |'
)

lines.extend([
"",
"## Dictionary Training (Rust FastCOVER vs C FFI)",
"",
"| Scenario | Label | Dict bytes (requested) | Rust train ms | C train ms | Rust dict bytes | C dict bytes | Rust FastCOVER score | Delta (C/Rust) | Status |",
"| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |",
])

for row in sorted(dictionary_training_rows, key=lambda item: item["scenario"]):
label = markdown_table_escape(row["label"])
delta = row["delta_ffi_over_rust"]
delta_cell = f"{delta:.4f}" if delta is not None else "n/a"
lines.append(
f'| {row["scenario"]} | {label} | {row["dict_bytes_requested"]} | {row["rust_train_ms"]:.3f} | {row["ffi_train_ms"]:.3f} | {row["rust_dict_bytes"]} | {row["ffi_dict_bytes"]} | {row["rust_fastcover_score"]} | {delta_cell} | {row["status"]} |'
)

lines.extend([
"",
"## Timing Metrics",
Expand Down Expand Up @@ -502,7 +580,7 @@ delta_lines.extend(
"",
"## Speed pack",
"",
"Interpretation: higher speed is better (`rust_bytes_per_sec / ffi_bytes_per_sec`).",
"Interpretation: higher speed is better; delta uses `rust_bytes_per_sec / ffi_bytes_per_sec` when throughput exists, otherwise fallback is `ffi_ms_per_iter / rust_ms_per_iter`.",
"",
"### Rust speed",
"",
Expand Down Expand Up @@ -564,6 +642,7 @@ print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json"
print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr)
print(f"Wrote {len(memory_rows)} memory rows to benchmark-report.md", file=sys.stderr)
print(f"Wrote {len(dictionary_rows)} dictionary rows to benchmark-report.md", file=sys.stderr)
print(f"Wrote {len(dictionary_training_rows)} dictionary training rows to benchmark-report.md", file=sys.stderr)
print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.json", file=sys.stderr)
print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.md", file=sys.stderr)
PYEOF
14 changes: 9 additions & 5 deletions BENCHMARKS.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ encoder:
- `structured-zstd::Better` vs `zstd` level `7`
- `structured-zstd::Best` vs `zstd` level `11`

Dictionary benchmarks are tracked separately with C FFI `with_dict` vs `without_dict` runs, using a
dictionary trained from scenario samples. Pure Rust dictionary compression is still pending and is
therefore not part of the pure-Rust-vs-C timing matrix yet.
Dictionary benchmarks currently include:

- C FFI `with_dict` vs `without_dict` compression runs
- dictionary training timing comparison (`dict-train`) between the Rust FastCOVER trainer and the C FFI trainer

## Issue #24 Acceptance Mapping

Expand All @@ -55,7 +56,7 @@ therefore not part of the pure-Rust-vs-C timing matrix yet.
Run the full Criterion matrix:

```bash
cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher
cargo bench --bench compare_ffi -p structured-zstd --features dict_builder -- --output-format bencher
```

Generate the CI-style JSON and markdown report locally:
Expand Down Expand Up @@ -85,6 +86,7 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/rust_st
- compression ratio tables (`REPORT`)
- input+output buffer size estimate tables (`REPORT_MEM`)
- dictionary compression tables (`REPORT_DICT`)
- dictionary training comparison tables (`REPORT_DICT_TRAIN`)
- timing rows for all benchmark functions
- `benchmark-delta.json` with canonical `(scenario + params)` rows including:
- raw Rust/FFI ratio values and `rust/ffi` ratio delta
Expand All @@ -96,7 +98,9 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/rust_st
Delta interpretation (direct same-run comparison on the same environment):

- **Ratio delta** (`rust_ratio / ffi_ratio`): lower is better for Rust
- **Speed delta** (`rust_bytes_per_sec / ffi_bytes_per_sec`): higher is better for Rust
- **Speed delta**: higher is better for Rust
  - throughput form: `rust_bytes_per_sec / ffi_bytes_per_sec`
  - fallback form (when throughput is unavailable): `ffi_ms_per_iter / rust_ms_per_iter`

Status labels in `benchmark-delta` are derived directly from the same-run deltas (no environment
calibration/pre-test coefficients):
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,12 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe

### Dictionary Generation

When the `dict_builder` feature is enabled, the `dictionary` module can create raw content dictionaries. Within 0.2% of the official implementation on the `github-users` sample set.
When the `dict_builder` feature is enabled, the `dictionary` module can:
- build raw dictionaries with COVER (`create_raw_dict_from_source`)
- build raw dictionaries with FastCOVER (`create_fastcover_raw_dict_from_source`)
- finalize raw content into full zstd dictionary format (`finalize_raw_dict`)
- train and finalize in one pure-Rust flow (`create_fastcover_dict_from_source`)
- propagate I/O failures from dictionary-building APIs via `io::Result` return values

## Benchmarking

Expand Down
6 changes: 6 additions & 0 deletions zstd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,14 @@ harness = false
[[bench]]
name = "compare_ffi"
harness = false
required-features = ["dict_builder"]

[[bench]]
name = "bitstream"
harness = false
required-features = ["bench_internals"]

[[bench]]
name = "dict_builder_fastcover"
harness = false
required-features = ["dict_builder"]
Loading
Loading