Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions barretenberg/cpp/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,42 @@ Typical workflow
2. Build native code: `cd barretenberg/cpp && ./bootstrap.sh build_native`
3. Check VKs: `cd scripts && ./test_chonk_standalone_vks_havent_changed.sh`
4. If VKs changed intentionally: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs`

## Example IVC inputs

Example IVC inputs (msgpack files) for `bb prove --scheme chonk` are generated by e2e benchmark tests. Run the full bootstrap from the repo root to populate them:

```bash
cd $(git rev-parse --show-toplevel) && ./bootstrap.sh
```

This creates `yarn-project/end-to-end/example-app-ivc-inputs-out/<flow>/ivc-inputs.msgpack`. The inputs are generated by the `build_bench` function in `yarn-project/end-to-end/bootstrap.sh`, which runs client flow tests with `CAPTURE_IVC_FOLDER` set. In CI, these are cached as `bb-chonk-captures-<hash>.tar.gz`.

## Memory profiling

The `--memory_profile_out <file>` flag on `bb prove` outputs a JSON array of RSS checkpoints at key proving stages (after alloc, trace, oink, sumcheck, accumulate) for each circuit, with circuit names and indices.

```bash
cd barretenberg/cpp
./build/bin/bb prove \
--scheme chonk \
--ivc_inputs_path <path-to>/ivc-inputs.msgpack \
-o /tmp/proof-out \
-v \
--memory_profile_out /tmp/proof-out/memory_profile.json
```

For a visual timeline of a single run, pipe verbose output to `plot_memory.py`:

```bash
bb prove --scheme chonk ... -v 2>&1 | python3 scripts/plot_memory.py > memory.html
```

The extraction script converts the JSON into dashboard benchmark entries (one overlaid line per circuit stage, tracked across commits):

```bash
echo '[]' > /tmp/proof-out/benchmarks.bench.json
python3 scripts/extract_memory_benchmarks.py /tmp/proof-out "app-proving/flow/native"
```

In CI, this is integrated into `ci_benchmark_ivc_flows.sh` (native only) and uploaded to the benchmark dashboard.
4 changes: 2 additions & 2 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
"CC": "clang-20",
"CXX": "clang++-20",
"CFLAGS": "-gdwarf-4",
"CXXFLAGS": "-gdwarf-4",
"CXXFLAGS": "-gdwarf-4 -D_GLIBCXX_DEBUG",
"LDFLAGS": "-gdwarf-4"
},
"cacheVariables": {
Expand Down Expand Up @@ -162,7 +162,7 @@
"binaryDir": "build-debug-fast",
"environment": {
"CFLAGS": "-O2 -gdwarf",
"CXXFLAGS": "-O2 -gdwarf-4",
"CXXFLAGS": "-O2 -gdwarf-4 -D_GLIBCXX_DEBUG",
"LDFLAGS": "-O2 -gdwarf-4"
},
"cacheVariables": {
Expand Down
24 changes: 22 additions & 2 deletions barretenberg/cpp/scripts/ci_benchmark_ivc_flows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,14 @@ function run_bb_cli_bench {

if [[ "$runtime" == "native" ]]; then
# Add --bench_out_hierarchical flag for native builds to capture hierarchical op counts and timings
memusage "./$native_build_dir/bin/bb" "$@" "--bench_out_hierarchical" "$output/benchmark_breakdown.json" || {
echo "bb native failed with args: $@ --bench_out_hierarchical $output/benchmark_breakdown.json"
memusage "./$native_build_dir/bin/bb" "$@" "--bench_out_hierarchical" "$output/benchmark_breakdown.json" "--memory_profile_out" "$output/memory_profile.json" || {
echo "bb native failed with args: $@ --bench_out_hierarchical $output/benchmark_breakdown.json --memory_profile_out $output/memory_profile.json"
exit 1
}
else # wasm
export WASMTIME_ALLOWED_DIRS="--dir=$flow_folder --dir=$output"
# Add --bench_out_hierarchical flag for wasm builds to capture hierarchical op counts and timings
# Note: --memory_profile_out is native-only (getrusage not available in wasm)
memusage scripts/wasmtime.sh $WASMTIME_ALLOWED_DIRS ./build-wasm-threads/bin/bb "$@" "--bench_out_hierarchical" "$output/benchmark_breakdown.json" || {
echo "bb wasm failed with args: $@ --bench_out_hierarchical $output/benchmark_breakdown.json"
exit 1
Expand Down Expand Up @@ -139,6 +140,12 @@ EOF
echo "Extracting component timings from hierarchical breakdown..."
python3 scripts/extract_component_benchmarks.py "$output" "$name_path"
fi

# Extract memory profile metrics if available
if [[ -f "$output/memory_profile.json" ]]; then
echo "Extracting memory profile metrics..."
python3 scripts/extract_memory_benchmarks.py "$output" "$name_path"
fi
}

export -f verify_ivc_flow run_bb_cli_bench
Expand Down Expand Up @@ -178,4 +185,17 @@ if [[ "${CI:-}" == "1" ]] && [[ "${CI_USE_BUILD_INSTANCE_KEY:-0}" == "1" ]]; the
else
echo "Warning: benchmark breakdown file not found at $benchmark_breakdown_file"
fi

# Upload memory profile to S3
memory_profile_file="bench-out/app-proving/$flow_name/$runtime/memory_profile.json"
if [[ -f "$memory_profile_file" ]]; then
tmp_memory_file="/tmp/memory_profile_${runtime}_${flow_name}_$$.json"
cp "$memory_profile_file" "$tmp_memory_file"
memory_disk_key="memory-${runtime}-${flow_name}-${current_sha}"
{
cat "$tmp_memory_file" | gzip | cache_s3_transfer_to "bench/bb-breakdown" "$memory_disk_key"
rm -f "$tmp_memory_file"
} &
echo "Uploaded memory profile to S3: bench/bb-breakdown/$memory_disk_key"
fi
fi
62 changes: 62 additions & 0 deletions barretenberg/cpp/scripts/extract_memory_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Extracts memory profile metrics from a memory profile JSON and appends
them to the benchmark JSON file as dashboard entries.

Usage: extract_memory_benchmarks.py <output_dir> <name_path>

The output_dir must contain:
- memory_profile.json (memory profile data from bb --memory_profile_out)
- benchmarks.bench.json (existing benchmark results to append to)

The memory profile JSON format is documented in memory_profile.cpp.

Extraction failures (missing/malformed files) are reported as warnings and
the process still exits 0, so a bad profile never fails the benchmark job.
"""
import json
import os
import sys

# Stage ordering: prefix with a sequence number so alphabetical sort on the
# dashboard matches execution order. Unknown stage names pass through as-is.
STAGE_ORDER = {
    "after_alloc": "0_alloc",
    "after_trace": "1_trace",
    "after_oink": "2_oink",
    "after_sumcheck": "3_sumcheck",
    "after_accumulate": "4_accumulate",
}


def build_entries(checkpoints, name_path):
    """Convert memory-profile checkpoints into dashboard benchmark entries.

    Args:
        checkpoints: flat list of checkpoint dicts (msgpack-serialized from
            C++); each has "circuit_index", "stage", "heap_mb" and optionally
            "circuit_name".
        name_path: dashboard name prefix, e.g. "app-proving/flow/native".

    Returns:
        List of entry dicts ({name, unit, value, extra}); the "extra" field
        groups all entries of one flow under a single stacked chart.
    """
    entries = []
    for cp in checkpoints:
        circuit_name = cp.get("circuit_name", "")
        idx = cp["circuit_index"]
        stage = STAGE_ORDER.get(cp["stage"], cp["stage"])
        # Zero-pad the circuit index so lexicographic sort == circuit order.
        label = f"{idx:02d}_{circuit_name}_{stage}" if circuit_name else f"{idx:02d}_{stage}"
        entries.append({
            "name": f"{name_path}/{label}",
            "unit": "MB",
            "value": cp["heap_mb"],
            "extra": f"stacked:{name_path}/heap_over_stages",
        })
    return entries


def main(argv):
    """CLI entry point. Returns the process exit code."""
    if len(argv) != 3:
        print(f"Usage: {argv[0]} <output_dir> <name_path>", file=sys.stderr)
        return 1

    output_dir = argv[1]
    name_path = argv[2]

    try:
        with open(os.path.join(output_dir, "memory_profile.json"), "r") as f:
            checkpoints = json.load(f)

        entries = build_entries(checkpoints, name_path)

        # Append to the existing benchmarks file (must already exist; CI
        # seeds it before any extraction scripts run).
        bench_path = os.path.join(output_dir, "benchmarks.bench.json")
        with open(bench_path, "r") as f:
            existing = json.load(f)

        existing.extend(entries)

        with open(bench_path, "w") as f:
            json.dump(existing, f, indent=2)

        print(f"Extracted {len(entries)} memory profile metrics")
    except Exception as e:  # best-effort: warn but never fail the job
        print(f"Warning: Could not extract memory profile: {e}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
20 changes: 20 additions & 0 deletions barretenberg/cpp/src/barretenberg/bb/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "barretenberg/common/assert.hpp"
#include "barretenberg/common/bb_bench.hpp"
#include "barretenberg/common/get_bytecode.hpp"
#include "barretenberg/common/memory_profile.hpp"
#include "barretenberg/common/thread.hpp"
#include "barretenberg/common/version.hpp"
#include "barretenberg/dsl/acir_format/serde/index.hpp"
Expand Down Expand Up @@ -389,6 +390,15 @@ int parse_and_run_cli_command(int argc, char* argv[])
"parent-child relationships) as json.")
->group(advanced_group);
};
std::string memory_profile_out;
const auto add_memory_profile_out_option = [&](CLI::App* subcommand) {
return subcommand
->add_option("--memory_profile_out",
memory_profile_out,
"Path to write memory profile data (polynomial breakdown by category, RSS "
"checkpoints, CRS size) as json.")
->group(advanced_group);
};

/***************************************************************************************************************
* Top-level flags
Expand Down Expand Up @@ -482,6 +492,7 @@ int parse_and_run_cli_command(int argc, char* argv[])
add_print_bench_flag(prove);
add_bench_out_option(prove);
add_bench_out_hierarchical_option(prove);
add_memory_profile_out_option(prove);
add_storage_budget_option(prove);
add_output_format_option(prove);

Expand Down Expand Up @@ -811,6 +822,10 @@ int parse_and_run_cli_command(int argc, char* argv[])
if (!flags.storage_budget.empty()) {
storage_budget = parse_size_string(flags.storage_budget);
}
if (!memory_profile_out.empty()) {
bb::detail::use_memory_profile = true;
vinfo("Memory profiling enabled via --memory_profile_out");
}
if (print_bench || !bench_out.empty() || !bench_out_hierarchical.empty()) {
bb::detail::use_bb_bench = true;
vinfo("BB_BENCH enabled via --print_bench or --bench_out");
Expand Down Expand Up @@ -987,6 +1002,11 @@ int parse_and_run_cli_command(int argc, char* argv[])
bb::detail::GLOBAL_BENCH_STATS.serialize_aggregate_data_json(file);
}
#endif
if (!memory_profile_out.empty()) {
std::ofstream file(memory_profile_out);
bb::detail::GLOBAL_MEMORY_PROFILE.serialize_json(file);
vinfo("Memory profile written to ", memory_profile_out);
}
return 0;
}
if (check->parsed()) {
Expand Down
4 changes: 4 additions & 0 deletions barretenberg/cpp/src/barretenberg/bbapi/bbapi_chonk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "barretenberg/commitment_schemes/ipa/ipa.hpp"
#include "barretenberg/commitment_schemes/verification_key.hpp"
#include "barretenberg/common/log.hpp"
#include "barretenberg/common/memory_profile.hpp"
#include "barretenberg/common/serialize.hpp"
#include "barretenberg/common/throw_or_abort.hpp"
#include "barretenberg/dsl/acir_format/acir_format.hpp"
Expand Down Expand Up @@ -90,6 +91,9 @@ ChonkAccumulate::Response ChonkAccumulate::execute(BBApiRequest& request) &&
}

info("ChonkAccumulate - accumulating circuit '", request.loaded_circuit_name, "'");
if (detail::use_memory_profile) {
detail::GLOBAL_MEMORY_PROFILE.set_circuit_name(request.loaded_circuit_name);
}
request.ivc_in_progress->accumulate(circuit, precomputed_vk);
request.ivc_stack_depth++;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#include <benchmark/benchmark.h>

#include "barretenberg/commitment_schemes/ipa/ipa.hpp"
#include "barretenberg/ecc/curves/bn254/fq.hpp"
#include "barretenberg/eccvm/eccvm_circuit_builder.hpp"
#include "barretenberg/eccvm/eccvm_prover.hpp"
#include "barretenberg/eccvm/eccvm_verifier.hpp"
#include "barretenberg/srs/global_crs.hpp"

using namespace benchmark;
using namespace bb;
Expand Down Expand Up @@ -40,6 +43,9 @@ Builder generate_trace(size_t target_num_gates)
op_queue->merge();
}

using Fq = curve::BN254::BaseField;
op_queue->append_hiding_op(Fq::random_element(), Fq::random_element());

Builder builder{ op_queue };
return builder;
}
Expand All @@ -63,12 +69,35 @@ void eccvm_prove(State& state) noexcept
std::shared_ptr<Transcript> prover_transcript = std::make_shared<Transcript>();
ECCVMProver prover(builder, prover_transcript);
for (auto _ : state) {
auto [proof, ipa_claim] = prover.construct_proof();
auto [proof, opening_claim] = prover.construct_proof();
auto ipa_transcript = std::make_shared<Transcript>();
IPA<Flavor::Curve>::compute_opening_proof(prover.key->commitment_key, opening_claim, ipa_transcript);
};
}

void eccvm_ipa(State& state) noexcept
{
size_t target_num_gates = 1 << static_cast<size_t>(state.range(0));
Builder builder = generate_trace(target_num_gates);
std::shared_ptr<Transcript> prover_transcript = std::make_shared<Transcript>();
ECCVMProver prover(builder, prover_transcript);
auto [proof, opening_claim] = prover.construct_proof();
for (auto _ : state) {
auto ipa_transcript = std::make_shared<Transcript>();
IPA<Flavor::Curve>::compute_opening_proof(prover.key->commitment_key, opening_claim, ipa_transcript);
};
}

BENCHMARK(eccvm_generate_prover)->Unit(kMillisecond)->DenseRange(12, CONST_ECCVM_LOG_N);
BENCHMARK(eccvm_prove)->Unit(kMillisecond)->DenseRange(12, CONST_ECCVM_LOG_N);
BENCHMARK(eccvm_ipa)->Unit(kMillisecond)->DenseRange(12, CONST_ECCVM_LOG_N);
} // namespace

BENCHMARK_MAIN();
int main(int argc, char** argv)
{
bb::srs::init_file_crs_factory(bb::srs::bb_crs_path());
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ class BatchedHonkTranslatorTests : public ::testing::Test {
// ── Round 3: translator Z_PERM, then joint alpha + gate challenges ────────
m.add_entry(round, "Z_PERM", G);
m.add_challenge(round, "Sumcheck:alpha");
for (size_t i = 0; i < JOINT_LOG_N; ++i) {
m.add_challenge(round, "Sumcheck:gate_challenge_" + std::to_string(i));
}
m.add_challenge(round, "Sumcheck:gate_challenge");
round++;

// ── Round 4: Libra masking commitment + Sum ───────────────────────────────
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,8 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
const FF alpha = transcript->template get_challenge<FF>("Sumcheck:alpha");

// Draw joint gate challenges (17 total).
std::vector<FF> gate_challenges(JOINT_LOG_N);
for (size_t i = 0; i < JOINT_LOG_N; i++) {
gate_challenges[i] = transcript->template get_challenge<FF>("Sumcheck:gate_challenge_" + std::to_string(i));
}
std::vector<FF> gate_challenges =
transcript->template get_dyadic_powers_of_challenge<FF>("Sumcheck:gate_challenge", JOINT_LOG_N);

// Compute α^{K_H}: offset for translator subrelation separators.
FF alpha_power_KH = FF(1);
Expand All @@ -95,12 +93,9 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
MegaZKCommitmentKey small_ck(1 << (log_subgroup_size + 1));
zk_sumcheck_data = ZKData(JOINT_LOG_N, transcript, small_ck);

// Gate separator polynomials:
// MegaZK circuit uses gate_challenges[0..mega_zk_log_n-1] for beta_products (real rounds only).
// During virtual rounds, only betas[] and partial_evaluation_result are accessed.
// Translator uses all JOINT_LOG_N challenges.
GateSeparatorPolynomial<FF> mega_zk_gate_sep(gate_challenges, mega_zk_log_n);
GateSeparatorPolynomial<FF> translator_gate_sep(gate_challenges, JOINT_LOG_N);
// Single gate separator for both circuits: beta_products has size 2^JOINT_LOG_N which covers
// both the MegaZK real rounds (2^mega_zk_log_n) and translator rounds (2^JOINT_LOG_N).
GateSeparatorPolynomial<FF> gate_sep(gate_challenges, JOINT_LOG_N);

// Round helper objects.
MegaZKProverRound mega_zk_round(static_cast<size_t>(1) << mega_zk_log_n);
Expand Down Expand Up @@ -146,8 +141,7 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
TranslatorFlavor::get_minicircuit_evaluations(translator_partial));
}
zk_sumcheck_data.update_zk_sumcheck_data(u, round_idx);
mega_zk_gate_sep.partially_evaluate(u);
translator_gate_sep.partially_evaluate(u);
gate_sep.partially_evaluate(u);
translator_round.round_size >>= 1;
};

Expand All @@ -159,13 +153,13 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
auto do_round = [&](auto& hpolys, auto& tpolys, size_t round_idx) -> FF {
U_joint = SumcheckRoundUnivariate::zero();

auto U_H = mega_zk_round.compute_univariate(hpolys, mega_zk_params, mega_zk_gate_sep, mega_zk_alphas);
auto U_H = mega_zk_round.compute_univariate(hpolys, mega_zk_params, gate_sep, mega_zk_alphas);
U_H += mega_zk_round.compute_disabled_contribution(
hpolys, mega_zk_params, mega_zk_gate_sep, mega_zk_alphas, rdp, masking_tail);
hpolys, mega_zk_params, gate_sep, mega_zk_alphas, rdp, masking_tail);
U_joint += U_H;

auto U_T = translator_round.compute_univariate(
tpolys, translator_relation_parameters, translator_gate_sep, translator_alphas);
auto U_T =
translator_round.compute_univariate(tpolys, translator_relation_parameters, gate_sep, translator_alphas);
for (auto& eval : U_T.evaluations) {
eval *= alpha_power_KH;
}
Expand Down Expand Up @@ -235,13 +229,13 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
for (size_t round_idx = mega_zk_log_n; round_idx < JOINT_LOG_N; round_idx++) {
U_joint = SumcheckRoundUnivariate::zero();

auto U_H = mega_zk_round.compute_virtual_contribution(
mega_zk_partial, mega_zk_params, mega_zk_gate_sep, mega_zk_alphas);
auto U_H =
mega_zk_round.compute_virtual_contribution(mega_zk_partial, mega_zk_params, gate_sep, mega_zk_alphas);
U_H *= rdp_scalar;
U_joint += U_H;

auto U_T = translator_round.compute_univariate(
translator_partial, translator_relation_parameters, translator_gate_sep, translator_alphas);
translator_partial, translator_relation_parameters, gate_sep, translator_alphas);
for (auto& eval : U_T.evaluations) {
eval *= alpha_power_KH;
}
Expand Down
Loading
Loading