Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions barretenberg/cpp/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,42 @@ Typical workflow
2. Build native code: `cd barretenberg/cpp && ./bootstrap.sh build_native`
3. Check VKs: `cd scripts && ./test_chonk_standalone_vks_havent_changed.sh`
4. If VKs changed intentionally: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs`

## Example IVC inputs

Example IVC inputs (msgpack files) for `bb prove --scheme chonk` are generated by e2e benchmark tests. Run the full bootstrap from the repo root to populate them:

```bash
cd $(git rev-parse --show-toplevel) && ./bootstrap.sh
```

This creates `yarn-project/end-to-end/example-app-ivc-inputs-out/<flow>/ivc-inputs.msgpack`. The inputs are generated by the `build_bench` function in `yarn-project/end-to-end/bootstrap.sh`, which runs client flow tests with `CAPTURE_IVC_FOLDER` set. In CI, these are cached as `bb-chonk-captures-<hash>.tar.gz`.

## Memory profiling

The `--memory_profile_out <file>` flag on `bb prove` outputs a JSON array of RSS checkpoints at key proving stages (after alloc, trace, oink, sumcheck, accumulate) for each circuit, with circuit names and indices.

```bash
cd barretenberg/cpp
./build/bin/bb prove \
--scheme chonk \
--ivc_inputs_path <path-to>/ivc-inputs.msgpack \
-o /tmp/proof-out \
-v \
--memory_profile_out /tmp/proof-out/memory_profile.json
```

For a visual timeline of a single run, pipe verbose output to `plot_memory.py`:

```bash
bb prove --scheme chonk ... -v 2>&1 | python3 scripts/plot_memory.py > memory.html
```

The extraction script converts the JSON into dashboard benchmark entries (one overlaid line per circuit stage, tracked across commits):

```bash
echo '[]' > /tmp/proof-out/benchmarks.bench.json
python3 scripts/extract_memory_benchmarks.py /tmp/proof-out "app-proving/flow/native"
```

In CI, this is integrated into `ci_benchmark_ivc_flows.sh` (native only) and uploaded to the benchmark dashboard.
4 changes: 2 additions & 2 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
"CC": "clang-20",
"CXX": "clang++-20",
"CFLAGS": "-gdwarf-4",
"CXXFLAGS": "-gdwarf-4",
"CXXFLAGS": "-gdwarf-4 -D_GLIBCXX_DEBUG",
"LDFLAGS": "-gdwarf-4"
},
"cacheVariables": {
Expand Down Expand Up @@ -162,7 +162,7 @@
"binaryDir": "build-debug-fast",
"environment": {
"CFLAGS": "-O2 -gdwarf",
"CXXFLAGS": "-O2 -gdwarf-4",
"CXXFLAGS": "-O2 -gdwarf-4 -D_GLIBCXX_DEBUG",
"LDFLAGS": "-O2 -gdwarf-4"
},
"cacheVariables": {
Expand Down
24 changes: 22 additions & 2 deletions barretenberg/cpp/scripts/ci_benchmark_ivc_flows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,14 @@ function run_bb_cli_bench {

if [[ "$runtime" == "native" ]]; then
# Add --bench_out_hierarchical flag for native builds to capture hierarchical op counts and timings
memusage "./$native_build_dir/bin/bb" "$@" "--bench_out_hierarchical" "$output/benchmark_breakdown.json" || {
echo "bb native failed with args: $@ --bench_out_hierarchical $output/benchmark_breakdown.json"
memusage "./$native_build_dir/bin/bb" "$@" "--bench_out_hierarchical" "$output/benchmark_breakdown.json" "--memory_profile_out" "$output/memory_profile.json" || {
echo "bb native failed with args: $@ --bench_out_hierarchical $output/benchmark_breakdown.json --memory_profile_out $output/memory_profile.json"
exit 1
}
else # wasm
export WASMTIME_ALLOWED_DIRS="--dir=$flow_folder --dir=$output"
# Add --bench_out_hierarchical flag for wasm builds to capture hierarchical op counts and timings
# Note: --memory_profile_out is native-only (getrusage not available in wasm)
memusage scripts/wasmtime.sh $WASMTIME_ALLOWED_DIRS ./build-wasm-threads/bin/bb "$@" "--bench_out_hierarchical" "$output/benchmark_breakdown.json" || {
echo "bb wasm failed with args: $@ --bench_out_hierarchical $output/benchmark_breakdown.json"
exit 1
Expand Down Expand Up @@ -139,6 +140,12 @@ EOF
echo "Extracting component timings from hierarchical breakdown..."
python3 scripts/extract_component_benchmarks.py "$output" "$name_path"
fi

# Extract memory profile metrics if available
if [[ -f "$output/memory_profile.json" ]]; then
echo "Extracting memory profile metrics..."
python3 scripts/extract_memory_benchmarks.py "$output" "$name_path"
fi
}

export -f verify_ivc_flow run_bb_cli_bench
Expand Down Expand Up @@ -178,4 +185,17 @@ if [[ "${CI:-}" == "1" ]] && [[ "${CI_USE_BUILD_INSTANCE_KEY:-0}" == "1" ]]; the
else
echo "Warning: benchmark breakdown file not found at $benchmark_breakdown_file"
fi

# Upload memory profile to S3
memory_profile_file="bench-out/app-proving/$flow_name/$runtime/memory_profile.json"
if [[ -f "$memory_profile_file" ]]; then
tmp_memory_file="/tmp/memory_profile_${runtime}_${flow_name}_$$.json"
cp "$memory_profile_file" "$tmp_memory_file"
memory_disk_key="memory-${runtime}-${flow_name}-${current_sha}"
{
cat "$tmp_memory_file" | gzip | cache_s3_transfer_to "bench/bb-breakdown" "$memory_disk_key"
rm -f "$tmp_memory_file"
} &
echo "Uploaded memory profile to S3: bench/bb-breakdown/$memory_disk_key"
fi
fi
62 changes: 62 additions & 0 deletions barretenberg/cpp/scripts/extract_memory_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Extracts memory profile metrics from a memory profile JSON and appends
them to the benchmark JSON file as dashboard entries.

Usage: extract_memory_benchmarks.py <output_dir> <name_path>

The output_dir must contain:
- memory_profile.json (memory profile data from bb --memory_profile_out)
- benchmarks.bench.json (existing benchmark results to append to)

The memory profile JSON format is documented in memory_profile.cpp.

Extraction failures (missing/malformed files) are reported as warnings and
the process still exits 0, so a bad profile never fails the benchmark job.
"""
import json
import os
import sys

# Stage ordering: prefix with a sequence number so alphabetical sort on the
# dashboard matches execution order. Unknown stage names pass through as-is.
STAGE_ORDER = {
    "after_alloc": "0_alloc",
    "after_trace": "1_trace",
    "after_oink": "2_oink",
    "after_sumcheck": "3_sumcheck",
    "after_accumulate": "4_accumulate",
}


def build_entries(checkpoints, name_path):
    """Convert memory-profile checkpoints into dashboard benchmark entries.

    Args:
        checkpoints: flat list of checkpoint dicts (msgpack-serialized from
            C++); each has "circuit_index", "stage", "heap_mb" and optionally
            "circuit_name".
        name_path: dashboard name prefix, e.g. "app-proving/flow/native".

    Returns:
        List of entry dicts ({name, unit, value, extra}); the "extra" field
        groups all entries of one flow under a single stacked chart.
    """
    entries = []
    for cp in checkpoints:
        circuit_name = cp.get("circuit_name", "")
        idx = cp["circuit_index"]
        stage = STAGE_ORDER.get(cp["stage"], cp["stage"])
        # Zero-pad the circuit index so lexicographic sort == circuit order.
        label = f"{idx:02d}_{circuit_name}_{stage}" if circuit_name else f"{idx:02d}_{stage}"
        entries.append({
            "name": f"{name_path}/{label}",
            "unit": "MB",
            "value": cp["heap_mb"],
            "extra": f"stacked:{name_path}/heap_over_stages",
        })
    return entries


def main(argv):
    """CLI entry point. Returns the process exit code."""
    if len(argv) != 3:
        print(f"Usage: {argv[0]} <output_dir> <name_path>", file=sys.stderr)
        return 1

    output_dir = argv[1]
    name_path = argv[2]

    try:
        with open(os.path.join(output_dir, "memory_profile.json"), "r") as f:
            checkpoints = json.load(f)

        entries = build_entries(checkpoints, name_path)

        # Append to the existing benchmarks file (must already exist; CI
        # seeds it before any extraction scripts run).
        bench_path = os.path.join(output_dir, "benchmarks.bench.json")
        with open(bench_path, "r") as f:
            existing = json.load(f)

        existing.extend(entries)

        with open(bench_path, "w") as f:
            json.dump(existing, f, indent=2)

        print(f"Extracted {len(entries)} memory profile metrics")
    except Exception as e:  # best-effort: warn but never fail the job
        print(f"Warning: Could not extract memory profile: {e}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
20 changes: 20 additions & 0 deletions barretenberg/cpp/src/barretenberg/bb/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "barretenberg/common/assert.hpp"
#include "barretenberg/common/bb_bench.hpp"
#include "barretenberg/common/get_bytecode.hpp"
#include "barretenberg/common/memory_profile.hpp"
#include "barretenberg/common/thread.hpp"
#include "barretenberg/common/version.hpp"
#include "barretenberg/dsl/acir_format/serde/index.hpp"
Expand Down Expand Up @@ -389,6 +390,15 @@ int parse_and_run_cli_command(int argc, char* argv[])
"parent-child relationships) as json.")
->group(advanced_group);
};
std::string memory_profile_out;
const auto add_memory_profile_out_option = [&](CLI::App* subcommand) {
return subcommand
->add_option("--memory_profile_out",
memory_profile_out,
"Path to write memory profile data (polynomial breakdown by category, RSS "
"checkpoints, CRS size) as json.")
->group(advanced_group);
};

/***************************************************************************************************************
* Top-level flags
Expand Down Expand Up @@ -482,6 +492,7 @@ int parse_and_run_cli_command(int argc, char* argv[])
add_print_bench_flag(prove);
add_bench_out_option(prove);
add_bench_out_hierarchical_option(prove);
add_memory_profile_out_option(prove);
add_storage_budget_option(prove);
add_output_format_option(prove);

Expand Down Expand Up @@ -811,6 +822,10 @@ int parse_and_run_cli_command(int argc, char* argv[])
if (!flags.storage_budget.empty()) {
storage_budget = parse_size_string(flags.storage_budget);
}
if (!memory_profile_out.empty()) {
bb::detail::use_memory_profile = true;
vinfo("Memory profiling enabled via --memory_profile_out");
}
if (print_bench || !bench_out.empty() || !bench_out_hierarchical.empty()) {
bb::detail::use_bb_bench = true;
vinfo("BB_BENCH enabled via --print_bench or --bench_out");
Expand Down Expand Up @@ -987,6 +1002,11 @@ int parse_and_run_cli_command(int argc, char* argv[])
bb::detail::GLOBAL_BENCH_STATS.serialize_aggregate_data_json(file);
}
#endif
if (!memory_profile_out.empty()) {
std::ofstream file(memory_profile_out);
bb::detail::GLOBAL_MEMORY_PROFILE.serialize_json(file);
vinfo("Memory profile written to ", memory_profile_out);
}
return 0;
}
if (check->parsed()) {
Expand Down
4 changes: 4 additions & 0 deletions barretenberg/cpp/src/barretenberg/bbapi/bbapi_chonk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "barretenberg/commitment_schemes/ipa/ipa.hpp"
#include "barretenberg/commitment_schemes/verification_key.hpp"
#include "barretenberg/common/log.hpp"
#include "barretenberg/common/memory_profile.hpp"
#include "barretenberg/common/serialize.hpp"
#include "barretenberg/common/throw_or_abort.hpp"
#include "barretenberg/dsl/acir_format/acir_format.hpp"
Expand Down Expand Up @@ -90,6 +91,9 @@ ChonkAccumulate::Response ChonkAccumulate::execute(BBApiRequest& request) &&
}

info("ChonkAccumulate - accumulating circuit '", request.loaded_circuit_name, "'");
if (detail::use_memory_profile) {
detail::GLOBAL_MEMORY_PROFILE.set_circuit_name(request.loaded_circuit_name);
}
request.ivc_in_progress->accumulate(circuit, precomputed_vk);
request.ivc_stack_depth++;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#include <benchmark/benchmark.h>

#include "barretenberg/commitment_schemes/ipa/ipa.hpp"
#include "barretenberg/ecc/curves/bn254/fq.hpp"
#include "barretenberg/eccvm/eccvm_circuit_builder.hpp"
#include "barretenberg/eccvm/eccvm_prover.hpp"
#include "barretenberg/eccvm/eccvm_verifier.hpp"
#include "barretenberg/srs/global_crs.hpp"

using namespace benchmark;
using namespace bb;
Expand Down Expand Up @@ -40,6 +43,9 @@ Builder generate_trace(size_t target_num_gates)
op_queue->merge();
}

using Fq = curve::BN254::BaseField;
op_queue->append_hiding_op(Fq::random_element(), Fq::random_element());

Builder builder{ op_queue };
return builder;
}
Expand All @@ -63,12 +69,35 @@ void eccvm_prove(State& state) noexcept
std::shared_ptr<Transcript> prover_transcript = std::make_shared<Transcript>();
ECCVMProver prover(builder, prover_transcript);
for (auto _ : state) {
auto [proof, ipa_claim] = prover.construct_proof();
auto [proof, opening_claim] = prover.construct_proof();
auto ipa_transcript = std::make_shared<Transcript>();
IPA<Flavor::Curve>::compute_opening_proof(prover.key->commitment_key, opening_claim, ipa_transcript);
};
}

void eccvm_ipa(State& state) noexcept
{
size_t target_num_gates = 1 << static_cast<size_t>(state.range(0));
Builder builder = generate_trace(target_num_gates);
std::shared_ptr<Transcript> prover_transcript = std::make_shared<Transcript>();
ECCVMProver prover(builder, prover_transcript);
auto [proof, opening_claim] = prover.construct_proof();
for (auto _ : state) {
auto ipa_transcript = std::make_shared<Transcript>();
IPA<Flavor::Curve>::compute_opening_proof(prover.key->commitment_key, opening_claim, ipa_transcript);
};
}

BENCHMARK(eccvm_generate_prover)->Unit(kMillisecond)->DenseRange(12, CONST_ECCVM_LOG_N);
BENCHMARK(eccvm_prove)->Unit(kMillisecond)->DenseRange(12, CONST_ECCVM_LOG_N);
BENCHMARK(eccvm_ipa)->Unit(kMillisecond)->DenseRange(12, CONST_ECCVM_LOG_N);
} // namespace

BENCHMARK_MAIN();
int main(int argc, char** argv)
{
bb::srs::init_file_crs_factory(bb::srs::bb_crs_path());
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ class BatchedHonkTranslatorTests : public ::testing::Test {
// ── Round 3: translator Z_PERM, then joint alpha + gate challenges ────────
m.add_entry(round, "Z_PERM", G);
m.add_challenge(round, "Sumcheck:alpha");
for (size_t i = 0; i < JOINT_LOG_N; ++i) {
m.add_challenge(round, "Sumcheck:gate_challenge_" + std::to_string(i));
}
m.add_challenge(round, "Sumcheck:gate_challenge");
round++;

// ── Round 4: Libra masking commitment + Sum ───────────────────────────────
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,8 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
const FF alpha = transcript->template get_challenge<FF>("Sumcheck:alpha");

// Draw joint gate challenges (17 total).
std::vector<FF> gate_challenges(JOINT_LOG_N);
for (size_t i = 0; i < JOINT_LOG_N; i++) {
gate_challenges[i] = transcript->template get_challenge<FF>("Sumcheck:gate_challenge_" + std::to_string(i));
}
std::vector<FF> gate_challenges =
transcript->template get_dyadic_powers_of_challenge<FF>("Sumcheck:gate_challenge", JOINT_LOG_N);

// Compute α^{K_H}: offset for translator subrelation separators.
FF alpha_power_KH = FF(1);
Expand All @@ -95,12 +93,9 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
MegaZKCommitmentKey small_ck(1 << (log_subgroup_size + 1));
zk_sumcheck_data = ZKData(JOINT_LOG_N, transcript, small_ck);

// Gate separator polynomials:
// MegaZK circuit uses gate_challenges[0..mega_zk_log_n-1] for beta_products (real rounds only).
// During virtual rounds, only betas[] and partial_evaluation_result are accessed.
// Translator uses all JOINT_LOG_N challenges.
GateSeparatorPolynomial<FF> mega_zk_gate_sep(gate_challenges, mega_zk_log_n);
GateSeparatorPolynomial<FF> translator_gate_sep(gate_challenges, JOINT_LOG_N);
// Single gate separator for both circuits: beta_products has size 2^JOINT_LOG_N which covers
// both the MegaZK real rounds (2^mega_zk_log_n) and translator rounds (2^JOINT_LOG_N).
GateSeparatorPolynomial<FF> gate_sep(gate_challenges, JOINT_LOG_N);

// Round helper objects.
MegaZKProverRound mega_zk_round(static_cast<size_t>(1) << mega_zk_log_n);
Expand Down Expand Up @@ -146,8 +141,7 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
TranslatorFlavor::get_minicircuit_evaluations(translator_partial));
}
zk_sumcheck_data.update_zk_sumcheck_data(u, round_idx);
mega_zk_gate_sep.partially_evaluate(u);
translator_gate_sep.partially_evaluate(u);
gate_sep.partially_evaluate(u);
translator_round.round_size >>= 1;
};

Expand All @@ -159,13 +153,13 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
auto do_round = [&](auto& hpolys, auto& tpolys, size_t round_idx) -> FF {
U_joint = SumcheckRoundUnivariate::zero();

auto U_H = mega_zk_round.compute_univariate(hpolys, mega_zk_params, mega_zk_gate_sep, mega_zk_alphas);
auto U_H = mega_zk_round.compute_univariate(hpolys, mega_zk_params, gate_sep, mega_zk_alphas);
U_H += mega_zk_round.compute_disabled_contribution(
hpolys, mega_zk_params, mega_zk_gate_sep, mega_zk_alphas, rdp, masking_tail);
hpolys, mega_zk_params, gate_sep, mega_zk_alphas, rdp, masking_tail);
U_joint += U_H;

auto U_T = translator_round.compute_univariate(
tpolys, translator_relation_parameters, translator_gate_sep, translator_alphas);
auto U_T =
translator_round.compute_univariate(tpolys, translator_relation_parameters, gate_sep, translator_alphas);
for (auto& eval : U_T.evaluations) {
eval *= alpha_power_KH;
}
Expand Down Expand Up @@ -235,13 +229,13 @@ void BatchedHonkTranslatorProver::execute_joint_sumcheck_rounds()
for (size_t round_idx = mega_zk_log_n; round_idx < JOINT_LOG_N; round_idx++) {
U_joint = SumcheckRoundUnivariate::zero();

auto U_H = mega_zk_round.compute_virtual_contribution(
mega_zk_partial, mega_zk_params, mega_zk_gate_sep, mega_zk_alphas);
auto U_H =
mega_zk_round.compute_virtual_contribution(mega_zk_partial, mega_zk_params, gate_sep, mega_zk_alphas);
U_H *= rdp_scalar;
U_joint += U_H;

auto U_T = translator_round.compute_univariate(
translator_partial, translator_relation_parameters, translator_gate_sep, translator_alphas);
translator_partial, translator_relation_parameters, gate_sep, translator_alphas);
for (auto& eval : U_T.evaluations) {
eval *= alpha_power_KH;
}
Expand Down
Loading
Loading