Skip to content

Commit b25cbd7

Browse files
committed
add canonical CMR
1 parent 32d86f0 commit b25cbd7

37 files changed

Lines changed: 547 additions & 2 deletions

cli/Cargo.lock

Lines changed: 311 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ serde = { version = "1", features = ["derive"] }
1717
serde_json = "1"
1818
toml_edit = { version = "0.22", features = ["serde"] }
1919
dotenvy = "0.15"
20+
simplicity-lang = { version = "0.7.0", features = ["base64"] }

cli/src/canonicalize.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
use anyhow::{Context, Result};
2+
use simplicity::{
3+
dag::{DagLike, MaxSharing},
4+
jet::Elements,
5+
node::{Commit, CommitNode, Inner},
6+
BitIter, Cmr, Word,
7+
};
8+
9+
/// Decode a Simplicity program from a base64 string and return its canonical CMR as hex.
10+
///
11+
/// The compiled `.simb` format and `simc` output are base64-encoded CommitNodes.
12+
pub fn canonical_cmr_from_base64(b64: &str) -> Result<String> {
13+
let node = CommitNode::<Elements>::from_str(b64)
14+
.context("failed to decode Simplicity CommitNode from base64")?;
15+
Ok(compute_canonical_cmr(&node).to_string())
16+
}
17+
18+
/// Decode a Simplicity program from raw bytes and return its canonical CMR as hex.
19+
#[allow(dead_code)]
20+
pub fn canonical_cmr_from_bytes(bytes: &[u8]) -> Result<String> {
21+
let bits = BitIter::from(bytes);
22+
let node = CommitNode::<Elements>::decode(bits)
23+
.context("failed to decode Simplicity CommitNode")?;
24+
Ok(compute_canonical_cmr(&node).to_string())
25+
}
26+
27+
/// Compute the canonical CMR of an already-decoded [`CommitNode`].
28+
///
29+
/// Walks the DAG in post-order and recomputes the CMR with:
30+
/// - Every `Word` node replaced by an all-zeros word of the same bit-width
31+
/// - Every hidden-branch CMR in `AssertL`/`AssertR` replaced with `Cmr::unit()`
32+
///
33+
/// Programs that share the same template but differ only in baked-in constants
34+
/// (e.g. a public key parameter) will produce the same canonical CMR.
35+
pub fn compute_canonical_cmr(node: &CommitNode<Elements>) -> Cmr {
36+
let mut cmrs: Vec<Cmr> = Vec::new();
37+
38+
for data in node.post_order_iter::<MaxSharing<Commit<Elements>>>() {
39+
let lc = data.left_index.map(|i| cmrs[i]);
40+
let rc = data.right_index.map(|i| cmrs[i]);
41+
42+
let canonical = match data.node.inner() {
43+
Inner::Iden => Cmr::iden(),
44+
Inner::Unit => Cmr::unit(),
45+
Inner::InjL(_) => Cmr::injl(lc.unwrap()),
46+
Inner::InjR(_) => Cmr::injr(lc.unwrap()),
47+
Inner::Take(_) => Cmr::take(lc.unwrap()),
48+
Inner::Drop(_) => Cmr::drop(lc.unwrap()),
49+
Inner::Comp(_, _) => Cmr::comp(lc.unwrap(), rc.unwrap()),
50+
Inner::Case(_, _) => Cmr::case(lc.unwrap(), rc.unwrap()),
51+
Inner::Pair(_, _) => Cmr::pair(lc.unwrap(), rc.unwrap()),
52+
Inner::Disconnect(_, _) => Cmr::disconnect(lc.unwrap()),
53+
// AssertL(child, hidden_right_cmr): replace hidden CMR with unit.
54+
Inner::AssertL(_, _) => Cmr::case(lc.unwrap(), Cmr::unit()),
55+
// AssertR(hidden_left_cmr, child): sole DAG child is treated as left in iteration.
56+
Inner::AssertR(_, _) => Cmr::case(Cmr::unit(), lc.unwrap()),
57+
Inner::Witness(_) => Cmr::witness(),
58+
Inner::Fail(entropy) => Cmr::fail(*entropy),
59+
Inner::Jet(jet) => Cmr::jet(*jet),
60+
// Replace word value with all-zeros of the same bit-width.
61+
Inner::Word(w) => Cmr::const_word(&zero_word(w.n())),
62+
};
63+
cmrs.push(canonical);
64+
}
65+
66+
cmrs.pop().expect("CommitNode is non-empty")
67+
}
68+
69+
/// Create an all-zeros [`Word`] of type `2^(2^n)`.
70+
fn zero_word(n: u32) -> Word {
71+
match n {
72+
0 => Word::u1(0),
73+
1 => Word::u2(0),
74+
2 => Word::u4(0),
75+
3 => Word::u8(0),
76+
4 => Word::u16(0),
77+
5 => Word::u32(0),
78+
6 => Word::u64(0),
79+
7 => Word::u128(0),
80+
8 => Word::u256([0u8; 32]),
81+
9 => Word::u512([0u8; 64]),
82+
_ => {
83+
let half = zero_word(n - 1);
84+
half.product(zero_word(n - 1))
85+
.expect("same-sized zero words can always be combined")
86+
}
87+
}
88+
}

cli/src/main.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
mod canonicalize;
2+
13
use std::collections::BTreeMap;
24
use std::fs;
35
use std::io::{BufRead, BufReader};
@@ -150,6 +152,17 @@ enum Commands {
150152
#[arg(long)]
151153
rebuild_docker: bool,
152154
},
155+
156+
/// Backfill canonical_cmr into all TOML entries without re-running Docker.
157+
///
158+
/// Reads the compiled .simb files already sitting next to each TOML,
159+
/// decodes them, computes the canonical CMR (all Word values and pruned-branch
160+
/// CMRs zeroed out), and writes canonical_cmr into the TOML.
161+
///
162+
/// Programs that share the same template but differ only in baked-in
163+
/// constants (e.g. different public keys) will get the same canonical_cmr,
164+
/// making it possible to match on-chain spends back to a known template.
165+
Backfill,
153166
}
154167

155168
// ---------------------------------------------------------------------------
@@ -604,6 +617,24 @@ fn apply_simc_to_toml(doc: &mut DocumentMut, s: &SimcOutput, auto_tags: &[String
604617
if let Some(cmr) = &s.cmr {
605618
doc["cmr"] = sv(cmr);
606619
}
620+
621+
// Canonical CMR: CMR recomputed after zeroing all Word values and hidden-branch
622+
// CMRs. Programs sharing the same template but different baked-in constants
623+
// (e.g. public keys) will have the same canonical_cmr, enabling template matching.
624+
doc.remove("canonical_cmr");
625+
if let Some(program_b64) = &s.program {
626+
if s._error.as_deref().unwrap_or("").is_empty() {
627+
match canonicalize::canonical_cmr_from_base64(program_b64) {
628+
Ok(c) => {
629+
doc["canonical_cmr"] = sv(&c);
630+
}
631+
Err(e) => {
632+
eprintln!(" warning: canonical CMR computation failed: {e:#}");
633+
}
634+
}
635+
}
636+
}
637+
607638
if let Some(ta) = &s.type_arrow {
608639
doc["type_arrow"] = sv(ta);
609640
}
@@ -1056,6 +1087,87 @@ fn cmd_debug(url: &str) -> Result<()> {
10561087
Ok(())
10571088
}
10581089

1090+
fn cmd_backfill() -> Result<()> {
1091+
let dir = data_dir();
1092+
let tomls = collect_tomls(&dir);
1093+
let total = tomls.len();
1094+
let mut updated = 0usize;
1095+
let mut skipped = 0usize;
1096+
1097+
for toml_path in &tomls {
1098+
let stem = toml_path.file_stem().and_then(|s| s.to_str()).unwrap_or("?");
1099+
1100+
// Find the best available .simb: prefer the base compile, then any
1101+
// args/witness variant (they all share the same canonical CMR).
1102+
let base_simb = toml_path.with_extension("simb");
1103+
let simb_path = if base_simb.exists() {
1104+
Some(base_simb)
1105+
} else {
1106+
// Walk siblings for <stem>.*.simb
1107+
let dir = toml_path.parent().unwrap_or(Path::new("."));
1108+
fs::read_dir(dir)
1109+
.ok()
1110+
.and_then(|entries| {
1111+
entries
1112+
.filter_map(|e| e.ok())
1113+
.map(|e| e.path())
1114+
.find(|p| {
1115+
p.extension().and_then(|e| e.to_str()) == Some("simb")
1116+
&& p.file_stem()
1117+
.and_then(|s| s.to_str())
1118+
.map(|s| s.starts_with(stem))
1119+
.unwrap_or(false)
1120+
})
1121+
})
1122+
};
1123+
1124+
let Some(simb_path) = simb_path else {
1125+
skipped += 1;
1126+
continue;
1127+
};
1128+
1129+
let b64 = match fs::read_to_string(&simb_path) {
1130+
Ok(s) => s.trim().to_string(),
1131+
Err(e) => {
1132+
eprintln!(" warning: cannot read {}: {e}", simb_path.display());
1133+
skipped += 1;
1134+
continue;
1135+
}
1136+
};
1137+
1138+
let canonical_cmr = match canonicalize::canonical_cmr_from_base64(&b64) {
1139+
Ok(c) => c,
1140+
Err(e) => {
1141+
eprintln!(" warning: {stem}: {e:#}");
1142+
skipped += 1;
1143+
continue;
1144+
}
1145+
};
1146+
1147+
let raw = fs::read_to_string(toml_path)
1148+
.with_context(|| format!("cannot read {}", toml_path.display()))?;
1149+
let mut doc: DocumentMut = raw
1150+
.parse()
1151+
.with_context(|| format!("cannot parse {}", toml_path.display()))?;
1152+
1153+
// Skip if canonical_cmr is already correct.
1154+
if doc.get("canonical_cmr").and_then(|v| v.as_str()) == Some(&canonical_cmr) {
1155+
continue;
1156+
}
1157+
1158+
doc["canonical_cmr"] = Item::Value(Value::from(canonical_cmr.as_str()));
1159+
fs::write(toml_path, doc.to_string())
1160+
.with_context(|| format!("cannot write {}", toml_path.display()))?;
1161+
updated += 1;
1162+
}
1163+
1164+
println!(
1165+
"Done: {updated} TOMLs updated, {skipped} skipped (no .simb), {} unchanged",
1166+
total - updated - skipped
1167+
);
1168+
Ok(())
1169+
}
1170+
10591171
fn main() {
10601172
let _ = dotenvy::dotenv();
10611173

@@ -1100,6 +1212,7 @@ fn main() {
11001212
Commands::Debug { url } => cmd_debug(&url),
11011213
Commands::Preprocess { url } => cmd_preprocess(&url),
11021214
Commands::Compile { all, tag, slugs, rebuild_docker } => cmd_compile(all, tag.as_deref(), &slugs, rebuild_docker),
1215+
Commands::Backfill => cmd_backfill(),
11031216
};
11041217
if let Err(e) = result {
11051218
eprintln!("error: {e:#}");

data/programs/BlockstreamResearch/SimplicityHL/array_fold.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ jet_set = "core"
1212
liquid_address = "ex1p6mnj7hpa5x9e7vgs82jyu33e79f0kqshfmf0uxx7ey4a0am740pq0clqml"
1313
liquid_testnet_address = "tex1p6mnj7hpa5x9e7vgs82jyu33e79f0kqshfmf0uxx7ey4a0am740pqwpd82s"
1414
is_redeem = false
15+
canonical_cmr = "6c25da58ce743ff77ea9bc9ea6df50fc51a5f7d6877d88b0798d455cb7f4eae7"
1516
autodetected_tags = []
1617

1718
[jets]

data/programs/BlockstreamResearch/SimplicityHL/array_fold_2n.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ jet_set = "core"
1212
liquid_address = "ex1pcw25hlh6479se9xwjfeup597x9ujzcnv5mnjw08ehwf0qg6v989s4thx70"
1313
liquid_testnet_address = "tex1pcw25hlh6479se9xwjfeup597x9ujzcnv5mnjw08ehwf0qg6v989s5j9p0q"
1414
is_redeem = false
15+
canonical_cmr = "01bc069fa67262554f9db7dc87646f96b7c779573b9f5a53470366126d0bedf6"
1516
comments = ["From https://github.com/BlockstreamResearch/SimplicityHL/issues/153"]
1617
autodetected_tags = []
1718

data/programs/BlockstreamResearch/SimplicityHL/cat.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ jet_set = "core"
1212
liquid_address = "ex1p305439usq06f4maelan8txnxshktvayu9z5gnwu6zrrxm9vmlufqef9ldr"
1313
liquid_testnet_address = "tex1p305439usq06f4maelan8txnxshktvayu9z5gnwu6zrrxm9vmlufqcshcuv"
1414
is_redeem = false
15+
canonical_cmr = "0b809a8c6f73c06e13eef8a8b76bcd90909cb5d70cd13967e5785620e90518c1"
1516
autodetected_tags = []
1617

1718
[jets]

data/programs/BlockstreamResearch/SimplicityHL/ctv.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ jet_set = "core"
1212
liquid_address = "ex1p0xyxkr84prhfaykcu88k6z2mu2k42xlmjydjwdtdpvtxjvuajegq9x8cf3"
1313
liquid_testnet_address = "tex1p0xyxkr84prhfaykcu88k6z2mu2k42xlmjydjwdtdpvtxjvuajegqyl4lc7"
1414
is_redeem = false
15+
canonical_cmr = "823d2b98c7d6423cc6d48f0473c5ac4d11f18360859fcf44836685499fef68f1"
1516
comments = ["This program is an emulation of CTV using simplicity", "Instead of specifying the template hash as in BIP CTV,", "we require the user to specify all the components of the sighash", "that they want to commit."]
1617
autodetected_tags = []
1718

data/programs/BlockstreamResearch/SimplicityHL/escrow_with_delay.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ type_arrow = "1 → 1"
1212
jet_set = "core"
1313
liquid_address = "ex1pphcc2shlqhzvnavkmq9960umzx94k7jgr6x3gnpkcnc38xyjf9pqc9stus"
1414
liquid_testnet_address = "tex1pphcc2shlqhzvnavkmq9960umzx94k7jgr6x3gnpkcnc38xyjf9pqeuzvdl"
15+
canonical_cmr = "e90992e6bfce117cb75f5490e50ed1258c1f901f645c7fc44d9210f1a0c4ab33"
1516
witnesses = ["TRANSFER_OR_TIMEOUT"]
1617
comments = ["docs.ivylang.org/bitcoin/language/ExampleContracts.html#escrowwithdelay", "1 * G", "2 * G", "3 * G", "ESCROW WITH DELAY", "An escrow agent can approve the movement of coins in cooperation with the", "sender or the recipient. The escrow agent cannot steal the coins for himself.", "The sender can refund her coins after a timeout.", "https://docs.ivylang.org/bitcoin/language/ExampleContracts.html#escrowwithdelay"]
1718
autodetected_tags = []

data/programs/BlockstreamResearch/SimplicityHL/hash_loop.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ type_arrow = "1 → 1"
1111
jet_set = "core"
1212
liquid_address = "ex1pzp4xccn92zvhh44z9qwh3ap3jnv677ympuaafmyv4urgfrp2lafs6s7k32"
1313
liquid_testnet_address = "tex1pzp4xccn92zvhh44z9qwh3ap3jnv677ympuaafmyv4urgfrp2lafsmfv3q9"
14+
canonical_cmr = "dcb49abacef82397279d8425300af4335eeff069a7171779f49945a0a8fa6ba6"
1415
comments = ["Add counter to streaming hash and finalize when the loop exists", "Hash bytes 0x00 to 0xff", "Hash bytes 0x0000 to 0xffff", "This takes ~10 seconds on my computer", "let ctx: Ctx8 = jet::sha_256_ctx_8_init();", "let out: Either<u256, Ctx8> = for_while::<hash_counter_16>(ctx, ());", "let expected: u256 = 0x281f79f89f0121c31db2bea5d7151db246349b25f5901c114505c18bfaa50ba1;", "assert!(jet::eq_256(expected, unwrap_left::<Ctx8>(out)));"]
1516
autodetected_tags = []
1617

0 commit comments

Comments
 (0)