CorvidLabs · 0xLeif · May 18, 2026 · May 18, 2026 · gemini-code-assist · May 18, 2026
diff --git a/src/chunking.ts b/src/chunking.ts
@@ -0,0 +1,60 @@
+/**
+ * Chunking for memories that don't fit in a single Algorand note.
+ *
+ * Algorand caps tx notes at 1024 bytes. Subtract the JSON envelope
+ * (~150 bytes with book/page/total fields) and the encryption layer
+ * (envelope adds ~40 bytes, base64 inflates by 4/3) and you have
+ * ~600 bytes of plaintext per chunk that reliably fit.
+ *
+ * On save: the caller decides whether to chunk. `chunkValue` splits at
+ * ~600-byte UTF-8 codepoint boundaries; `joinChunks` only concatenates,
+ * so the caller must first sort the pages by `page` ascending.
+ *
+ * On recall: callers collect all txs/ASAs for a given key, group by the
+ * save's `created` timestamp (one save = one book), require all pages
+ * to be present, and concatenate.
+ */
+
+/**
+ * Max plaintext size per chunk, counted in UTF-8 bytes (not JS string
+ * length). Conservative — leaves headroom for the envelope JSON and the
+ * encryption envelope overhead in `@corvidlabs/ts-algochat`.
+ */
+export const MAX_CLEARTEXT_PER_CHUNK = 600;
+
+/**
+ * Split `value` into N chunks of at most `MAX_CLEARTEXT_PER_CHUNK`
+ * bytes each. Operates on UTF-8 byte boundaries — if a multi-byte
+ * codepoint straddles the cut, the chunk boundary is pushed back to
+ * the prior codepoint start so we never produce invalid UTF-8.
+ */
+export function chunkValue(value: string): string[] {
+  const bytes = Buffer.from(value, "utf-8");
+  if (bytes.length <= MAX_CLEARTEXT_PER_CHUNK) return [value];
+
+  const chunks: string[] = [];
+  let offset = 0;
+  while (offset < bytes.length) {
+    let end = Math.min(offset + MAX_CLEARTEXT_PER_CHUNK, bytes.length);
+    // If the byte at the cut is a UTF-8 continuation byte (0b10xxxxxx,
+    // i.e. (byte & 0xC0) === 0x80), the cut is inside a codepoint: walk
+    // back to its leading byte (valid UTF-8 needs at most 3 steps).
+    while (end < bytes.length && (bytes[end] & 0xc0) === 0x80) end--;
+    chunks.push(bytes.slice(offset, end).toString("utf-8"));
+    offset = end;
+  }
+  return chunks;
+}
+
+/**
+ * Reassemble pages into the original value by plain concatenation. No
+ * sorting or validation happens here: pass pages in order (page 1..N).
+ */
+export function joinChunks(chunks: string[]): string {
+  return chunks.join("");
+}
+
+/** True when `value` is longer than MAX_CLEARTEXT_PER_CHUNK in UTF-8 bytes. */
+export function needsChunking(value: string): boolean {
+  return Buffer.byteLength(value, "utf-8") > MAX_CLEARTEXT_PER_CHUNK;
+}
diff --git a/src/permanent.ts b/src/permanent.ts
diff --git a/test/chunking.test.ts b/test/chunking.test.ts
@@ -0,0 +1,83 @@
+import { test, expect, describe } from "bun:test";
+import { chunkValue, joinChunks, needsChunking, MAX_CLEARTEXT_PER_CHUNK } from "../src/chunking.js";
+
+describe("chunkValue", () => {
+  test("short input returns a single chunk", () => {
+    const v = "hello world";
+    const chunks = chunkValue(v);
+    expect(chunks).toEqual([v]);
+  });
+
+  test("input at exactly MAX_CLEARTEXT_PER_CHUNK returns single chunk", () => {
+    const v = "a".repeat(MAX_CLEARTEXT_PER_CHUNK);
+    const chunks = chunkValue(v);
+    expect(chunks.length).toBe(1);
+    expect(chunks[0].length).toBe(MAX_CLEARTEXT_PER_CHUNK);
+  });
+
+  test("input one byte over the limit produces two chunks", () => {
+    const v = "a".repeat(MAX_CLEARTEXT_PER_CHUNK + 1);
+    const chunks = chunkValue(v);
+    expect(chunks.length).toBe(2);
+    expect(chunks[0].length).toBe(MAX_CLEARTEXT_PER_CHUNK);
+    expect(chunks[1].length).toBe(1);
+  });
+
+  test("3000-byte input produces 5 chunks of 600 bytes", () => {
+    const v = "a".repeat(3000);
+    const chunks = chunkValue(v);
+    expect(chunks.length).toBe(5);
+    for (const c of chunks) {
+      expect(Buffer.byteLength(c, "utf-8")).toBeLessThanOrEqual(MAX_CLEARTEXT_PER_CHUNK);
+    }
+  });
+
+  test("round-trip: chunkValue → joinChunks preserves ASCII content", () => {
+    const v = "x".repeat(2500);
+    expect(joinChunks(chunkValue(v))).toBe(v);
+  });
+
+  test("multi-byte UTF-8 codepoints are never split mid-character", () => {
+    // 🌟 is 4 bytes in UTF-8. Repeat enough to span chunk boundaries.
+    const v = "🌟".repeat(200); // 800 bytes
+    const chunks = chunkValue(v);
+    for (const c of chunks) {
+      // A codepoint split mid-sequence would decode to U+FFFD; none may appear.
+      const decoded = Buffer.from(c, "utf-8").toString("utf-8");
+      expect(decoded).toBe(c);
+      expect(decoded).not.toContain("�");
+    }
+    expect(joinChunks(chunks)).toBe(v);
+  });
+
+  test("round-trip preserves mixed ASCII + emoji + CJK + accented", () => {
+    const segment = "Hello 世界 ñoño 🚀 — مرحبا — ";
+    const v = segment.repeat(60);
+    expect(joinChunks(chunkValue(v))).toBe(v);
+  });
+
+  test("empty string returns one empty chunk", () => {
+    expect(chunkValue("")).toEqual([""]);
+  });
+});
+
+describe("needsChunking", () => {
+  test("short ASCII does not need chunking", () => {
+    expect(needsChunking("hello")).toBe(false);
+  });
+
+  test("input at boundary does not need chunking", () => {
+    expect(needsChunking("a".repeat(MAX_CLEARTEXT_PER_CHUNK))).toBe(false);
+  });
+
+  test("input over boundary needs chunking", () => {
+    expect(needsChunking("a".repeat(MAX_CLEARTEXT_PER_CHUNK + 1))).toBe(true);
+  });
+
+  test("UTF-8 multi-byte expansion can push a short string over", () => {
+    // 200 emoji = 800 bytes (each is 4 bytes UTF-8)
+    expect(needsChunking("🌟".repeat(200))).toBe(true);
+    // JS .length counts UTF-16 code units (2 per emoji): 400, well under MAX
+    expect("🌟".repeat(200).length).toBe(400);
+  });
+});
diff --git a/test/permanent-reassemble.test.ts b/test/permanent-reassemble.test.ts
@@ -0,0 +1,99 @@
+import { test, expect, describe } from "bun:test";
+import { __test } from "../src/permanent.js";
+
+const { reassemble } = __test;
+
+interface E { // test-local entry shape fed to reassemble — TODO confirm it matches src/permanent.ts
+  key: string;
+  value: string;
+  txid: string;
+  created: string; // ISO-8601 timestamp string in these fixtures
+  round: number;
+  tombstone: boolean;
+  book?: string; // chunk fields are optional; single-chunk fixtures omit them
+  page?: number; // 1-based page index in the fixtures below
+  total?: number; // number of pages the save claims to have
+}
+
+const ent = (over: Partial<E>): E => ({ // fixture factory: defaults below, then `over` wins
+  key: "k",
+  value: "",
+  txid: "tx",
+  created: "2026-05-18T00:00:00Z",
+  round: 1,
+  tombstone: false,
+  ...over,
+});
+
+describe("permanent reassemble", () => {
+  test("legacy single-chunk entries pass through unchanged", () => {
+    const input: E[] = [ent({ key: "a", value: "hello", round: 5 })];
+    expect(reassemble(input)).toEqual(input);
+  });
+
+  test("tombstones pass through unchanged", () => {
+    const input: E[] = [ent({ key: "a", tombstone: true, round: 10 })];
+    expect(reassemble(input)).toEqual(input);
+  });
+
+  test("two pages with matching key+created are joined in page order", () => {
+    const input: E[] = [
+      ent({ key: "k", value: "World", round: 2, book: "k", page: 2, total: 2, txid: "tx2" }),
+      ent({ key: "k", value: "Hello ", round: 1, book: "k", page: 1, total: 2, txid: "tx1" }),
+    ];
+    const out = reassemble(input);
+    expect(out.length).toBe(1);
+    expect(out[0].value).toBe("Hello World");
+    expect(out[0].round).toBe(2); // max round across pages
+    expect(out[0].total).toBe(2);
+  });
+
+  test("missing pages drop the whole record (not partial)", () => {
+    // total=3 but only pages 1 and 3 are present (page 2 is missing)
+    const input: E[] = [
+      ent({ key: "k", value: "A", page: 1, total: 3 }),
+      ent({ key: "k", value: "C", page: 3, total: 3 }),
+    ];
+    const out = reassemble(input);
+    expect(out.length).toBe(0);
+  });
+
+  test("two separate saves of same key produce two reassembled records", () => {
+    // Save 1 (older): 2 chunks
+    // Save 2 (newer): 2 chunks at a different timestamp
+    const input: E[] = [
+      ent({ key: "k", value: "OldA", created: "2026-05-17T00:00:00Z", round: 1, page: 1, total: 2 }),
+      ent({ key: "k", value: "OldB", created: "2026-05-17T00:00:00Z", round: 2, page: 2, total: 2 }),
+      ent({ key: "k", value: "NewA", created: "2026-05-18T00:00:00Z", round: 5, page: 1, total: 2 }),
+      ent({ key: "k", value: "NewB", created: "2026-05-18T00:00:00Z", round: 6, page: 2, total: 2 }),
+    ];
+    const out = reassemble(input);
+    expect(out.length).toBe(2);
+    // Both reassembled — caller's "latest by round" picks the newer one
+    const values = out.map(e => e.value).sort();
+    expect(values).toEqual(["NewANewB", "OldAOldB"]);
+  });
+
+  test("mixed single-chunk and multi-chunk entries are both preserved", () => {
+    const input: E[] = [
+      ent({ key: "single", value: "lonely" }),
+      ent({ key: "multi", value: "X", page: 1, total: 2 }),
+      ent({ key: "multi", value: "Y", page: 2, total: 2 }),
+    ];
+    const out = reassemble(input);
+    expect(out.length).toBe(2);
+    const byKey = Object.fromEntries(out.map(e => [e.key, e.value]));
+    expect(byKey).toEqual({ single: "lonely", multi: "XY" });
+  });
+
+  test("page numbering must be contiguous 1..total — gap drops the record", () => {
+    // total=3 but pages [1, 1, 3] — duplicate page 1, missing page 2
+    const input: E[] = [
+      ent({ key: "k", value: "A", page: 1, total: 3, txid: "t1" }),
+      ent({ key: "k", value: "A'", page: 1, total: 3, txid: "t2" }),
+      ent({ key: "k", value: "C", page: 3, total: 3, txid: "t3" }),
+    ];
+    const out = reassemble(input);
+    expect(out.length).toBe(0);
+  });
+});