fix(memory): splice memories into ~/.claude/CLAUDE.md (stock Claude Code path)

dgokeeffe · dgokeeffe · commit f2447f566fa0 · 2026-04-26T10:24:33.000+10:00
The previous fix targeted ~/.claude/projects/<encoded-cwd>/memory/MEMORY.md
on the assumption Claude Code auto-loads that path. Verified empirically
with a fresh `claude -p` session in the deployed CODA container running
Claude Code 2.1.19: that path is NOT auto-loaded.

The auto-memory-dir mechanism is a HARNESS-LEVEL feature (added by some
agent system prompts), not part of stock Claude Code. Stock Claude Code
auto-loads CLAUDE.md files from cwd-walk-up parents and ~/.claude/CLAUDE.md.

Switched to splicing the rendered memory section into ~/.claude/CLAUDE.md
between explicit BEGIN/END markers so future regenerations replace just our
section without clobbering other content. Also dropped the per-project
iteration in setup_memory.py (now a single splice with project_name=None,
i.e. all memories) — multiple splices to the same file would have clobbered
each other anyway.

Co-authored-by: Isaac
diff --git a/memory/extractor.py b/memory/extractor.py
@@ -229,13 +229,13 @@ def stop_hook_handler() -> None:
         _trace(f"Lakebase write error: {type(e).__name__}: {e}")
         return
 
-    # Regenerate MEMORY.md in the per-project auto-load dir so the next session
-    # opens with these memories already in context. The dir name is derived from
-    # `cwd` (not `project_name`) — Claude Code's encoding hashes the full path.
+    # Splice the full memory set into ~/.claude/CLAUDE.md so the next Claude
+    # session sees everything — cross-project lessons too, since the user may
+    # cd anywhere. project_name=None pulls all of this owner's memories.
     try:
         from memory.injector import regenerate_memory_file
 
-        path = regenerate_memory_file(owner_email, project_name, cwd=cwd or None)
+        path = regenerate_memory_file(owner_email, None)
         if path:
             _trace(f"memory file updated: {path}")
     except Exception as e:
diff --git a/memory/injector.py b/memory/injector.py
@@ -1,15 +1,19 @@
-"""Regenerate Claude Code's MEMORY.md from Lakebase-backed memories.
-
-Claude Code auto-loads `MEMORY.md` from a per-project directory at
-`~/.claude/projects/<encoded-cwd>/memory/MEMORY.md`, where `<encoded-cwd>` is
-the absolute project path with every `/` and `.` replaced by `-`. Other `*.md`
-files in that dir are referenced from MEMORY.md and loaded on demand. By
-writing MEMORY.md there after each session, memories are injected into the
-next session's context at zero extra cost.
-
-Reference: this is the same encoding the running Claude Code harness uses for
-its own auto-memory dir — verified against an active session at
-`~/.claude/projects/-Users-...-coding-agents-databricks-apps/memory/`.
+"""Inject Lakebase-backed memories into Claude Code's auto-loaded CLAUDE.md.
+
+Stock Claude Code (used in CODA) auto-loads `CLAUDE.md` files from:
+  - `./CLAUDE.md` and parents walked up (project-level)
+  - `~/.claude/CLAUDE.md` (user global)
+
+We write the rendered memories into `~/.claude/CLAUDE.md`, between explicit
+markers so we can update just our section on each Stop hook without clobbering
+any user-authored content above or below. The user-global path means memories
+are visible in every Claude session regardless of cwd, which matches the way
+Lakebase already aggregates rows across projects under `owner_email`.
+
+(The `~/.claude/projects/<encoded>/memory/MEMORY.md` path some harnesses use
+for auto-memory is NOT part of stock Claude Code 2.x — verified empirically
+against `claude --version` 2.1.19 in the CODA container, which did not load
+files from that path.)
 """
 from __future__ import annotations
 
@@ -27,72 +31,78 @@
 
 _CAP_PER_SECTION = 12
 
+_BEGIN_MARKER = "<!-- BEGIN CODA MEMORY -->"
+_END_MARKER = "<!-- END CODA MEMORY -->"
 
-def _encode_cwd(cwd: str) -> str:
-    """Match Claude Code's per-project memory dir encoding: replace / and . with -."""
-    return cwd.replace("/", "-").replace(".", "-")
 
-
-def _memory_dir(cwd: str | None) -> Path:
-    """Return the Claude Code memory directory for a project (or global).
-
-    Per-project: `~/.claude/projects/<encoded-cwd>/memory/`
-    Global (cwd=None): `~/.claude/memory/`
-    """
+def _claude_md_path() -> Path:
+    """Return the user-global CLAUDE.md path that Claude Code auto-loads.
+
+    The path is `<home>/.claude/CLAUDE.md`, where `<home>` is `$HOME` or, when
+    that variable is unset, `/app/python/source_code`. The parent `.claude`
+    directory is created if missing; the file itself is not created here.
+    """
     home = Path(os.environ.get("HOME", "/app/python/source_code"))
+    path = home / ".claude" / "CLAUDE.md"
+    path.parent.mkdir(parents=True, exist_ok=True)
+    return path
 
-    if cwd:
-        mem_dir = home / ".claude" / "projects" / _encode_cwd(cwd) / "memory"
-    else:
-        mem_dir = home / ".claude" / "memory"
-
-    mem_dir.mkdir(parents=True, exist_ok=True)
-    return mem_dir
-
-
-def regenerate_memory_file(
-    owner_email: str,
-    project_name: str | None,
-    cwd: str | None = None,
-) -> Path | None:
-    """Write MEMORY.md to Claude Code's auto-loaded memory directory.
-
-    `project_name` is the leaf-name tag used for filtering Lakebase rows
-    (matches the `project_name` column). `cwd` is the absolute project path
-    used to compute the auto-load directory; pass None for the global path.
-
-    Returns the path written, or None if there were no memories to write.
-    """
-    from memory.store import load_memories
-
-    memories = load_memories(owner_email, project_name, limit=60)
-    if not memories:
-        return None
 
+def _render_memory_section(memories: list[dict]) -> str:
+    """Render the memories as a CLAUDE.md fragment between markers.
+
+    Memories are grouped by their `type` key and emitted in `_TYPE_HEADINGS`
+    order; types with no entry in `_TYPE_HEADINGS` are not rendered, and each
+    heading lists at most `_CAP_PER_SECTION` items. Every dict must carry
+    `type` and `content`; `project_name` is optional and, when truthy, is
+    appended to the bullet as an italic tag.
+
+    Returns the fragment as one string that starts with `_BEGIN_MARKER`, ends
+    with `_END_MARKER`, and has no trailing newline.
+    """
     by_type: dict[str, list[dict]] = {}
     for mem in memories:
         by_type.setdefault(mem["type"], []).append(mem)
 
     now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
     lines: list[str] = [
+        _BEGIN_MARKER,
         "# CODA Memory",
         f"_Synced from Lakebase: {now}_",
         "",
         "These memories were extracted from past coding sessions and are stored in",
         "Lakebase for durability across app restarts and CODA instances.",
         "",
     ]
-
     for mem_type, heading in _TYPE_HEADINGS.items():
         items = by_type.get(mem_type, [])
         if not items:
             continue
         lines.append(heading)
         for item in items[:_CAP_PER_SECTION]:
-            project_tag = f" _(project: {item['project_name']})_" if item.get("project_name") else ""
+            project_tag = (
+                f" _(project: {item['project_name']})_"
+                if item.get("project_name")
+                else ""
+            )
             lines.append(f"- {item['content']}{project_tag}")
         lines.append("")
+    lines.append(_END_MARKER)
+    return "\n".join(lines)
+
+
+def _splice_section(existing: str, new_section: str) -> str:
+    """Replace any prior CODA-MEMORY section in `existing`, or append if absent.
+
+    A prior section is a `_BEGIN_MARKER` followed later by an `_END_MARKER`.
+    When several BEGIN markers precede that END marker, only the innermost
+    pair is replaced, so a stray or unterminated marker never causes the text
+    around it to be discarded. Text before and after the section is kept and
+    separated from the section by one blank line.
+
+    `new_section` is expected to carry both markers itself and to have no
+    trailing newline. Returns the full new file content.
+    """
+    first_begin = existing.find(_BEGIN_MARKER)
+    end = -1
+    if first_begin != -1:
+        # Only an END marker that follows a BEGIN marker closes a section.
+        end = existing.find(_END_MARKER, first_begin + len(_BEGIN_MARKER))
+    if end != -1:
+        # Pair the END marker with the nearest BEGIN marker before it.
+        begin = existing.rfind(_BEGIN_MARKER, 0, end)
+        before = existing[:begin].rstrip()
+        after = existing[end + len(_END_MARKER):].lstrip("\n")
+        head = f"{before}\n\n" if before else ""
+        # Keep the END marker on its own line instead of gluing it to `after`.
+        tail = f"\n\n{after}" if after else "\n"
+        return head + new_section + tail
+    if not existing:
+        # Nothing to separate from: do not start the file with a blank line.
+        return new_section + "\n"
+    sep = "\n" if existing.endswith("\n") else "\n\n"
+    return existing + sep + new_section + "\n"
+
+
+def regenerate_memory_file(
+    owner_email: str,
+    project_name: str | None,
+    cwd: str | None = None,  # accepted for API compatibility; not used here
+) -> Path | None:
+    """Splice Lakebase-backed memories into `~/.claude/CLAUDE.md`.
+
+    Requests memories from `load_memories(owner_email, project_name)` with
+    `limit=60`, renders them, and replaces (or appends) the marker-delimited
+    section in the file returned by `_claude_md_path()`, creating the file if
+    it does not exist. `project_name` is forwarded to `load_memories`
+    unchanged; callers in this change pass None to request all of the
+    owner's memories. `cwd` is ignored.
+
+    When there are no memories the file is left untouched, so a previously
+    written section is not removed.
+
+    Returns the CLAUDE.md path on success, or None if there were no memories.
+    """
+    from memory.store import load_memories
+
+    memories = load_memories(owner_email, project_name, limit=60)
+    if not memories:
+        return None
 
-    output_path = _memory_dir(cwd) / "MEMORY.md"
-    output_path.write_text("\n".join(lines), encoding="utf-8")
-    return output_path
+    new_section = _render_memory_section(memories)
+    path = _claude_md_path()
+    existing = path.read_text(encoding="utf-8") if path.exists() else ""
+    path.write_text(_splice_section(existing, new_section), encoding="utf-8")
+    return path
diff --git a/setup_memory.py b/setup_memory.py
@@ -46,26 +46,17 @@
 except Exception as e:
     print(f"CODA memory: schema init warning: {e}")
 
-# Regenerate MEMORY.md per-project at startup so prior memories are loaded the
-# moment the user `cd`s into a project and runs `claude`. The auto-load path
-# `~/.claude/projects/<encoded-cwd>/memory/MEMORY.md` is cwd-specific, so we
-# iterate over every project directory that has memories.
+# Splice all of this user's Lakebase memories into ~/.claude/CLAUDE.md so
+# they're available the moment a Claude session starts (Claude Code auto-loads
+# that file). project_name=None means "all projects" — the user might `cd` to
+# any of them, and cross-project lessons should be visible everywhere.
 try:
     from memory.injector import regenerate_memory_file
-    projects_root = home / "projects"
-    refreshed = 0
-    if projects_root.exists():
-        for project_dir in sorted(projects_root.iterdir()):
-            if not project_dir.is_dir():
-                continue
-            path = regenerate_memory_file(
-                app_owner, project_dir.name, cwd=str(project_dir)
-            )
-            if path:
-                refreshed += 1
-                print(f"CODA memory: refreshed {project_dir.name} → {path}")
-    if refreshed == 0:
-        print("CODA memory: no per-project memories yet (new instance)")
+    path = regenerate_memory_file(app_owner, None)
+    if path:
+        print(f"CODA memory: spliced into {path}")
+    else:
+        print("CODA memory: no memories yet (new instance)")
 except Exception as e:
     print(f"CODA memory: memory file warning: {e}")