volcengine · yshishenya · May 15, 2026 · May 16, 2026 · May 16, 2026 · May 16, 2026
diff --git a/examples/codex-memory-plugin/README.md b/examples/codex-memory-plugin/README.md
@@ -106,8 +106,15 @@ All plugin behavior is controlled by `OPENVIKING_*` environment variables — se
 ```sh
 # ~/.zshrc — examples
 export OPENVIKING_RECALL_LIMIT=6
+export OPENVIKING_SCORE_THRESHOLD=0.35
+export OPENVIKING_MIN_INJECT_SCORE=0.55
+export OPENVIKING_MIN_BASE_INJECT_SCORE=0.55
+export OPENVIKING_FULL_CONTENT_SCORE=0.65
+export OPENVIKING_RECALL_MAX_MEMORY_CHARS=1200
 export OPENVIKING_CAPTURE_ASSISTANT_TURNS=1
 export OPENVIKING_AUTO_COMMIT_ON_COMPACT=1
+export OPENVIKING_COMMIT_POLL_TIMEOUT_MS=45000
+export OPENVIKING_COMMIT_POLL_INTERVAL_MS=1000
 export OPENVIKING_DEBUG=1
 ```
 
@@ -117,6 +124,28 @@ Full list: see the `Misc env vars` block in `scripts/config.mjs`. Every field ha
 
 Earlier plugin versions configured tuning fields under a `codex` block in `~/.openviking/ov.conf`. That still works for backward compat — every env var above has a camelCase counterpart (`OPENVIKING_RECALL_LIMIT` → `codex.recallLimit`, etc.) — but **new deployments should prefer env vars**: this is the codex CLI's per-machine plugin tuning, and the server-side `ov.conf` is the wrong place for it. (It's read from `ov.conf`, not `ovcli.conf`, by historical accident in `scripts/config.mjs`.)
 
+Auto-recall is intentionally conservative: if the best result is not
+confident enough, the hook emits `{}` and Codex receives no injected memory
+for that turn. This is the expected behavior for vague prompts such as
+"look into what happened" or diagnostic prompts about the memory plugin
+itself. The recall gate is controlled by:
+
+| Setting | Env var | Default | Purpose |
+|---|---|---:|---|
+| `recallLimit` | `OPENVIKING_RECALL_LIMIT` | `6` | Maximum candidate memories to inject after filtering. |
+| `scoreThreshold` | `OPENVIKING_SCORE_THRESHOLD` | `0.35` | Minimum raw retrieval score before local ranking. |
+| `minInjectScore` | `OPENVIKING_MIN_INJECT_SCORE` | `0.55` | Minimum final local ranking score for injection. |
+| `minBaseInjectScore` | `OPENVIKING_MIN_BASE_INJECT_SCORE` | `0.55` | Minimum raw retrieval score; ranking boosts cannot bypass this. |
+| `fullContentScore` | `OPENVIKING_FULL_CONTENT_SCORE` | `0.65` | Read full memory content only for high-confidence hits. |
+| `recallMaxMemoryChars` | `OPENVIKING_RECALL_MAX_MEMORY_CHARS` | `1200` | Per-memory injected character cap. |
+
+Captured turns are sanitized before they are appended to the OpenViking
+session. The sanitizer strips Codex/OpenClaw runtime blocks such as
+`<relevant-memories>`, `Conversation context (untrusted metadata)`,
+subagent prompts, inter-session messages, and internal OpenClaw context
+envelopes. This prevents hook-generated context from becoming future
+long-term memory.
+
 ## Architecture
 
 ```
@@ -173,14 +202,19 @@ On any /commit failure (OV unreachable, non-2xx, timeout) we **preserve state**
 
 ### Auto-recall (every UserPromptSubmit)
 
-`auto-recall.mjs` reads `prompt` from stdin, calls `/api/v1/search/find`, ranks results, reads full content for top-ranked leaves, and emits:
+`auto-recall.mjs` reads `prompt` from stdin, calls `/api/v1/search/find`,
+ranks results, applies the conservative injection gates above, reads full
+content only for high-confidence leaves, and emits:
 
 ```json
 { "hookSpecificOutput": { "hookEventName": "UserPromptSubmit", "additionalContext": "<relevant-memories>...</relevant-memories>" } }
 ```
 
 Codex injects `additionalContext` into the model turn, so memories arrive without an extra tool call.
 
+If no result survives the confidence gate, the script emits `{}`. That is a
+successful no-op, not an error.
+
 ### Stop (turn end → `add_message`, NOT `commit`)
 
 `auto-capture.mjs` derives one long-lived OpenViking session id per Codex `session_id` as `cx-<safe-session-id>` and incrementally appends every new user/assistant turn via `/api/v1/sessions/{id}/messages`. The `/messages` endpoint auto-creates the session on first append. Per-codex-session state lives at `~/.openviking/codex-plugin-state/<safe-session-id>.json`. No `/commit` per turn — that would over-fragment memory extraction.
@@ -191,7 +225,8 @@ Codex injects `additionalContext` into the model turn, so memories arrive withou
 
 1. Catch-up append for any turns Stop hasn't captured yet (race-safe via `capturedTurnCount`)
 2. Commit the long-lived OV session so the extractor runs against the full pre-compact transcript
-3. Reset `ovSessionId` to `null` so the next `Stop` re-derives the same `cx-<safe-session-id>` and appends the post-compact half under that deterministic OV session id
+3. If the server returns an async `task_id`, poll `/api/v1/tasks/{task_id}` briefly so the hook message reflects the actual extraction count instead of the immediate accepted response
+4. Reset `ovSessionId` to `null` so the next `Stop` re-derives the same `cx-<safe-session-id>` and appends the post-compact half under that deterministic OV session id
 
 ### Known gap: SIGTERM / Ctrl+C / `/exit` are silent
 

diff --git a/examples/codex-memory-plugin/VERIFICATION.md b/examples/codex-memory-plugin/VERIFICATION.md
@@ -90,7 +90,7 @@ echo '{"session_id":"verify-sess","transcript_path":"'"$STATE_DIR"'/transcript.j
     node $PLUGIN/scripts/pre-compact-capture.mjs
 ```
 
-Expect: `OpenViking session cx-verify-sess is committed`.
+Expect: `pre-compact commit: cx-verify-sess is committed; N memory item(s) extracted` (or an "extraction is still running" message if the async task exceeds the poll timeout).
 
 State file: `ovSessionId` is now `null`, `capturedTurnCount` stays at 4.
 
@@ -141,7 +141,7 @@ echo '{"session_id":"new-after-verify","source":"startup","cwd":"/tmp","model":"
     node $PLUGIN/scripts/session-start-commit.mjs
 ```
 
-Expect: `OpenViking session cx-verify-sess is committed`.
+Expect: `OpenViking session cx-verify-sess is committed; N memory item(s) extracted`.
 After this `verify-sess.json` is gone from `$STATE_DIR/state`.
 
 ### 6b. `0 active` → no-op
@@ -237,6 +237,33 @@ codex                                                                 # interact
 # then /compact (manual PreCompact) to force a commit, then exit.
 ```
 
+## 9. Auto-recall relevance probe
+
+Run the live auto-recall probe against the configured OpenViking server:
+
+```bash
+OPENVIKING_CONFIG_FILE=$OV_CONF \
+  node $PLUGIN/scripts/probe-auto-recall.mjs
+```
+
+Expect all cases to print `ok`. The default cases verify that:
+
+- vague Russian prompts do not inject memory;
+- diagnostic OpenViking/Codex prompts do not inject unrelated memory;
+- concrete Russian prompts still recall relevant memories when the server has
+  matching memories.
+
+For a different tenant or data set, pass custom cases:
+
+```bash
+OPENVIKING_RECALL_PROBE_CASES='[
+  {"name":"vague prompt","prompt":"Посмотри что там было","expect":"none"},
+  {"name":"project memory","prompt":"Напомни решение по проекту X","contains":["project X"]}
+]' \
+OPENVIKING_CONFIG_FILE=$OV_CONF \
+  node $PLUGIN/scripts/probe-auto-recall.mjs
+```
+
 Verify with steps 4 + 7 above.
 
 ---

diff --git a/examples/codex-memory-plugin/scripts/auto-capture.mjs b/examples/codex-memory-plugin/scripts/auto-capture.mjs
@@ -26,6 +26,7 @@
  */
 
 import { readFile } from "node:fs/promises";
+import { sanitizeCapturedText } from "./captured-text.mjs";
 import { loadConfig } from "./config.mjs";
 import { createLogger } from "./debug-log.mjs";
 import { loadState, resolveOvSessionId, saveState } from "./session-state.mjs";
@@ -115,7 +116,7 @@ function extractTurns(rolloutEntries) {
 
     if (role !== "user" && role !== "assistant") continue;
     if (role === "assistant" && !cfg.captureAssistantTurns) continue;
-    const trimmed = text.trim();
+    const trimmed = sanitizeCapturedText(text);
     if (!trimmed) continue;
 
     const capped = trimmed.length > cfg.captureMaxLength

diff --git a/examples/codex-memory-plugin/scripts/auto-recall.mjs b/examples/codex-memory-plugin/scripts/auto-recall.mjs
@@ -71,10 +71,13 @@ function clampScore(v) {
 
 const PREFERENCE_QUERY_RE = /prefer|preference|favorite|favourite|like|偏好|喜欢|爱好|更倾向/i;
 const TEMPORAL_QUERY_RE = /when|what time|date|day|month|year|yesterday|today|tomorrow|last|next|什么时候|何时|哪天|几月|几年|昨天|今天|明天/i;
-const QUERY_TOKEN_RE = /[a-z0-9一-龥]{2,}/gi;
+const QUERY_TOKEN_RE = /[\p{L}\p{N}]{2,}/gu;
 const STOPWORDS = new Set([
   "what", "when", "where", "which", "who", "whom", "whose", "why", "how", "did", "does",
   "is", "are", "was", "were", "the", "and", "for", "with", "from", "that", "this", "your", "you",
+  "это", "что", "как", "где", "когда", "почему", "зачем", "или", "для", "про", "при", "его",
+  "она", "оно", "они", "мне", "мой", "моя", "мои", "твой", "твоя", "твои", "наш", "ваш",
+  "посмотри", "разберись", "найди", "продумай", "исправить", "сделать", "давай",
 ]);
 
 function buildQueryProfile(query) {
@@ -95,18 +98,19 @@ function lexicalOverlapBoost(tokens, text) {
   for (const token of tokens.slice(0, 8)) {
     if (haystack.includes(token)) matched += 1;
   }
-  return Math.min(0.2, (matched / Math.min(tokens.length, 4)) * 0.2);
+  return Math.min(0.12, (matched / Math.min(tokens.length, 4)) * 0.12);
 }
 
 function getRankingBreakdown(item, profile) {
   const base = clampScore(item.score);
   const abstract = (item.abstract || item.overview || "").trim();
   const cat = (item.category || "").toLowerCase();
   const uri = item.uri.toLowerCase();
-  const leafBoost = (item.level === 2 || uri.endsWith(".md")) ? 0.12 : 0;
-  const eventBoost = profile.wantsTemporal && (cat === "events" || uri.includes("/events/")) ? 0.1 : 0;
-  const prefBoost = profile.wantsPreference && (cat === "preferences" || uri.includes("/preferences/")) ? 0.08 : 0;
-  const overlapBoost = lexicalOverlapBoost(profile.tokens, `${item.uri} ${abstract}`);
+  const baseIsUseful = base >= 0.35;
+  const leafBoost = baseIsUseful && (item.level === 2 || uri.endsWith(".md")) ? 0.06 : 0;
+  const eventBoost = baseIsUseful && profile.wantsTemporal && (cat === "events" || uri.includes("/events/")) ? 0.06 : 0;
+  const prefBoost = baseIsUseful && profile.wantsPreference && (cat === "preferences" || uri.includes("/preferences/")) ? 0.05 : 0;
+  const overlapBoost = baseIsUseful ? lexicalOverlapBoost(profile.tokens, `${item.uri} ${abstract}`) : 0;
   return {
     baseScore: base,
     leafBoost,
@@ -131,6 +135,11 @@ function dedupeByAbstract(items) {
   });
 }
 
+function isNoisyRecallCandidate(item) {
+  const text = `${item.uri}\n${item.abstract || ""}\n${item.overview || ""}`;
+  return /You are running as a subagent|Conversation context \(untrusted metadata\)|Inter-session message|OPENCLAW_INTERNAL_CONTEXT|Full hook output saved to/i.test(text);
+}
+
 function pickMemories(items, limit, queryText) {
   if (items.length === 0 || limit <= 0) return [];
   const profile = buildQueryProfile(queryText);
@@ -148,12 +157,32 @@ function pickMemories(items, limit, queryText) {
   return picked;
 }
 
+function truncateMemoryContent(content, maxChars) {
+  const trimmed = String(content || "").trim();
+  if (!trimmed || trimmed.length <= maxChars) return trimmed;
+  return `${trimmed.slice(0, maxChars).trimEnd()}\n...(truncated by Codex OpenViking auto-recall)`;
+}
+
+async function formatMemoryLine(item) {
+  const label = item.category || "memory";
+  const score = clampScore(item.score).toFixed(2);
+  const abstract = (item.abstract || item.overview || item.uri).trim();
+  if (item.level === 2 && clampScore(item.score) >= cfg.fullContentScore) {
+    const content = await readMemoryContent(item.uri);
+    if (content) {
+      return `- [${label} score=${score} uri=${item.uri}] ${truncateMemoryContent(content, cfg.recallMaxMemoryChars)}`;
+    }
+  }
+  return `- [${label} score=${score} uri=${item.uri}] ${truncateMemoryContent(abstract, cfg.recallMaxMemoryChars)}`;
+}
+
 function postProcess(items, limit, threshold) {
   const seen = new Set();
   const sorted = [...items].sort((a, b) => clampScore(b.score) - clampScore(a.score));
   const result = [];
   for (const item of sorted) {
     if (item.level !== 2) continue;
+    if (isNoisyRecallCandidate(item)) continue;
     if (clampScore(item.score) < threshold) continue;
     const cat = (item.category || "").toLowerCase() || "unknown";
     const abs = (item.abstract || item.overview || "").trim().toLowerCase();
@@ -277,7 +306,12 @@ async function main() {
   log("start", {
     query: userPrompt.slice(0, 200),
     queryLength: userPrompt.length,
-    config: { recallLimit: cfg.recallLimit, scoreThreshold: cfg.scoreThreshold },
+    config: {
+      recallLimit: cfg.recallLimit,
+      scoreThreshold: cfg.scoreThreshold,
+      minInjectScore: cfg.minInjectScore,
+      minBaseInjectScore: cfg.minBaseInjectScore,
+    },
   });
 
   if (!userPrompt || userPrompt.length < cfg.minQueryLength) {
@@ -327,16 +361,45 @@ async function main() {
     return;
   }
 
-  log("picked", { pickedCount: memories.length, uris: memories.map((m) => m.uri) });
+  const injectableMemories = memories.filter((m) =>
+    rankForInjection(m, profile) >= cfg.minInjectScore &&
+    clampScore(m.score) >= cfg.minBaseInjectScore
+  );
+  const topMemoryScore = Math.max(...memories.map((m) => rankForInjection(m, profile)));
+  const topBaseScore = Math.max(...memories.map((m) => clampScore(m.score)));
+  if (injectableMemories.length === 0) {
+    log("skip", {
+      stage: "confidence_gate",
+      reason: "no candidate passed combined inject confidence",
+      topMemoryScore,
+      topBaseScore,
+      minInjectScore: cfg.minInjectScore,
+      minBaseInjectScore: cfg.minBaseInjectScore,
+    });
+    emit();
+    return;
+  }
+  if (topMemoryScore < cfg.minInjectScore || topBaseScore < cfg.minBaseInjectScore) {
+    log("skip", {
+      stage: "confidence_gate",
+      reason: "top result below inject confidence",
+      topMemoryScore,
+      topBaseScore,
+      minInjectScore: cfg.minInjectScore,
+      minBaseInjectScore: cfg.minBaseInjectScore,
+    });
+    emit();
+    return;
+  }
+
+  log("picked", {
+    pickedCount: injectableMemories.length,
+    discardedLowConfidenceCount: memories.length - injectableMemories.length,
+    uris: injectableMemories.map((m) => m.uri),
+  });
 
   const lines = await Promise.all(
-    memories.map(async (item) => {
-      if (item.level === 2) {
-        const content = await readMemoryContent(item.uri);
-        if (content) return `- [${item.category || "memory"}] ${content}`;
-      }
-      return `- [${item.category || "memory"}] ${(item.abstract || item.overview || item.uri).trim()}`;
-    }),
+    injectableMemories.map((item) => formatMemoryLine(item)),
   );
 
   const memoryContext =

diff --git a/examples/codex-memory-plugin/scripts/captured-text.mjs b/examples/codex-memory-plugin/scripts/captured-text.mjs
@@ -0,0 +1,13 @@
+export function sanitizeCapturedText(text) {
+  let out = String(text || "");
+  if (/^\s*You are running as a subagent\b/i.test(out)) return "";
+  out = out
+    .replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>/gi, "")
+    .replace(/<<<BEGIN_OPENCLAW_INTERNAL_CONTEXT>>>[\s\S]*?<<<END_OPENCLAW_INTERNAL_CONTEXT>>>/g, "")
+    .replace(/Conversation context \(untrusted metadata\):\s*```json[\s\S]*?```\s*/gi, "")
+    .replace(/\[Inter-session message][\s\S]*?(?=\n\[[a-z]+]:|\n?$)/gi, "")
+    .replace(/Full hook output saved to:\s*\S+/gi, "")
+    .trim();
+  if (/^(?:\[user]:\s*)*$/i.test(out)) return "";
+  return out;
+}
diff --git a/examples/codex-memory-plugin/scripts/commit-task.mjs b/examples/codex-memory-plugin/scripts/commit-task.mjs
@@ -0,0 +1,74 @@
+export function countExtracted(result) {
+  if (!result?.memories_extracted) return 0;
+  if (typeof result.memories_extracted === "number") return result.memories_extracted;
+  if (typeof result.memories_extracted === "object") {
+    return Object.values(result.memories_extracted).reduce(
+      (a, b) => a + (typeof b === "number" ? b : 0),
+      0,
+    );
+  }
+  return 0;
+}
+
+function sleep(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+function taskStatus(task) {
+  return typeof task?.status === "string" ? task.status.toLowerCase() : "";
+}
+
+export async function waitForCommitTask(commit, fetchJSON, cfg, log = () => {}) {
+  if (!commit?.task_id) {
+    return { commit, final: commit, task: null, status: "immediate" };
+  }
+
+  const taskId = commit.task_id;
+  const timeoutMs = Math.max(0, cfg.commitPollTimeoutMs || 0);
+  const intervalMs = Math.max(250, cfg.commitPollIntervalMs || 1000);
+  const deadline = Date.now() + timeoutMs;
+
+  while (Date.now() < deadline) {
+    await sleep(Math.min(intervalMs, Math.max(0, deadline - Date.now())));
+    const task = await fetchJSON(`/api/v1/tasks/${encodeURIComponent(taskId)}`);
+    const status = taskStatus(task);
+
+    if (!task) {
+      log("commit_task_poll_miss", { taskId });
+      continue;
+    }
+
+    if (status === "completed" || status === "succeeded" || status === "done") {
+      const final = task.result && typeof task.result === "object"
+        ? { ...commit, ...task.result, task_status: status }
+        : { ...commit, task_status: status };
+      return { commit, final, task, status };
+    }
+
+    if (status === "failed" || status === "error" || status === "cancelled" || status === "canceled") {
+      return {
+        commit,
+        final: { ...commit, task_status: status, task_error: task.error || task.result?.error || null },
+        task,
+        status,
+      };
+    }
+
+    log("commit_task_poll_pending", { taskId, status: status || "unknown" });
+  }
+
+  return { commit, final: commit, task: null, status: "timeout" };
+}
+
+export function describeCommitOutcome(ovSessionId, outcome, prefix = "OpenViking session") {
+  const taskId = outcome?.commit?.task_id;
+  if (outcome?.status === "timeout") {
+    return `${prefix} ${ovSessionId} is committed; extraction is still running${taskId ? ` (${taskId})` : ""}`;
+  }
+  if (["failed", "error", "cancelled", "canceled"].includes(outcome?.status)) {
+    return `${prefix} ${ovSessionId} is committed; extraction task failed${taskId ? ` (${taskId})` : ""}`;
+  }
+  const extracted = countExtracted(outcome?.final);
+  const suffix = extracted === 1 ? "memory item" : "memory item(s)";
+  return `${prefix} ${ovSessionId} is committed; ${extracted} ${suffix} extracted`;
+}