Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions apps/memos-local-openclaw/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import { SkillInstaller } from "./src/skill/installer";
import { Summarizer } from "./src/ingest/providers";
import { MEMORY_GUIDE_SKILL_MD } from "./src/skill/bundled-memory-guide";
import { Telemetry } from "./src/telemetry";
import { withTimeout } from "./src/shared/with-timeout";


/** Remove near-duplicate hits based on summary word overlap (>70%). Keeps first (highest-scored) hit. */
Expand Down Expand Up @@ -1895,7 +1896,25 @@ Groups: ${groupNames.length > 0 ? groupNames.join(", ") : "(none)"}`,
.catch((err: any) => { ctx.log.debug(`auto-recall: hub search failed (${err})`); return { hits: [] as any[], meta: {} }; })
: Promise.resolve({ hits: [] as any[], meta: {} });

const [result, arHubResult] = await Promise.all([arLocalP, arHubP]);
// #1452: hard timeout around the parallel recall fan-out so a slow
// embedder/LLM can never block the prompt-build critical path. On
// timeout we fail open with no candidates and the hook returns
// without injecting memories.
const autoRecallTimeoutMs =
ctx.config.recall?.autoRecallTimeoutMs ?? DEFAULTS.autoRecallTimeoutMs;
const phase1 = await withTimeout(
Comment on lines +1903 to +1905
Promise.all([arLocalP, arHubP]),
autoRecallTimeoutMs,
"auto-recall.search",
ctx.log,
);
if (phase1 === null) {
const dur = performance.now() - recallT0;
store.recordToolCall("memory_search", dur, false);
try { store.recordApiLog("memory_search", { type: "auto_recall", query }, `timeout after ${autoRecallTimeoutMs}ms`, dur, false); } catch (_) { /* best-effort */ }
return;
}
const [result, arHubResult] = phase1;

const localHits = result.hits.filter((h) => h.origin !== "hub-memory");
const hubLocalHits = result.hits.filter((h) => h.origin === "hub-memory");
Expand Down Expand Up @@ -1986,7 +2005,16 @@ Groups: ${groupNames.length > 0 ? groupNames.join(", ") : "(none)"}`,
let filteredHits = allRawHits;
let sufficient = false;

const filterResult = await summarizer.filterRelevant(query, mergedForFilter);
// #1452: hard timeout around the recall LLM filter so a slow model
// can never block the prompt-build critical path. Fail open with the
// unfiltered candidate set; the deduper + later prompt size guards
// still apply.
const filterResult = await withTimeout(
summarizer.filterRelevant(query, mergedForFilter),
autoRecallTimeoutMs,
"auto-recall.filter",
ctx.log,
);
if (filterResult !== null) {
sufficient = filterResult.sufficient;
if (filterResult.relevant.length > 0) {
Expand Down
1 change: 1 addition & 0 deletions apps/memos-local-openclaw/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export function resolveConfig(raw: Partial<MemosLocalConfig> | undefined, stateD
mmrLambda: cfg.recall?.mmrLambda ?? DEFAULTS.mmrLambda,
recencyHalfLifeDays: cfg.recall?.recencyHalfLifeDays ?? DEFAULTS.recencyHalfLifeDays,
vectorSearchMaxChunks: cfg.recall?.vectorSearchMaxChunks ?? DEFAULTS.vectorSearchMaxChunks,
autoRecallTimeoutMs: cfg.recall?.autoRecallTimeoutMs ?? DEFAULTS.autoRecallTimeoutMs,
},
dedup: {
similarityThreshold: cfg.dedup?.similarityThreshold ?? DEFAULTS.dedupSimilarityThreshold,
Expand Down
37 changes: 37 additions & 0 deletions apps/memos-local-openclaw/src/shared/with-timeout.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
 * Await a promise, but give up after `ms` milliseconds and resolve `null`
 * instead of rejecting. This gives best-effort callers (auto-recall, #1452)
 * clean fail-open semantics: a slow LLM/embedder never blocks the critical
 * path — the caller just proceeds without the result.
 *
 * Note: the underlying promise keeps running (there is no way to cancel an
 * in-flight fetch from here); we merely stop waiting for it. A `null` return
 * means "timed out — proceed without this result".
 *
 * @param p The promise to await.
 * @param ms Timeout budget in milliseconds; non-positive or non-finite
 *   disables the timeout and returns `p` unchanged.
 * @param label Short tag included in the timeout warning message.
 * @param log Optional logger; `warn` is invoked once if the timeout fires.
 */
export function withTimeout<T>(
  p: Promise<T>,
  ms: number,
  label: string,
  log?: { warn: (msg: string) => void },
): Promise<T | null> {
  // No (valid) budget at all → behave exactly like awaiting `p` directly.
  if (!(Number.isFinite(ms) && ms > 0)) return p as Promise<T | null>;

  let handle: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<null>((resolve) => {
    handle = setTimeout(() => {
      log?.warn(`${label}: timed out after ${ms}ms; falling back`);
      resolve(null);
    }, ms);
    // Don't keep the event loop alive solely for this timer (Node only;
    // browser timer handles have no `unref`).
    if (typeof (handle as any)?.unref === "function") (handle as any).unref();
  });

  // Clear the pending timer as soon as `p` settles either way, so a fast
  // result doesn't leave a stray timeout behind.
  const tracked = p.finally(() => {
    if (handle !== undefined) clearTimeout(handle);
  });

  return Promise.race<T | null>([tracked, deadline]);
}
12 changes: 12 additions & 0 deletions apps/memos-local-openclaw/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,17 @@ export interface MemosLocalConfig {
recencyHalfLifeDays?: number;
/** Cap vector search to this many most recent chunks. 0 = no cap (search all; may get slower with 200k+ chunks). If you set a cap for performance, use a large value (e.g. 200000–300000) so older memories are still in the window; FTS always searches all. */
vectorSearchMaxChunks?: number;
/**
* Hard timeout (ms) for the auto-recall path inside `before_prompt_build`.
*
* Auto-recall is best-effort enrichment: if the recall LLM/embedding work
* takes longer than this, we abandon it and let the prompt build proceed
* with no auto-injected memories. Prevents a slow LLM from blocking
* gateway startup or first-turn long enough to trip health checks (#1452).
*
* Default: 8000 ms.
*/
autoRecallTimeoutMs?: number;
};
dedup?: {
similarityThreshold?: number;
Expand Down Expand Up @@ -360,6 +371,7 @@ export const DEFAULTS = {
skillPreferUpgrade: true,
skillRedactSensitive: true,
taskAutoFinalizeHours: 4,
autoRecallTimeoutMs: 8000,
} as const;

// ─── Plugin Hooks (OpenClaw integration) ───
Expand Down
64 changes: 64 additions & 0 deletions apps/memos-local-openclaw/tests/with-timeout.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { withTimeout } from "../src/shared/with-timeout";

describe("withTimeout", () => {
  // Fake timers let each test drive the clock deterministically instead of
  // sleeping for real; real timers are restored so sibling suites are unaffected.
  beforeEach(() => {
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it("resolves with the underlying value when the promise wins the race", async () => {
    // Underlying work completes at t=5ms, well inside the 100ms budget.
    const fast = new Promise<string>((resolve) => setTimeout(() => resolve("ok"), 5));
    const racePromise = withTimeout(fast, 100, "test.fast");
    await vi.advanceTimersByTimeAsync(5);
    const result = await racePromise;
    expect(result).toBe("ok");
  });

  it("returns null when the timeout fires first (fail-open semantics)", async () => {
    // Work would complete at t=100ms, but the 10ms budget expires first.
    const slow = new Promise<string>((resolve) => setTimeout(() => resolve("late"), 100));
    const racePromise = withTimeout(slow, 10, "test.slow");
    await vi.advanceTimersByTimeAsync(10);
    const result = await racePromise;
    expect(result).toBeNull();
  });

  it("logs a warning on timeout via the supplied logger", async () => {
    const warn = vi.fn();
    const slow = new Promise<string>((resolve) => setTimeout(() => resolve("late"), 100));
    const racePromise = withTimeout(slow, 5, "test.warn", { warn });
    await vi.advanceTimersByTimeAsync(5);
    await racePromise;
    // Exactly one warning, carrying both the caller's label and the
    // "timed out" wording so operators can attribute the fallback.
    expect(warn).toHaveBeenCalledTimes(1);
    expect(warn.mock.calls[0][0]).toContain("test.warn");
    expect(warn.mock.calls[0][0]).toContain("timed out");
  });

  it("does not time out when ms <= 0 (timeout disabled)", async () => {
    // ms = 0 means "no budget": the promise is awaited without any race.
    const p = new Promise<string>((resolve) => setTimeout(() => resolve("done"), 5));
    const racePromise = withTimeout(p, 0, "test.disabled");
    await vi.advanceTimersByTimeAsync(5);
    const result = await racePromise;
    expect(result).toBe("done");
  });

  it("propagates rejections from the underlying promise unchanged", async () => {
    // Failures are NOT converted to null — only slowness is fail-open.
    const failing = Promise.reject(new Error("boom"));
    await expect(withTimeout(failing, 100, "test.reject")).rejects.toThrow("boom");
  });

  it("simulates the auto-recall hang path: a 30s LLM call falls back well before completion", async () => {
    // Mimic a slow recall LLM that would hang the gateway critical path.
    const hangingLLM = new Promise<{ relevant: number[]; sufficient: boolean }>(
      (resolve) => setTimeout(() => resolve({ relevant: [1, 2], sufficient: true }), 30_000),
    );
    const racePromise = withTimeout(hangingLLM, 8000, "auto-recall.filter");
    // Advance just past the 8s timeout — the underlying 30s promise has not resolved yet.
    await vi.advanceTimersByTimeAsync(8001);
    const result = await racePromise;
    expect(result).toBeNull();
  });
});