Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions scripts/ci-test-manifest.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,18 @@ export const CI_TEST_MANIFEST = [
// Issue #629 batch embedding fix
{ group: "llm-clients-and-auth", runner: "node", file: "test/embedder-ollama-batch-routing.test.mjs" },
// Issue #665 bulkStore tests
// Issue #690 cross-call batch accumulator tests
{ group: "storage-and-schema", runner: "node", file: "test/issue-690-cross-call-batch.test.mjs", args: ["--test"] },
// Issue #665 bulkStore tests (from upstream)
{ group: "storage-and-schema", runner: "node", file: "test/bulk-store.test.mjs", args: ["--test"] },
{ group: "storage-and-schema", runner: "node", file: "test/bulk-store-edge-cases.test.mjs", args: ["--test"] },
{ group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store.test.mjs", args: ["--test"] },
{ group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store-edge-cases.test.mjs", args: ["--test"] },
// Issue #680 regression tests (from upstream)
// Issue #680 regression tests
{ group: "core-regression", runner: "node", file: "test/memory-reflection-issue680-tdd.test.mjs", args: ["--test"] },
// Issue #606 SDK migration Bug 2 regression tests
{ group: "core-regression", runner: "node", file: "test/issue606_sdk-migration.test.mjs" },
// PR #713 inference regression tests - inferProviderFromBaseURL + model fallback
{ group: "core-regression", runner: "node", file: "test/infer-provider-from-baseurl.test.mjs" },
// Issue #736 recall governance - isRecallUsed() unit tests
{ group: "core-regression", runner: "node", file: "test/is-recall-used.test.mjs", args: ["--test"] },
// Issue #492 agentId validation tests
{ group: "core-regression", runner: "node", file: "test/agentid-validation.test.mjs", args: ["--test"] },
{ group: "core-regression", runner: "node", file: "test/command-reflection-guard.test.mjs", args: ["--test"] },
// Tier 1 memory counter fix
{ group: "core-regression", runner: "node", file: "test/tier1-counters.test.mjs", args: ["--test"] },
// Issue #693 extraction write validation tests
{ group: "core-regression", runner: "node", file: "test/extraction-validation.test.mjs", args: ["--test"] },
{ group: "core-regression", runner: "node", file: "test/dedup-false-alarm.test.mjs", args: ["--test"] },
Comment on lines +65 to +67
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Restore the unrelated regression tests in the CI manifest

This manifest is what the GitHub CI jobs run via npm run test:core-regression / test:storage-and-schema (.github/workflows/ci.yml -> scripts/run-ci-tests.mjs), so replacing the existing block with only the new Issue #693 tests silently stops CI from running the Issue #690, #606, provider inference, recall-governance, and tier1 counter regressions. Unless those tests are intentionally retired elsewhere, keep the existing entries and append the new validation tests so these regressions remain covered.

Useful? React with 👍 / 👎.

];

export function getEntriesForGroup(group) {
Expand All @@ -81,4 +73,4 @@ export function getEntriesForGroup(group) {
}

return CI_TEST_MANIFEST.filter((entry) => entry.group === group);
}
}
18 changes: 18 additions & 0 deletions src/memory-categories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,24 @@ export type ExtractionStats = {
superseded?: number; // temporal fact replacements
};

/**
* Payload delivered to `ExtractPersistOptions.onExtractionValidationFailed`
* when the number of entries actually written to the store differs from
* the number of candidates produced by the LLM.
*
* @see ExtractPersistOptions.onExtractionValidationFailed
*/
export type ExtractionValidation = {
/** Number of candidates the LLM intended to create (createEntries.length) */
expected: number;
/** Number of rows actually written (countAfter - countBefore) */
actual: number;
/** expected - actual; positive = under-write, negative = over-write (concurrent delete) */
mismatch: number;
/** Session key passed to extractAndPersist */
sessionKey: string;
};

/** Validate and normalize a category string. */
export function normalizeCategory(raw: string): MemoryCategory | null {
const lower = raw.toLowerCase().trim();
Expand Down
40 changes: 40 additions & 0 deletions src/smart-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,23 @@ export interface ExtractPersistOptions {
* - pass a non-empty array to restrict reads to those scopes
*/
scopeFilter?: string[];
/**
* Callback invoked when the number of entries actually written to the store
* differs from the number of LLM-generated candidates.
*
* This detects write-path anomalies including:
* - SIGKILL / OOM during bulkStore (partial write)
* - Concurrent compactor deletions (count window between bulkStore and count())
*
* The callback is NOT invoked when createEntries is empty (no-op write).
* The mismatch field: positive = under-write, negative = over-write (rare).
*/
onExtractionValidationFailed?: (validation: {
expected: number;
actual: number;
mismatch: number;
sessionKey: string;
}) => void;
}

export class SmartExtractor {
Expand Down Expand Up @@ -441,7 +458,30 @@ export class SmartExtractor {
}

if (createEntries.length > 0) {
// Phase 1: Verify write success via count-before/after.
// bulkStore is atomic per entry (randomUUID + table.add), so any
// discrepancy here indicates partial failure (SIGKILL/OOM).
// LIMITATION: This count-diff approach is only reliable under single-
// process assumption (no concurrent compactor/session writes during
// extractAndPersist). Concurrent writes inflate actualCreated and can
// cause false negatives (mismatch not detected). This is acceptable for
// Phase 1 since plugin architecture guarantees single-process extraction.
// Phase 2 will address concurrent-safe validation (UUID list compare).
const countBefore = await this.store.count();
await this.store.bulkStore(createEntries);
const countAfter = await this.store.count();
const actualCreated = countAfter - countBefore;
const expectedCreated = createEntries.length;

if (actualCreated !== expectedCreated) {
const mismatch = expectedCreated - actualCreated;
options.onExtractionValidationFailed?.({
expected: expectedCreated,
actual: actualCreated,
mismatch,
sessionKey,
});
}
}

return stats;
Expand Down
165 changes: 165 additions & 0 deletions test/dedup-false-alarm.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/**
* P0 驗證測試:確認 bulkStore 不會因 batchDedup 去重
*
* 問題:是否 bulkStore 內部呼叫 batchDedup,導致 near-duplicate entries
* 被錯誤過濾,造成 countAfter - countBefore < createEntries.length?
*
* 測試策略:
* 1. 直接構造兩個 cosine similarity = 0.95 的向量(保證是 near-duplicate)
* 2. 用 batchDedup 確認它們確實被視為 duplicates
* 3. 透過 bulkStore 寫入這兩個 entry
* 4. 驗證 count 增加 2(而非 1)→ 確認 bulkStore 不去重
*/

import { describe, it, afterEach } from "node:test";
import assert from "node:assert/strict";
import jitiFactory from "jiti";

const jiti = jitiFactory(import.meta.url, { interopDefault: true });
const { MemoryStore } = jiti("../src/store.ts");
const { batchDedup } = jiti("../src/batch-dedup.ts");

/** 256-dim constant vector with a controlled cosine similarity to a base vector */
function makeNearDuplicateVector(baseVec, similarity = 0.95) {
const dim = baseVec.length;
// Scale factor: cosine = scale * |base| / |target| = scale (since |base|=|target|=1)
// Actually: cos(base, target) = scale when target = scale * base + orth
const scale = similarity;
return baseVec.map(v => v * scale);
}

/** Two identical vectors — maximum similarity */
function makeIdenticalVector(dim = 256) {
const rng = makeRng(42);
return Array.from({ length: dim }, () => rng());
}

/** Seeded LCG RNG for deterministic vectors */
function makeRng(seed) {
let s = seed >>> 0;
return () => {
s = (Math.imul(1664525, s) + 1013904223) >>> 0;
return s / 0x100000000;
};
}

/** Cosine similarity between two vectors */
function cosineSimilarity(a, b) {
let dot = 0, normA = 0, normB = 0;
for (let i = 0; i < a.length; i++) {
dot += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

const TEST_DB_PREFIX = "/tmp/test-dedup-p0-";

describe("P0: bulkStore does NOT deduplicate near-duplicate entries", () => {
/** @type {MemoryStore} */
let store;
let dbPath;

afterEach(async () => {
if (store) {
try { await store.deleteAll("test-session"); } catch {}
try { await store.destroy(); } catch {}
}
});

it("bulkStore writes both near-duplicate entries (cosine = 0.95)", async () => {
// Step 1: Create base vector and a near-duplicate (cosine = 0.95)
const dim = 256;
const baseVec = makeIdenticalVector(dim);
const dupVec = makeNearDuplicateVector(baseVec, 0.95);

const cosSim = cosineSimilarity(baseVec, dupVec);
console.log(`[P0] cosine similarity between base and near-duplicate: ${cosSim.toFixed(4)}`);
assert(cosSim > 0.94, `Near-duplicate should have cosine > 0.94, got ${cosSim}`);

// Step 2: Verify batchDedup marks one as duplicate
const dedupResult = batchDedup(
["abstract one", "abstract two"],
[baseVec, dupVec],
0.85 // default threshold
);
console.log(`[P0] batchDedup: ${dedupResult.inputCount} → ${dedupResult.outputCount}, duplicates=${JSON.stringify(dedupResult.duplicateIndices)}`);
assert(dedupResult.outputCount < dedupResult.inputCount,
`batchDedup should mark one as duplicate (input=${dedupResult.inputCount}, output=${dedupResult.outputCount})`);

// Step 3: Create MemoryStore and write both via bulkStore
dbPath = TEST_DB_PREFIX + Date.now() + "-1";
store = new MemoryStore({ dbPath, vectorDim: dim });
const countBefore = await store.count();

await store.bulkStore([
{
text: "Meeting attendance — quarterly business review",
vector: baseVec,
category: "fact",
scope: "test-session",
importance: 0.5,
metadata: JSON.stringify({ l0_abstract: "abstract one" }),
},
{
text: "Quarterly business review with team lead",
vector: dupVec,
category: "fact",
scope: "test-session",
importance: 0.5,
metadata: JSON.stringify({ l0_abstract: "abstract two" }),
},
]);

const countAfter = await store.count();
const delta = countAfter - countBefore;

console.log(`[P0 result] countBefore=${countBefore}, countAfter=${countAfter}, delta=${delta}`);

// KEY ASSERTION: delta should be exactly 2 — bulkStore does NOT dedupe
assert.strictEqual(delta, 2,
`bulkStore should write both entries (delta=2), got delta=${delta}. ` +
`If delta=1, bulkStore is internally deduplicating near-duplicate entries — this is a P0 bug.`);
});

it("bulkStore writes all 5 entries even when batchDedup would reduce them to 1", async () => {
const dim = 256;

// Create 5 identical vectors — batchDedup with threshold 0.85 will keep only 1
const baseVec = makeIdenticalVector(dim);
const vectors = Array.from({ length: 5 }, () => baseVec);

const dedupResult = batchDedup(
Array(5).fill("abstract"),
vectors,
0.85
);
console.log(`[P0 batch] batchDedup: 5 identical vectors → ${dedupResult.outputCount} survivors`);
assert(dedupResult.outputCount < 5,
"Sanity: 5 identical vectors should produce < 5 survivors in batchDedup");

// Now write all 5 via bulkStore
dbPath = TEST_DB_PREFIX + Date.now() + "-2";
store = new MemoryStore({ dbPath, vectorDim: dim });
const countBefore = await store.count();

await store.bulkStore(vectors.map((v, i) => ({
text: `Event ${i + 1}`,
vector: v,
category: "fact",
scope: "test-session",
importance: 0.5,
metadata: JSON.stringify({ l0_abstract: `abstract ${i}` }),
})));

const countAfter = await store.count();
const delta = countAfter - countBefore;

console.log(`[P0 batch result] countBefore=${countBefore}, countAfter=${countAfter}, delta=${delta}`);

// KEY ASSERTION: all 5 should be written despite batchDedup saying they're duplicates
assert.strictEqual(delta, 5,
`bulkStore should write all 5 entries even if batchDedup would drop 4 of them (delta=5), got delta=${delta}`);
});
});
Loading
Loading