Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/brainlayer/drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
merge_existing_chunk_content,
merge_existing_chunk_seen,
)
from .ingest_guard import recursive_mcp_output_reason
from .paths import get_db_path

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -159,6 +160,10 @@ def _apply_store(conn: apsw.Connection, event: dict[str, Any]) -> ApplyResult:
if not content:
logger.warning("Skipping malformed store event with empty content")
return ApplyResult()
recursive_reason = recursive_mcp_output_reason(content)
if recursive_reason:
logger.warning("Skipping recursive MCP store event: %s", recursive_reason)
return ApplyResult()
now = datetime.now(timezone.utc).isoformat()
metadata = {"memory_type": event.get("memory_type", "note")}
raw_metadata = event.get("metadata")
Expand Down Expand Up @@ -239,6 +244,10 @@ def _apply_watcher(conn: apsw.Connection, event: dict[str, Any]) -> None:
if not content:
logger.warning("Skipping malformed watcher event with empty content")
return
recursive_reason = recursive_mcp_output_reason(content)
if recursive_reason:
logger.warning("Skipping recursive MCP watcher event: %s", recursive_reason)
return
tags = event.get("tags")
_insert_or_merge_chunk(
conn,
Expand Down Expand Up @@ -270,6 +279,10 @@ def _apply_hook(conn: apsw.Connection, event: dict[str, Any]) -> None:
if not content:
logger.warning("Skipping malformed hook event with empty content")
return
recursive_reason = recursive_mcp_output_reason(content)
if recursive_reason:
logger.warning("Skipping recursive MCP hook event: %s", recursive_reason)
return
content_hash = event.get("content_hash") or hashlib.sha256(content.encode()).hexdigest()[:16]
session_id = event.get("session_id") or "unknown"
chunk_id = event.get("chunk_id") or f"rt-{str(session_id)[:8]}-{content_hash}"
Expand Down
5 changes: 5 additions & 0 deletions src/brainlayer/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def think(
embed_fn: Any,
project: str | None = None,
max_results: int = 10,
include_audit: bool = False,
) -> ThinkResult:
"""Given current task context, retrieve relevant past knowledge.

Expand Down Expand Up @@ -206,6 +207,7 @@ def think(
n_results=max_results,
project_filter=project,
importance_min=3.0, # Skip low-importance noise
include_audit=include_audit,
)

if not results["documents"][0]:
Expand Down Expand Up @@ -239,6 +241,7 @@ def recall(
topic: str | None = None,
project: str | None = None,
max_results: int = 10,
include_audit: bool = False,
) -> RecallResult:
"""Proactive smart retrieval based on file or topic.

Expand Down Expand Up @@ -278,6 +281,7 @@ def recall(
query_text=fname,
n_results=max_results,
project_filter=project,
include_audit=include_audit,
)
for doc, meta in zip(search_results["documents"][0], search_results["metadatas"][0]):
result.related_chunks.append(
Expand All @@ -299,6 +303,7 @@ def recall(
query_text=topic,
n_results=max_results,
project_filter=project,
include_audit=include_audit,
)
for doc, meta in zip(search_results["documents"][0], search_results["metadatas"][0]):
result.related_chunks.append(
Expand Down
40 changes: 40 additions & 0 deletions src/brainlayer/ingest_guard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Write-side guards for content that must never enter BrainLayer."""

from __future__ import annotations

import re

# Matches a `"jsonrpc": "2.0"` key/value pair anywhere in the text (the guard
# applies it with .search()), case-insensitively and tolerating whitespace
# around the colon.
# NOTE(review): nothing in this pattern is specific to BrainLayer output, so any
# content that merely embeds a JSON-RPC 2.0 payload (protocol notes, debugging
# snippets) also matches — confirm that this breadth is intended.
_JSONRPC_MESSAGE_RE = re.compile(r'"jsonrpc"\s*:\s*"2\.0"', re.IGNORECASE)
# Lower-case marker; the guard looks for it as a substring of the casefolded
# content, so the casing of the incoming message does not matter.
_INVALID_JSONRPC_MARKER = "mcp brainlayer memory: invalid json-rpc message"
# Literal header of a brain_search result box. Compared with startswith()
# against the left-stripped content, i.e. case-sensitively and with exactly one
# space after the box corner.
_BRAIN_SEARCH_BOX_PREFIX = "┌─ brain_search:"
# Header of a formatted box at the very start of the content: the "┌─" corner,
# optional whitespace, then either a `brain_*` name or "entity" / "entity search",
# followed by a colon. Case-insensitive, so "┌─ Entity:" matches as well.
_BRAINLAYER_BOX_PREFIX_RE = re.compile(
r"^┌─\s*(?:brain_[a-z_]+|entity(?:\s+search)?):",
re.IGNORECASE,
)


def recursive_mcp_output_reason(content: str | None) -> str | None:
"""Return a reason when content is BrainLayer MCP output being re-ingested."""
if not content:
return None

stripped = str(content).lstrip()
if stripped.startswith(_BRAIN_SEARCH_BOX_PREFIX):
return "brain_search_output"
Comment on lines +22 to +23
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reject non-search BrainLayer MCP boxes

When the watcher or brain_store sees formatted output from other BrainLayer MCP paths, such as format_entity_card()/format_kg_search() output beginning with ┌─ Entity: or ┌─ Entity search:, this guard returns None because it only recognizes the brain_search box and the JSON-RPC markers. In that scenario the recursive MCP response can still be ingested, and the default search filters use the same detector so existing rows with those boxes are not hidden either.

Useful? React with 👍 / 👎.

if _BRAINLAYER_BOX_PREFIX_RE.match(stripped):
return "brainlayer_mcp_output"

folded = stripped.casefold()
if _INVALID_JSONRPC_MARKER in folded:
Comment on lines +27 to +28
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Restrict JSON-RPC rejection to BrainLayer output

When a legitimate memory contains a JSON-RPC 2.0 example or debugging payload, this unconditional regex match makes store_memory, upsert_chunks, watcher/drain ingestion, and updates reject or skip it as recursive MCP output even if it is just technical content. Please gate this on BrainLayer/MCP output markers or BrainLayer tool names rather than any "jsonrpc":"2.0" occurrence so valid protocol notes are not silently dropped.

Useful? React with 👍 / 👎.

return "invalid_jsonrpc_mcp_output"
if _JSONRPC_MESSAGE_RE.search(stripped):
return "jsonrpc_message"
Comment on lines +30 to +31
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Narrow the JSON-RPC recursion guard

When a user stores any legitimate note or code snippet containing a JSON-RPC 2.0 payload, this branch classifies it as recursive MCP output; reject_recursive_mcp_output() is now called from brain_store/store_memory/VectorStore.add_chunks, and the drain silently skips the same content, so memories about MCP/JSON-RPC requests that are not BrainLayer output can no longer be stored. The check needs an additional BrainLayer/MCP-output marker instead of treating every "jsonrpc":"2.0" object as recursion.

Useful? React with 👍 / 👎.

Comment on lines +30 to +31
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Narrow JSON-RPC guard to BrainLayer outputs

This check treats any text containing a JSON-RPC 2.0 object as recursive MCP output. Because recursive_mcp_output_reason() is now called by brain_store, the drain paths, watcher ingestion, and the default search filters, a legitimate memory such as notes or code snippets documenting MCP/JSON-RPC requests (e.g. {"jsonrpc":"2.0","method":"..."}) will either be rejected on write or hidden from search even when it is not BrainLayer output. Please scope this predicate to the BrainLayer error/output envelope rather than every JSON-RPC example.

Useful? React with 👍 / 👎.

Comment on lines +30 to +31
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Narrow the JSON-RPC recursion guard

This treats any content containing a JSON-RPC 2.0 object as recursive MCP output, so a legitimate memory such as an MCP implementation note or troubleshooting record with {"jsonrpc":"2.0","method":"tools/call"} is rejected by brain_store/upsert and hidden by the matching search filters. The recursive-output cases already have BrainLayer box/invalid-message markers, so the generic JSON-RPC predicate should be scoped to those envelopes instead of blocking all JSON-RPC examples.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JSON-RPC write guard blocks legitimate user content

Medium Severity

The _JSONRPC_MESSAGE_RE pattern ("jsonrpc"\s*:\s*"2\.0") is significantly broader than the other guards. While the box-drawing prefix and "MCP BrainLayer Memory" checks are specific to BrainLayer's own output format, this regex matches any content containing a JSON-RPC 2.0 payload substring. A user storing a note like "Learned that MCP uses JSON-RPC: {\"jsonrpc\": \"2.0\", ...}" would be rejected by reject_recursive_mcp_output with no override, unlike the search-side include_audit opt-in. The write guard is permanent and has no bypass.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 12f5c16. Configure here.

Comment on lines +30 to +31
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Narrow the JSON-RPC guard to BrainLayer output

This rejects any content containing a JSON-RPC 2.0 field, not just recursive BrainLayer MCP output. When a user stores or indexes legitimate MCP/JSON-RPC notes or examples such as {"jsonrpc":"2.0","method":"tools/call"}, store_memory, watcher ingestion, and drain all call this guard and will raise or silently drop that memory, so BrainLayer loses valid technical recall for JSON-RPC work.

Useful? React with 👍 / 👎.


return None


def reject_recursive_mcp_output(content: str | None) -> None:
    """Refuse content that is BrainLayer MCP output being fed back in.

    Detection is delegated to ``recursive_mcp_output_reason``; when it reports
    nothing the function returns ``None`` without side effects.

    Raises:
        ValueError: if a reason is reported. The reason is appended to the
            error message.
    """
    detected = recursive_mcp_output_reason(content)
    if not detected:
        return
    raise ValueError(f"recursive MCP output is not stored in BrainLayer: {detected}")
44 changes: 35 additions & 9 deletions src/brainlayer/kg_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,17 +480,27 @@ def set_entity_parent(self, entity_id: str, parent_id: str) -> None:
(parent_id, entity_id),
)

def get_entity_chunks(self, entity_id: str, limit: int = 20) -> List[Dict[str, Any]]:
def get_entity_chunks(
self,
entity_id: str,
limit: int = 20,
*,
include_audit: bool = False,
) -> List[Dict[str, Any]]:
"""Get chunks linked to an entity, ordered by relevance."""
cursor = self._read_cursor()
where_clauses = ["ec.entity_id = ?"]
if not include_audit:
where_clauses.append(self._audit_recursion_exclusion_sql("c.id", "c.tags", "c.content"))
where_sql = " AND ".join(where_clauses)
rows = list(
cursor.execute(
"""
f"""
SELECT ec.chunk_id, ec.relevance, ec.context, ec.mention_type,
c.content, c.source_file, c.project, c.content_type, c.created_at
FROM kg_entity_chunks ec
JOIN chunks c ON ec.chunk_id = c.id
WHERE ec.entity_id = ?
WHERE {where_sql}
ORDER BY ec.relevance DESC
LIMIT ?
""",
Expand Down Expand Up @@ -982,6 +992,7 @@ def kg_search(
relation_type: Optional[str] = None,
limit: int = 20,
include_checkpoints: bool = False,
include_audit: bool = False,
) -> List[Dict[str, Any]]:
"""Structured KG fact retrieval. Excludes co_occurs_with noise."""
results: List[Dict[str, Any]] = []
Expand All @@ -990,19 +1001,32 @@ def kg_search(
if entity:
cursor = self._read_cursor()

checkpoint_join = ""
source_chunk_join = ""
checkpoint_filter = ""
audit_filter = ""
checkpoint_params: list[str] = []
needs_source_chunk = (
not include_checkpoints and getattr(self, "_has_chunk_origin", True)
) or not include_audit
if needs_source_chunk:
source_chunk_join = "LEFT JOIN chunks source_chunk ON r.source_chunk_id = source_chunk.id"
if not include_checkpoints and getattr(self, "_has_chunk_origin", True):
checkpoint_join = "LEFT JOIN chunks source_chunk ON r.source_chunk_id = source_chunk.id"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dead checkpoint_params variable never populated after refactor

Low Severity

checkpoint_params: list[str] = [] is initialized but never populated — the old checkpoint_params.append("precompact_checkpoint") was removed because _checkpoint_exclusion_clause now generates SQL without ? placeholders. The variable is spread into params via *checkpoint_params, which always evaluates to nothing. This dead code creates a misleading impression that params might still include checkpoint filter values.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 7ae7c7e. Configure here.

checkpoint_filter = """
checkpoint_clause = self._checkpoint_exclusion_clause("source_chunk")
checkpoint_filter = f"""
AND (
r.source_chunk_id IS NULL
OR source_chunk.id IS NULL
OR ({checkpoint_clause})
)
"""
if not include_audit:
audit_filter = f"""
AND (
r.source_chunk_id IS NULL
OR source_chunk.id IS NULL
OR COALESCE(source_chunk.chunk_origin, 'unknown') != ?
OR {self._audit_recursion_exclusion_sql("source_chunk.id", "source_chunk.tags", "source_chunk.content")}
)
"""
checkpoint_params.append("precompact_checkpoint")

if relation_type:
type_filter_src = "AND r.relation_type = ?"
Expand All @@ -1024,10 +1048,11 @@ def kg_search(
FROM kg_current_facts r
JOIN kg_entities se ON r.source_id = se.id
JOIN kg_entities te ON r.target_id = te.id
{checkpoint_join}
{source_chunk_join}
WHERE ((r.source_id = ? {type_filter_src})
OR (r.target_id = ? {type_filter_tgt}))
{checkpoint_filter}
{audit_filter}
ORDER BY r.importance DESC, r.confidence DESC
LIMIT ?
""",
Expand Down Expand Up @@ -1088,6 +1113,7 @@ def kg_hybrid_search(
relation_type=relation_type,
limit=n_results,
include_checkpoints=bool(kwargs.get("include_checkpoints", False)),
include_audit=bool(kwargs.get("include_audit", False)),
)

scored_facts = []
Expand Down
18 changes: 18 additions & 0 deletions src/brainlayer/mcp/__init__.py
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Low

elif name == "brain_recall":

When brain_recall is called with source_filter or correction_category parameters, they are silently dropped and not forwarded to _brain_recall(). This means filtered recall queries return unfiltered results, breaking the caller's intent. Consider adding source_filter and correction_category to the _brain_recall() call at lines 1333-1364.

🚀 Reply "fix it for me" or copy this AI Prompt for your agent:
In file src/brainlayer/mcp/__init__.py around line 1333:

When `brain_recall` is called with `source_filter` or `correction_category` parameters, they are silently dropped and not forwarded to `_brain_recall()`. This means filtered recall queries return unfiltered results, breaking the caller's intent. Consider adding `source_filter` and `correction_category` to the `_brain_recall()` call at lines 1333-1364.

Evidence trail:
src/brainlayer/mcp/__init__.py lines 1333-1364 (brain_recall dispatcher - no source_filter or correction_category); src/brainlayer/mcp/__init__.py lines 1242-1275 (brain_search dispatcher - passes source_filter line 1270, correction_category line 1271); src/brainlayer/mcp/search_handler.py lines 923-954 (_brain_recall signature - accepts source_filter line 951, correction_category line 952); src/brainlayer/mcp/__init__.py lines 689-843 (brain_recall tool schema - no source_filter or correction_category properties)

Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,11 @@ async def list_tools() -> list[Tool]:
"default": False,
"description": "Include PreCompact checkpoint chunks in search results. Defaults to false; use brain_resume for explicit session recovery.",
},
"include_audit": {
"type": "boolean",
"default": False,
"description": "Opt in to audit/eval and recursive MCP-output memories. Defaults false to prevent audit-recursion pollution.",
},
"detail": {
"type": "string",
"enum": ["compact", "full"],
Expand Down Expand Up @@ -828,6 +833,11 @@ async def list_tools() -> list[Tool]:
"default": False,
"description": "Include PreCompact checkpoint chunks in mode=search results. Defaults to false; use brain_resume for explicit session recovery.",
},
"include_audit": {
"type": "boolean",
"default": False,
"description": "Opt in to audit/eval and recursive MCP-output memories in mode=search. Defaults false to prevent audit-recursion pollution.",
},
},
}
),
Expand Down Expand Up @@ -934,6 +944,11 @@ async def list_tools() -> list[Tool]:
"minimum": 0,
"description": "Pagination offset for list action.",
},
"include_audit": {
"type": "boolean",
"default": False,
"description": "Opt in to audit/eval and recursive MCP-output evidence. Defaults false to prevent audit-recursion pollution.",
},
},
"required": [],
}
Expand Down Expand Up @@ -1255,6 +1270,7 @@ async def call_tool(name: str, arguments: dict[str, Any]):
source_filter=resolved_source_filter,
correction_category=arguments.get("correction_category"),
include_checkpoints=arguments.get("include_checkpoints", False),
include_audit=arguments.get("include_audit", False),
)
)

Expand Down Expand Up @@ -1343,6 +1359,7 @@ async def call_tool(name: str, arguments: dict[str, Any]):
detail=arguments.get("detail", "compact"),
entity_type=arguments.get("entity_type"),
include_checkpoints=arguments.get("include_checkpoints", False),
include_audit=arguments.get("include_audit", False),
)
)

Expand Down Expand Up @@ -1395,6 +1412,7 @@ async def call_tool(name: str, arguments: dict[str, Any]):
mode="entity",
query=query,
entity_type=arguments.get("entity_type"),
include_audit=arguments.get("include_audit", False),
)
)

Expand Down
2 changes: 2 additions & 0 deletions src/brainlayer/mcp/entity_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
async def _brain_entity(
query: str,
entity_type: str | None = None,
include_audit: bool = False,
) -> CallToolResult:
"""Handle brain_entity tool call."""
from ..pipeline.digest import entity_lookup
Expand All @@ -32,6 +33,7 @@ async def _brain_entity(
store=store,
embed_fn=model.embed_query,
entity_type=entity_type,
include_audit=include_audit,
),
)
except Exception as e:
Expand Down
Loading
Loading