-
Notifications
You must be signed in to change notification settings - Fork 7
fix: block recursive BrainLayer ingest #287
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a14c082
16ef432
b54c8ae
585cd9d
c73f013
c7b1869
7ae7c7e
80563c0
e1b8d1e
c57418c
12f5c16
36dda5f
3a0d745
e2accbc
25cbf6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| """Write-side guards for content that must never enter BrainLayer.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import re | ||
|
|
||
| _JSONRPC_MESSAGE_RE = re.compile(r'"jsonrpc"\s*:\s*"2\.0"', re.IGNORECASE) | ||
| _INVALID_JSONRPC_MARKER = "mcp brainlayer memory: invalid json-rpc message" | ||
| _BRAIN_SEARCH_BOX_PREFIX = "┌─ brain_search:" | ||
| _BRAINLAYER_BOX_PREFIX_RE = re.compile( | ||
| r"^┌─\s*(?:brain_[a-z_]+|entity(?:\s+search)?):", | ||
| re.IGNORECASE, | ||
| ) | ||
|
|
||
|
|
||
| def recursive_mcp_output_reason(content: str | None) -> str | None: | ||
| """Return a reason when content is BrainLayer MCP output being re-ingested.""" | ||
| if not content: | ||
| return None | ||
|
|
||
| stripped = str(content).lstrip() | ||
| if stripped.startswith(_BRAIN_SEARCH_BOX_PREFIX): | ||
| return "brain_search_output" | ||
| if _BRAINLAYER_BOX_PREFIX_RE.match(stripped): | ||
| return "brainlayer_mcp_output" | ||
|
|
||
| folded = stripped.casefold() | ||
| if _INVALID_JSONRPC_MARKER in folded: | ||
|
Comment on lines
+27
to
+28
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When a legitimate memory contains a JSON-RPC 2.0 example or debugging payload, this unconditional regex match makes Useful? React with 👍 / 👎. |
||
| return "invalid_jsonrpc_mcp_output" | ||
| if _JSONRPC_MESSAGE_RE.search(stripped): | ||
| return "jsonrpc_message" | ||
|
Comment on lines
+30
to
+31
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When a user stores any legitimate note or code snippet containing a JSON-RPC 2.0 payload, this branch classifies it as recursive MCP output; Useful? React with 👍 / 👎.
Comment on lines
+30
to
+31
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This check treats any text containing a JSON-RPC 2.0 object as recursive MCP output. Because Useful? React with 👍 / 👎.
Comment on lines
+30
to
+31
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This treats any content containing a JSON-RPC 2.0 object as recursive MCP output, so a legitimate memory such as an MCP implementation note or troubleshooting record with […] Useful? React with 👍 / 👎. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. JSON-RPC write guard blocks legitimate user content. Medium Severity. The […] Additional Locations (1). Reviewed by Cursor Bugbot for commit 12f5c16. Configure here.
Comment on lines
+30
to
+31
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This rejects any content containing a JSON-RPC 2.0 field, not just recursive BrainLayer MCP output. When a user stores or indexes legitimate MCP/JSON-RPC notes or examples such as Useful? React with 👍 / 👎. |
||
|
|
||
| return None | ||
|
|
||
|
|
||
| def reject_recursive_mcp_output(content: str | None) -> None: | ||
| """Raise ValueError when content is recursive BrainLayer MCP output.""" | ||
| reason = recursive_mcp_output_reason(content) | ||
| if reason: | ||
| raise ValueError(f"recursive MCP output is not stored in BrainLayer: {reason}") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -480,17 +480,27 @@ def set_entity_parent(self, entity_id: str, parent_id: str) -> None: | |
| (parent_id, entity_id), | ||
| ) | ||
|
|
||
| def get_entity_chunks(self, entity_id: str, limit: int = 20) -> List[Dict[str, Any]]: | ||
| def get_entity_chunks( | ||
| self, | ||
| entity_id: str, | ||
| limit: int = 20, | ||
| *, | ||
| include_audit: bool = False, | ||
| ) -> List[Dict[str, Any]]: | ||
| """Get chunks linked to an entity, ordered by relevance.""" | ||
| cursor = self._read_cursor() | ||
| where_clauses = ["ec.entity_id = ?"] | ||
| if not include_audit: | ||
| where_clauses.append(self._audit_recursion_exclusion_sql("c.id", "c.tags", "c.content")) | ||
| where_sql = " AND ".join(where_clauses) | ||
| rows = list( | ||
| cursor.execute( | ||
| """ | ||
| f""" | ||
| SELECT ec.chunk_id, ec.relevance, ec.context, ec.mention_type, | ||
| c.content, c.source_file, c.project, c.content_type, c.created_at | ||
| FROM kg_entity_chunks ec | ||
| JOIN chunks c ON ec.chunk_id = c.id | ||
| WHERE ec.entity_id = ? | ||
| WHERE {where_sql} | ||
| ORDER BY ec.relevance DESC | ||
| LIMIT ? | ||
| """, | ||
|
|
@@ -982,6 +992,7 @@ def kg_search( | |
| relation_type: Optional[str] = None, | ||
| limit: int = 20, | ||
| include_checkpoints: bool = False, | ||
| include_audit: bool = False, | ||
| ) -> List[Dict[str, Any]]: | ||
| """Structured KG fact retrieval. Excludes co_occurs_with noise.""" | ||
| results: List[Dict[str, Any]] = [] | ||
|
|
@@ -990,19 +1001,32 @@ def kg_search( | |
| if entity: | ||
| cursor = self._read_cursor() | ||
|
|
||
| checkpoint_join = "" | ||
| source_chunk_join = "" | ||
| checkpoint_filter = "" | ||
| audit_filter = "" | ||
| checkpoint_params: list[str] = [] | ||
| needs_source_chunk = ( | ||
| not include_checkpoints and getattr(self, "_has_chunk_origin", True) | ||
| ) or not include_audit | ||
| if needs_source_chunk: | ||
| source_chunk_join = "LEFT JOIN chunks source_chunk ON r.source_chunk_id = source_chunk.id" | ||
| if not include_checkpoints and getattr(self, "_has_chunk_origin", True): | ||
| checkpoint_join = "LEFT JOIN chunks source_chunk ON r.source_chunk_id = source_chunk.id" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Dead […]
|
||
| checkpoint_filter = """ | ||
| checkpoint_clause = self._checkpoint_exclusion_clause("source_chunk") | ||
| checkpoint_filter = f""" | ||
| AND ( | ||
| r.source_chunk_id IS NULL | ||
| OR source_chunk.id IS NULL | ||
| OR ({checkpoint_clause}) | ||
| ) | ||
| """ | ||
| if not include_audit: | ||
| audit_filter = f""" | ||
| AND ( | ||
| r.source_chunk_id IS NULL | ||
| OR source_chunk.id IS NULL | ||
| OR COALESCE(source_chunk.chunk_origin, 'unknown') != ? | ||
| OR {self._audit_recursion_exclusion_sql("source_chunk.id", "source_chunk.tags", "source_chunk.content")} | ||
| ) | ||
| """ | ||
| checkpoint_params.append("precompact_checkpoint") | ||
|
|
||
| if relation_type: | ||
| type_filter_src = "AND r.relation_type = ?" | ||
|
|
@@ -1024,10 +1048,11 @@ def kg_search( | |
| FROM kg_current_facts r | ||
| JOIN kg_entities se ON r.source_id = se.id | ||
| JOIN kg_entities te ON r.target_id = te.id | ||
| {checkpoint_join} | ||
| {source_chunk_join} | ||
| WHERE ((r.source_id = ? {type_filter_src}) | ||
| OR (r.target_id = ? {type_filter_tgt})) | ||
| {checkpoint_filter} | ||
| {audit_filter} | ||
| ORDER BY r.importance DESC, r.confidence DESC | ||
| LIMIT ? | ||
| """, | ||
|
|
@@ -1088,6 +1113,7 @@ def kg_hybrid_search( | |
| relation_type=relation_type, | ||
| limit=n_results, | ||
| include_checkpoints=bool(kwargs.get("include_checkpoints", False)), | ||
| include_audit=bool(kwargs.get("include_audit", False)), | ||
| ) | ||
|
|
||
| scored_facts = [] | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟢 Low brainlayer/src/brainlayer/mcp/__init__.py Line 1333 in 25cbf6d
When 🚀 Reply "fix it for me" or copy this AI Prompt for your agent: |


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When the watcher or
`brain_store` sees formatted output from other BrainLayer MCP paths, such as `format_entity_card()`/`format_kg_search()` output beginning with `┌─ Entity:` or `┌─ Entity search:`, this guard returns `None` because it only recognizes the `brain_search` box and the JSON-RPC markers. In that scenario the recursive MCP response can still be ingested, and the default search filters use the same detector so existing rows with those boxes are not hidden either. Useful? React with 👍 / 👎.