Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- fix(core): corrections now stored even when `LearningConfig::enabled = false` (closes #1910)
- fix(memory): sync session summaries to Qdrant on compact_context happy path (#1911) — `store_session_summary()` was only called in fallback branches; now also called after a successful `replace_conversation()` in both `compact_context` variants
- Wire `[agent.focus]` and `[memory.sidequest]` config to `AgentBuilder` in all bootstrap paths (`runner.rs`, `daemon.rs`, `acp.rs`); previously both configs were parsed but never applied, causing focus and sidequest to always use defaults (`enabled = false`) (closes #1907)
- fix(graph-memory): prevent structural noise from polluting `zeph_graph_entities` graph (closes #1912)
- Skip graph extraction entirely for `Role::User` messages containing `ToolResult` parts — tool outputs (TOML, JSON, command output) are structural data, not conversational content (FIX-1)
- Exclude `ToolResult` user messages from the context window passed to the extraction LLM call (FIX-2)
- Add `min_entity_name_bytes = 3` to `MemoryWriteValidationConfig` and enforce it in `validate_graph_extraction`; also added a matching guard in `EntityResolver::resolve()` via `MIN_ENTITY_NAME_BYTES` constant (FIX-3)
- Revise extraction prompt: restrict entity types to `person`, `project`, `technology`, `organization`, `concept`; add explicit rules against extracting structural data (config keys, file paths, tool names, TOML/JSON keys), short tokens, and raw command output (FIX-4)

### Security

Expand Down
130 changes: 123 additions & 7 deletions crates/zeph-core/src/agent/persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,17 +392,35 @@ impl<C: Channel> Agent<C> {

self.check_summarization().await;

self.maybe_spawn_graph_extraction(content, has_injection_flags)
// FIX-1: skip graph extraction for tool result messages — they contain raw structured
// output (TOML, JSON, code) that pollutes the entity graph with noise.
let has_tool_result_parts = parts
.iter()
.any(|p| matches!(p, MessagePart::ToolResult { .. }));

self.maybe_spawn_graph_extraction(content, has_injection_flags, has_tool_result_parts)
.await;
}

async fn maybe_spawn_graph_extraction(&mut self, content: &str, has_injection_flags: bool) {
async fn maybe_spawn_graph_extraction(
&mut self,
content: &str,
has_injection_flags: bool,
has_tool_result_parts: bool,
) {
use zeph_memory::semantic::GraphExtractionConfig;

if self.memory_state.memory.is_none() || self.memory_state.conversation_id.is_none() {
return;
}

// FIX-1: skip extraction for tool result messages — raw tool output is structural data,
// not conversational content. Extracting entities from it produces graph noise.
if has_tool_result_parts {
tracing::debug!("graph extraction skipped: message contains ToolResult parts");
return;
}

// S2: skip extraction when injection flags detected — content is untrusted LLM input
if has_injection_flags {
tracing::warn!("graph extraction skipped: injection patterns detected in content");
Expand Down Expand Up @@ -434,12 +452,20 @@ impl<C: Channel> Agent<C> {
}
};

// M1: collect last 4 user messages as context for extraction
// FIX-2: collect last 4 genuine conversational user messages as context for extraction.
// Exclude tool result messages (Role::User with ToolResult parts) — they contain
// raw structured output and would pollute the extraction context with noise.
let context_messages: Vec<String> = self
.messages
.iter()
.rev()
.filter(|m| m.role == Role::User)
.filter(|m| {
m.role == Role::User
&& !m
.parts
.iter()
.any(|p| matches!(p, MessagePart::ToolResult { .. }))
})
.take(4)
.map(|m| m.content.clone())
.collect();
Expand Down Expand Up @@ -859,6 +885,7 @@ mod tests {
mod graph_extraction_guards {
use super::*;
use crate::config::GraphConfig;
use zeph_llm::provider::MessageMetadata;
use zeph_memory::graph::GraphStore;

fn enabled_graph_config() -> GraphConfig {
Expand Down Expand Up @@ -901,7 +928,9 @@ mod tests {
.pool()
.clone();

agent.maybe_spawn_graph_extraction("I use Rust", true).await;
agent
.maybe_spawn_graph_extraction("I use Rust", true, false)
.await;

// Give any accidental spawn time to settle.
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
Expand Down Expand Up @@ -933,7 +962,7 @@ mod tests {
.clone();

agent
.maybe_spawn_graph_extraction("I use Rust", false)
.maybe_spawn_graph_extraction("I use Rust", false, false)
.await;

tokio::time::sleep(std::time::Duration::from_millis(50)).await;
Expand Down Expand Up @@ -962,7 +991,7 @@ mod tests {
.clone();

agent
.maybe_spawn_graph_extraction("I use Rust for systems programming", false)
.maybe_spawn_graph_extraction("I use Rust for systems programming", false, false)
.await;

// Wait for the spawned task to complete.
Expand All @@ -975,6 +1004,93 @@ mod tests {
"happy-path extraction must increment extraction_count"
);
}

#[tokio::test]
async fn tool_result_parts_guard_skips_extraction() {
    // FIX-1 regression guard: when `has_tool_result_parts` is true, extraction
    // must be skipped entirely. Tool result messages carry raw structured output
    // (TOML, JSON, code), not conversational content, and extracting entities
    // from them fills the graph with noise.
    let provider = mock_provider(vec![]);
    let mut agent = agent_with_graph(&provider, enabled_graph_config()).await;

    // Grab the sqlite pool up front so extraction metadata can be inspected later.
    let memory = agent.memory_state.memory.as_ref().unwrap();
    let pool = memory.sqlite().pool().clone();

    agent
        .maybe_spawn_graph_extraction(
            "[tool_result: abc123]\nprovider_type = \"claude\"\nallowed_commands = []",
            false,
            true, // has_tool_result_parts
        )
        .await;

    // If the guard failed and a task was spawned anyway, give it time to run.
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;

    let count = GraphStore::new(pool)
        .get_metadata("extraction_count")
        .await
        .unwrap();
    assert!(
        count.is_none(),
        "tool result message must not trigger graph extraction"
    );
}

#[tokio::test]
async fn context_filter_excludes_tool_result_messages() {
    // FIX-2: when extraction collects context_messages it must drop Role::User
    // messages that carry ToolResult parts, keeping only genuine conversational
    // turns. The filtered context is not directly observable from here, so this
    // test pins the observable contract instead: a tool result message sitting
    // in history must not block extraction for a later conversational message.
    let provider = mock_provider(vec![]);
    let mut agent = agent_with_graph(&provider, enabled_graph_config()).await;

    // Seed history with a tool result message, simulating a tool call response
    // that arrived before the current conversational turn.
    agent.messages.push(Message {
        role: Role::User,
        content: "[tool_result: abc]\nprovider_type = \"openai\"".to_owned(),
        parts: vec![MessagePart::ToolResult {
            tool_use_id: "abc".to_owned(),
            content: "provider_type = \"openai\"".to_owned(),
            is_error: false,
        }],
        metadata: MessageMetadata::default(),
    });

    let pool = agent
        .memory_state
        .memory
        .as_ref()
        .unwrap()
        .sqlite()
        .pool()
        .clone();

    // A genuine conversational message: no injection flags, no tool result parts.
    agent
        .maybe_spawn_graph_extraction("I prefer Rust for systems programming", false, false)
        .await;

    // Wait for the spawned extraction task to finish.
    tokio::time::sleep(std::time::Duration::from_millis(200)).await;

    let count = GraphStore::new(pool)
        .get_metadata("extraction_count")
        .await
        .unwrap();
    assert!(
        count.is_some(),
        "conversational message must trigger extraction even with prior tool result in history"
    );
}
}

#[tokio::test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ guard_memory_writes = true
[security.memory_validation]
enabled = true
max_content_bytes = 4096
min_entity_name_bytes = 3
max_entity_name_bytes = 256
max_fact_bytes = 1024
max_entities_per_extraction = 50
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ guard_memory_writes = true
[security.memory_validation]
enabled = true
max_content_bytes = 4096
min_entity_name_bytes = 3
max_entity_name_bytes = 256
max_fact_bytes = 1024
max_entities_per_extraction = 50
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ max_tool_retries = 2
tool_repeat_threshold = 2
max_retry_duration_secs = 30

[agent.focus]
enabled = false
compression_interval = 12
reminder_interval = 15
min_messages_per_focus = 8
max_knowledge_tokens = 4096

[llm]
provider = "ollama"
base_url = "http://localhost:11434"
Expand Down Expand Up @@ -121,8 +128,16 @@ sweep_interval_secs = 3600

[memory.compression]
strategy = "reactive"
pruning_strategy = "reactive"
model = ""

[memory.sidequest]
enabled = false
interval_turns = 4
max_eviction_ratio = 0.5
max_cursors = 30
min_cursor_tokens = 100

[memory.routing]
strategy = "heuristic"

Expand Down Expand Up @@ -257,6 +272,7 @@ guard_memory_writes = true
[security.memory_validation]
enabled = true
max_content_bytes = 4096
min_entity_name_bytes = 3
max_entity_name_bytes = 256
max_fact_bytes = 1024
max_entities_per_extraction = 50
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ guard_memory_writes = true
[security.memory_validation]
enabled = true
max_content_bytes = 4096
min_entity_name_bytes = 3
max_entity_name_bytes = 256
max_fact_bytes = 1024
max_entities_per_extraction = 50
Expand Down
44 changes: 41 additions & 3 deletions crates/zeph-core/src/sanitizer/memory_validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ pub enum MemoryValidationError {
#[error("content too large: {size} bytes exceeds max {max}")]
ContentTooLarge { size: usize, max: usize },

#[error("entity name too short: '{name}' is below min {min} bytes")]
EntityNameTooShort { name: String, min: usize },

#[error("entity name too long: '{name}' exceeds max {max} bytes")]
EntityNameTooLong { name: String, max: usize },

Expand Down Expand Up @@ -74,6 +77,10 @@ fn default_max_entity_name_bytes() -> usize {
256
}

/// Serde default for `min_entity_name_bytes` (FIX-3): entity names shorter
/// than this many bytes are rejected as noise (e.g. "go", "cd").
fn default_min_entity_name_bytes() -> usize {
    3
}

/// Serde default for `max_fact_bytes`: upper bound on a single extracted fact.
fn default_max_fact_bytes() -> usize {
    1024
}
Expand All @@ -99,6 +106,10 @@ pub struct MemoryWriteValidationConfig {
/// Maximum byte length of content passed to `memory_save`.
#[serde(default = "default_max_content_bytes")]
pub max_content_bytes: usize,
/// Minimum byte length of an entity name in graph extraction.
/// Names shorter than this are rejected as noise (e.g. "go", "cd").
#[serde(default = "default_min_entity_name_bytes")]
pub min_entity_name_bytes: usize,
/// Maximum byte length of a single entity name in graph extraction.
#[serde(default = "default_max_entity_name_bytes")]
pub max_entity_name_bytes: usize,
Expand All @@ -122,6 +133,7 @@ impl Default for MemoryWriteValidationConfig {
Self {
enabled: true,
max_content_bytes: default_max_content_bytes(),
min_entity_name_bytes: default_min_entity_name_bytes(),
max_entity_name_bytes: default_max_entity_name_bytes(),
max_fact_bytes: default_max_fact_bytes(),
max_entities_per_extraction: default_max_entities(),
Expand Down Expand Up @@ -212,7 +224,15 @@ impl MemoryWriteValidator {
}

for entity in &result.entities {
let name_len = entity.name.len();
// Trim before length checks: both min and max apply to the trimmed form
// to avoid rejecting names with leading/trailing whitespace.
let name_len = entity.name.trim().len();
if name_len < self.config.min_entity_name_bytes {
return Err(MemoryValidationError::EntityNameTooShort {
name: entity.name.clone(),
min: self.config.min_entity_name_bytes,
});
}
if name_len > self.config.max_entity_name_bytes {
return Err(MemoryValidationError::EntityNameTooLong {
name: entity.name.clone(),
Expand Down Expand Up @@ -338,7 +358,7 @@ mod tests {
max_entities_per_extraction: 2,
..MemoryWriteValidationConfig::default()
});
let r = result_with(vec![entity("A"), entity("B"), entity("C")], vec![]);
let r = result_with(vec![entity("Abc"), entity("Def"), entity("Ghi")], vec![]);
let err = v.validate_graph_extraction(&r).unwrap_err();
assert!(matches!(err, MemoryValidationError::TooManyEntities { .. }));
}
Expand Down Expand Up @@ -500,6 +520,24 @@ mod tests {
));
}

// --- min entity name length (FIX-3) ---

#[test]
fn entity_name_below_min_rejected() {
    // FIX-3: a 2-byte name ("go") falls below the 3-byte default minimum.
    let extraction = result_with(vec![entity("go")], vec![]);
    let err = validator()
        .validate_graph_extraction(&extraction)
        .unwrap_err();
    assert!(matches!(
        err,
        MemoryValidationError::EntityNameTooShort { .. }
    ));
}

#[test]
fn entity_name_at_min_passes() {
    // FIX-3 boundary: a 3-byte name ("git") sits exactly at the default minimum
    // and must be accepted.
    let extraction = result_with(vec![entity("git")], vec![]);
    assert!(validator().validate_graph_extraction(&extraction).is_ok());
}

// --- exact boundary: entities count ---

#[test]
Expand All @@ -508,7 +546,7 @@ mod tests {
max_entities_per_extraction: 3,
..MemoryWriteValidationConfig::default()
});
let r = result_with(vec![entity("A"), entity("B"), entity("C")], vec![]);
let r = result_with(vec![entity("Abc"), entity("Def"), entity("Ghi")], vec![]);
assert!(v.validate_graph_extraction(&r).is_ok());
}

Expand Down
Loading
Loading