Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions strix/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from strix.config import Config
from strix.llm.config import LLMConfig
from strix.llm.memory_compressor import MemoryCompressor
from strix.llm.memory_compressor import MemoryCompressor, get_message_tokens
from strix.llm.utils import (
_truncate_to_first_function,
fix_incomplete_tool_call,
Expand Down Expand Up @@ -210,7 +210,12 @@ def _prepare_messages(self, conversation_history: list[dict[str, Any]]) -> list[
}
)

compressed = list(self.memory_compressor.compress_history(conversation_history))
reserved_tokens = sum(
get_message_tokens(msg, self.config.litellm_model) for msg in messages
)
compressed = list(
self.memory_compressor.compress_history(conversation_history, reserved_tokens)
)
conversation_history.clear()
conversation_history.extend(compressed)
messages.extend(compressed)
Expand Down
13 changes: 10 additions & 3 deletions strix/llm/memory_compressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def _count_tokens(text: str, model: str) -> int:
return len(text) // 4 # Rough estimate


def _get_message_tokens(msg: dict[str, Any], model: str) -> int:
def get_message_tokens(msg: dict[str, Any], model: str) -> int:
content = msg.get("content", "")
if isinstance(content, str):
return _count_tokens(content, model)
Expand Down Expand Up @@ -166,9 +166,16 @@ def __init__(
def compress_history(
self,
messages: list[dict[str, Any]],
reserved_tokens: int = 0,
) -> list[dict[str, Any]]:
"""Compress conversation history to stay within token limits.

Args:
messages: Conversation history messages to compress.
reserved_tokens: Tokens already consumed by the system prompt and
other framing messages outside the conversation history. Added to
the history token total before comparing against the token limit.

Comment on lines +173 to +178
Copy link

Copilot AI Mar 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring says reserved_tokens is “Subtracted from the budget before checking limits”, but the implementation adds it into total_tokens and compares against the fixed budget. Either update the wording to match the behavior (reserved tokens are included in the total prompt token count) or change the logic to compute an available_budget = MAX_TOTAL_TOKENS * 0.9 - reserved_tokens and compare history tokens against that.

Copilot uses AI. Check for mistakes.
Strategy:
1. Handle image limits first
2. Keep all system messages
Expand Down Expand Up @@ -201,8 +208,8 @@ def compress_history(
# Type assertion since we ensure model_name is not None in __init__
model_name: str = self.model_name # type: ignore[assignment]

total_tokens = sum(
_get_message_tokens(msg, model_name) for msg in system_msgs + regular_msgs
total_tokens = reserved_tokens + sum(
get_message_tokens(msg, model_name) for msg in system_msgs + regular_msgs
)

if total_tokens <= MAX_TOTAL_TOKENS * 0.9:
Expand Down