Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions src/foundation/services/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
from foundation.services.history import HistoryStore
from foundation.services.observer import EventSink, ObserverService
from foundation.services.planner import PlannerService, PlanningError
from foundation.services.provider import ProviderAdapter
from foundation.services.provider import ProviderAdapter, ProviderError
from foundation.services.shell import OutputCallback, ShellRuntime
from foundation.services.tools import LocalToolService
from foundation.settings import ApprovalMode
Expand Down Expand Up @@ -576,21 +576,31 @@ def orchestrate(self, request: UserRequest) -> OrchestrationResult:
)
return result
except Exception as exc:
# Preserve the raw (capped) provider response on parse/truncation
# failures so the persisted event log is self-diagnosing.
failure_extra: dict[str, str] = {}
if isinstance(exc, ProviderError) and exc.response_text:
failure_extra["response_text"] = exc.response_text[:4096]
self._observer.emit_exception(
EVENT_EXCEPTION,
exc,
payload={
"request_id": request_id,
"session_id": session_id,
"request_text": request.message,
**failure_extra,
},
session_id=session_id,
logger_name="foundation.services.orchestrator",
)
self._observer.emit_exception(
EVENT_PLAN_FAILED,
exc,
payload={"request_id": request_id, "session_id": session_id},
payload={
"request_id": request_id,
"session_id": session_id,
**failure_extra,
},
session_id=session_id,
logger_name="foundation.services.orchestrator",
)
Expand Down
20 changes: 15 additions & 5 deletions src/foundation/services/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,22 @@ def request_plan(
response = self._provider.complete(prompt)
except ProviderError as exc:
last_error = exc
if (
exc.code is ProviderErrorCode.INVALID_RESPONSE
and attempt < self._max_plan_attempts
):
repairable = exc.code in (
ProviderErrorCode.INVALID_RESPONSE,
ProviderErrorCode.TRUNCATED,
)
if repairable and attempt < self._max_plan_attempts:
if exc.code is ProviderErrorCode.TRUNCATED:
feedback = (
"Your previous response was truncated before the JSON closed. "
"Produce a SHORTER plan: do not inline large file contents — for "
"any sizable file body, omit `content` and provide a brief "
"`content_brief` describing what to write instead."
)
else:
feedback = "The previous response was not valid JSON."
supplemental_messages = self._repair_messages(
"The previous response was not valid JSON.",
feedback,
invalid_output=exc.response_text,
)
continue
Expand Down
55 changes: 52 additions & 3 deletions src/foundation/services/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class ProviderErrorCode(StrEnum):
RATE_LIMIT = "rate_limit"
SERVER_ERROR = "server_error"
INVALID_RESPONSE = "invalid_response"
TRUNCATED = "truncated"
REFUSAL = "refusal"
UNSUPPORTED_PROVIDER = "unsupported_provider"

Expand Down Expand Up @@ -197,6 +198,7 @@ def __init__(
timeout_seconds: int = 60,
max_attempts: int = 3,
retry_backoff_seconds: float = 0.25,
max_output_tokens: int | None = None,
transport: JsonTransport | None = None,
) -> None:
self._model = model
Expand All @@ -205,6 +207,7 @@ def __init__(
self._timeout_seconds = timeout_seconds
self._max_attempts = max_attempts
self._retry_backoff_seconds = retry_backoff_seconds
self._max_output_tokens = max_output_tokens
self._transport = transport or UrllibJsonTransport()

def complete(self, prompt: ProviderPrompt) -> ProviderResponse:
Expand Down Expand Up @@ -333,6 +336,8 @@ def _build_payload(self, prompt: ProviderPrompt) -> dict[str, Any]:
for message in prompt.messages
],
}
if self._max_output_tokens is not None:
payload["max_output_tokens"] = self._max_output_tokens
if prompt.response_format is ProviderResponseFormat.JSON_OBJECT:
assert prompt.schema_name is not None
assert prompt.output_schema is not None
Expand All @@ -347,6 +352,21 @@ def _build_payload(self, prompt: ProviderPrompt) -> dict[str, Any]:
return payload

def _extract_content(self, payload: Mapping[str, Any]) -> str:
# An incomplete response means the model hit max_output_tokens; the
# output is partial. Flag it explicitly rather than parsing truncated JSON.
if payload.get("status") == "incomplete":
details = payload.get("incomplete_details")
reason = details.get("reason") if isinstance(details, Mapping) else None
if reason in (None, "max_output_tokens"):
raise ProviderError(
"Provider response was truncated before completion "
f"(status=incomplete, reason={reason}). Raise provider.max_output_tokens, "
"or have the planner emit a smaller plan (use content_brief for large "
"file bodies).",
code=ProviderErrorCode.TRUNCATED,
response_text=_coerce_optional_string(payload.get("output_text")),
)

top_level_text = payload.get("output_text")
if isinstance(top_level_text, str) and top_level_text.strip():
return top_level_text.strip()
Expand Down Expand Up @@ -423,6 +443,8 @@ def __init__(
timeout_seconds: int = 60,
max_attempts: int = 3,
retry_backoff_seconds: float = 0.25,
max_output_tokens: int | None = None,
num_ctx: int | None = None,
transport: JsonTransport | None = None,
) -> None:
self._model = model
Expand All @@ -431,6 +453,8 @@ def __init__(
self._timeout_seconds = timeout_seconds
self._max_attempts = max_attempts
self._retry_backoff_seconds = retry_backoff_seconds
self._max_output_tokens = max_output_tokens
self._num_ctx = num_ctx
self._transport = transport or UrllibJsonTransport()

def complete(self, prompt: ProviderPrompt) -> ProviderResponse:
Expand Down Expand Up @@ -586,14 +610,19 @@ def _build_payload(self, prompt: ProviderPrompt) -> dict[str, Any]:
],
"stream": False,
}
options: dict[str, Any] = {}
if self._max_output_tokens is not None:
options["num_predict"] = self._max_output_tokens
if self._num_ctx is not None:
options["num_ctx"] = self._num_ctx
if prompt.response_format is ProviderResponseFormat.JSON_OBJECT:
assert prompt.output_schema is not None
payload["format"] = prompt.output_schema
payload["options"] = {
"temperature": 0,
}
options["temperature"] = 0
if self._needs_think_for_structured_output(self._model):
payload["think"] = True
if options:
payload["options"] = options
return payload

def _extract_content(
Expand All @@ -604,6 +633,23 @@ def _extract_content(
) -> str:
json_requested = response_format is ProviderResponseFormat.JSON_OBJECT

# A truncated generation (model hit its output-token budget) leaves the
# JSON object unterminated. Surface that explicitly here instead of
# letting it fall through to a confusing json.loads failure downstream.
if payload.get("done_reason") == "length":
message = payload.get("message")
partial = ""
if isinstance(message, Mapping) and isinstance(message.get("content"), str):
partial = message["content"]
raise ProviderError(
"Provider response was truncated before completion (done_reason=length). "
"The model hit its output-token limit, so the response is incomplete. "
"Raise provider.max_output_tokens, or have the planner emit a smaller plan "
"(use content_brief for large file bodies).",
code=ProviderErrorCode.TRUNCATED,
response_text=partial,
)

# Standard Ollama local format: {"message": {"content": "...", "thinking": "..."}}
message = payload.get("message")
if isinstance(message, Mapping):
Expand Down Expand Up @@ -800,6 +846,8 @@ def build_provider_adapter(
api_key=api_key,
base_url=settings.provider.effective_base_url(),
timeout_seconds=settings.provider.request_timeout_seconds,
max_output_tokens=settings.provider.max_output_tokens,
num_ctx=settings.provider.num_ctx,
transport=transport,
)

Expand All @@ -808,5 +856,6 @@ def build_provider_adapter(
api_key=api_key or "",
base_url=settings.provider.effective_base_url(),
timeout_seconds=settings.provider.request_timeout_seconds,
max_output_tokens=settings.provider.max_output_tokens,
transport=transport,
)
2 changes: 2 additions & 0 deletions src/foundation/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ class ProviderSection(BaseModel):
model: str = "gpt-5-mini"
base_url: AnyUrl | None = None
request_timeout_seconds: PositiveInt = 60
max_output_tokens: PositiveInt | None = None
num_ctx: PositiveInt | None = None
api_key_env_var: str | None = OPENAI_DEFAULT_API_KEY_ENV_VAR
api_key_keychain: KeychainSecretRef | None = Field(default_factory=KeychainSecretRef)

Expand Down
36 changes: 36 additions & 0 deletions tests/test_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,42 @@ def test_orchestrator_retries_invalid_plans_without_duplicate_shell_execution(
assert result.execution_results[0].artifact["stdout"] == f"{workspace_root}\n"


def test_orchestrator_recovers_from_truncated_plan_with_content_brief_hint(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A TRUNCATED provider error on the first attempt leads to one repaired retry.

    The stub provider fails once with ``ProviderErrorCode.TRUNCATED`` and then
    returns an empty plan. The test checks that exactly two provider calls were
    made, that the runtime stub recorded no calls, and that the second prompt
    mentions both the truncation and the ``content_brief`` hint.
    """
    from foundation.services.provider import ProviderError, ProviderErrorCode

    class _TruncateThenSucceed:
        """Provider stub: raise TRUNCATED on the first call, succeed afterwards."""

        def __init__(self) -> None:
            self.calls: list[ProviderPrompt] = []
            self._raised = False

        def complete(self, prompt: ProviderPrompt) -> ProviderResponse:
            self.calls.append(prompt)
            if self._raised:
                # The truncation already happened once; answer with an empty plan.
                return _provider_response({"assistant_message": "Done.", "actions": []})
            self._raised = True
            raise ProviderError(
                "Provider response was truncated before completion (done_reason=length).",
                code=ProviderErrorCode.TRUNCATED,
                response_text='{"assistant_message":"writing","actions":[{"id":"w"',
            )

    provider = _TruncateThenSucceed()
    orchestrator, runtime, _ = _orchestrator(tmp_path, monkeypatch, provider)

    request = UserRequest(message="write a big file")
    result = orchestrator.orchestrate(request)

    # One truncated attempt plus one repaired retry, and nothing reached the runtime.
    assert len(provider.calls) == 2
    assert runtime.calls == 0

    # The retry prompt must carry the truncation-specific repair feedback.
    retry_prompt = provider.calls[1]
    repair_text = "\n".join([message.content for message in retry_prompt.messages])
    assert "truncated" in repair_text.lower()
    assert "content_brief" in repair_text
    assert result.summary is not None


def test_orchestrator_retries_shell_cat_plan_without_executing_it(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
Expand Down
79 changes: 79 additions & 0 deletions tests/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,82 @@ def test_ollama_adapter_invalid_json_error_includes_raw() -> None:

assert exc_info.value.code is ProviderErrorCode.INVALID_RESPONSE
assert exc_info.value.response_text == garbage


def test_ollama_adapter_sets_num_predict_and_num_ctx() -> None:
    """The Ollama request options carry num_predict, num_ctx and temperature.

    ``max_output_tokens`` is expected to appear as ``num_predict`` and ``num_ctx``
    under its own name; a structured prompt additionally yields ``temperature`` 0.
    """
    canned_reply = {
        "message": {
            "role": "assistant",
            "content": '{"assistant_message":"ok","actions":[]}',
        }
    }
    transport = FakeTransport([canned_reply])
    adapter = OllamaChatAdapter(
        model="glm-5.1:cloud",
        base_url="http://localhost:11434/api",
        max_output_tokens=2048,
        num_ctx=8192,
        transport=transport,
    )

    adapter.complete(_structured_prompt())

    # Inspect the options of the first (and only) request handed to the transport.
    first_request = transport.calls[0]
    sent_options = first_request["payload"]["options"]
    assert sent_options["num_predict"] == 2048
    assert sent_options["num_ctx"] == 8192
    assert sent_options["temperature"] == 0


def test_ollama_adapter_raises_truncated_on_done_reason_length() -> None:
    """``done_reason == "length"`` raises a TRUNCATED ProviderError carrying the partial text."""
    partial = '{"assistant_message":"writing","actions":[{"id":"w","kind":"tool_call"'
    truncated_reply = {
        "message": {"role": "assistant", "content": partial},
        "done_reason": "length",
    }
    transport = FakeTransport([truncated_reply])
    adapter = OllamaChatAdapter(
        model="kimi-k2.6:cloud",
        base_url="http://localhost:11434/api",
        max_output_tokens=128,
        transport=transport,
    )

    with pytest.raises(ProviderError, match="truncated") as exc_info:
        adapter.complete(_structured_prompt())

    # The error must be the dedicated TRUNCATED code and keep the raw partial output.
    raised = exc_info.value
    assert raised.code is ProviderErrorCode.TRUNCATED
    assert raised.response_text == partial


def test_openai_adapter_sets_max_output_tokens() -> None:
    """A configured ``max_output_tokens`` is sent in the OpenAI request payload."""
    canned_reply = {
        "id": "r",
        "output_text": '{"assistant_message":"ok","actions":[]}',
    }
    transport = FakeTransport([canned_reply])
    adapter = OpenAIResponsesAdapter(
        model="gpt-5-mini",
        api_key="sk-test",
        max_output_tokens=4096,
        transport=transport,
    )

    adapter.complete(_structured_prompt())

    # Inspect the payload of the first request handed to the transport.
    sent_payload = transport.calls[0]["payload"]
    assert sent_payload["max_output_tokens"] == 4096


def test_openai_adapter_raises_truncated_on_incomplete_response() -> None:
    """An ``incomplete`` response due to ``max_output_tokens`` raises a TRUNCATED error."""
    incomplete_reply = {
        "id": "resp_1",
        "status": "incomplete",
        "incomplete_details": {"reason": "max_output_tokens"},
        "output_text": '{"assistant_message":"partial"',
    }
    transport = FakeTransport([incomplete_reply])
    adapter = OpenAIResponsesAdapter(
        model="gpt-5-mini",
        api_key="sk-test",
        transport=transport,
    )

    with pytest.raises(ProviderError, match="truncated") as exc_info:
        adapter.complete(_structured_prompt())

    # The adapter must report the dedicated TRUNCATED code for this response.
    raised = exc_info.value
    assert raised.code is ProviderErrorCode.TRUNCATED
Loading