
Commit 291be21

smflorentino and claude committed
refactor: extract LLM call logging into shared evaluator_utils helper
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8725ce0 commit 291be21

7 files changed

Lines changed: 244 additions & 84 deletions
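In short: each evaluator previously inlined its own request/response logging around llm.chat_completions(...); after this commit they all delegate to the shared _call_llm_with_logging helper shown in the diff below. A minimal sketch of the resulting call pattern, assuming the package from this commit is importable; fake_chat_completions is an illustrative stand-in for an evaluator's real LLM client and is not part of the diff:

import asyncio
from typing import Any

from uipath.eval.evaluators.evaluator_utils import _call_llm_with_logging


async def fake_chat_completions(**kwargs: Any) -> dict[str, Any]:
    # Stand-in for self.llm.chat_completions / self.llm_service in the evaluators.
    return {"choices": [{"message": {"content": "score: 100"}}]}


async def main() -> None:
    # The helper logs the outgoing request, awaits the service, logs the
    # response, and wraps any failure in UiPathEvaluationError.
    request_data = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "Evaluate this output."}],
    }
    response = await _call_llm_with_logging(
        fake_chat_completions, request_data, "gpt-4o"
    )
    print(response)


asyncio.run(main())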


packages/uipath/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "uipath"
-version = "2.10.34"
+version = "2.10.35"
 description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
packages/uipath/src/uipath/eval/evaluators/evaluator_utils.py

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
"""Internal utilities shared across evaluators."""

import copy
import json
import logging
from collections.abc import Callable
from typing import Any

from ..models.models import UiPathEvaluationError, UiPathEvaluationErrorCategory

logger = logging.getLogger(__name__)


async def _call_llm_with_logging(
    llm_service: Callable[..., Any],
    request_data: dict[str, Any],
    model: str,
) -> Any:
    """Call the LLM service with detailed request/response logging and error handling.

    Args:
        llm_service: The LLM chat completions callable
        request_data: The request payload to send
        model: The model name (for logging)

    Returns:
        The raw LLM response

    Raises:
        UiPathEvaluationError: If the LLM call fails
    """
    # Log the request details
    logger.info(
        f"🤖 Calling LLM evaluator with model: {model} (using function calling)"
    )
    logger.debug(
        f"Request data: model={model}, tool_choice=required"
    )

    # Log full request body for debugging
    request_body_for_log = copy.deepcopy(request_data)
    if "tool_choice" in request_body_for_log:
        request_body_for_log["tool_choice"] = request_body_for_log[
            "tool_choice"
        ].model_dump()
    if "tools" in request_body_for_log:
        request_body_for_log["tools"] = [
            t.model_dump() for t in request_body_for_log["tools"]
        ]
    logger.info(
        f"📤 Full request body:\n{json.dumps(request_body_for_log, indent=2)}"
    )

    try:
        response = await llm_service(**request_data)
    except Exception as e:
        logger.error("=" * 80)
        logger.error("❌ LLM REQUEST FAILED")
        logger.error("=" * 80)
        logger.error(f"Model: {model}")
        logger.error("API Endpoint: Normalized API (/llm/api/chat/completions)")
        logger.error(f"Error Type: {type(e).__name__}")
        logger.error(f"Error Message: {str(e)}")

        if hasattr(e, "response"):
            logger.error(
                f"HTTP Status Code: {e.response.status_code if hasattr(e.response, 'status_code') else 'N/A'}"
            )
            try:
                error_body = (
                    e.response.json()
                    if hasattr(e.response, "json")
                    else str(e.response.content)
                )
                logger.error(
                    f"Response Body: {json.dumps(error_body, indent=2) if isinstance(error_body, dict) else error_body}"
                )
            except Exception:
                logger.error(
                    f"Response Body: {str(e.response.content) if hasattr(e.response, 'content') else 'N/A'}"
                )

        logger.error(f"Request Details: model={model}, tool_choice=required")
        logger.error("=" * 80)

        raise UiPathEvaluationError(
            code="FAILED_TO_GET_LLM_RESPONSE",
            title="Failed to get LLM response",
            detail=f"Model: {model}, Error: {type(e).__name__}: {str(e)}",
            category=UiPathEvaluationErrorCategory.SYSTEM,
        ) from e

    logger.info(f"✅ LLM response received successfully from {model}")
    logger.debug(f"Response: {response}")

    return response
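A quick illustration of the failure path documented in the helper above: any exception raised by the service is logged in detail and re-raised as UiPathEvaluationError, with the original exception chained as __cause__. This sketch is illustrative only and assumes the package is importable; failing_chat_completions is a made-up stand-in, not a real client:

import asyncio
from typing import Any

from uipath.eval.evaluators.evaluator_utils import _call_llm_with_logging
from uipath.eval.models.models import UiPathEvaluationError


async def failing_chat_completions(**kwargs: Any) -> Any:
    # Hypothetical stand-in that simulates a transport error.
    raise RuntimeError("connection refused")


async def main() -> None:
    try:
        await _call_llm_with_logging(
            failing_chat_completions,
            {"model": "gpt-4o", "messages": []},
            "gpt-4o",
        )
    except UiPathEvaluationError as err:
        # The original error stays attached for debugging.
        print(type(err.__cause__).__name__, err.__cause__)


asyncio.run(main())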

packages/uipath/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py

Lines changed: 4 additions & 1 deletion
@@ -26,6 +26,7 @@
     LegacyEvaluationCriteria,
     LegacyEvaluatorConfig,
 )
+from .evaluator_utils import _call_llm_with_logging
 from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

 logger = logging.getLogger(__name__)

@@ -211,5 +212,7 @@ async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
         }

         assert self.llm, "LLM should be initialized before calling this method."
-        response = await self.llm.chat_completions(**request_data)
+        response = await _call_llm_with_logging(
+            self.llm.chat_completions, request_data, model
+        )
         return extract_tool_call_response(response, model)

packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py

Lines changed: 4 additions & 1 deletion
@@ -27,6 +27,7 @@
     LegacyEvaluationCriteria,
     LegacyEvaluatorConfig,
 )
+from .evaluator_utils import _call_llm_with_logging
 from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

 logger = logging.getLogger(__name__)

@@ -181,5 +182,7 @@ async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
             "tool_choice": tool_choice,
         }

-        response = await self.llm.chat_completions(**request_data)
+        response = await _call_llm_with_logging(
+            self.llm.chat_completions, request_data, model
+        )
         return extract_tool_call_response(response, model)

packages/uipath/src/uipath/eval/evaluators/llm_as_judge_evaluator.py

Lines changed: 3 additions & 79 deletions
@@ -1,7 +1,5 @@
 """LLM-as-a-judge evaluator for subjective quality assessment of agent outputs."""

-import copy
-import json
 import logging
 from abc import abstractmethod
 from collections.abc import Callable

@@ -31,6 +29,7 @@
     BaseEvaluatorConfig,
     BaseEvaluatorJustification,
 )
+from .evaluator_utils import _call_llm_with_logging

 T = TypeVar("T", bound=BaseEvaluationCriteria)

@@ -278,84 +277,9 @@ async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
                 category=UiPathEvaluationErrorCategory.SYSTEM,
             )

-        # Log the request details (exclude non-JSON-serializable objects)
-        logger.info(
-            f"🤖 Calling LLM evaluator with model: {model} (using function calling)"
+        response = await _call_llm_with_logging(
+            self.llm_service, request_data, model
         )
-        max_tokens_str = (
-            str(max_tokens_value) if max_tokens_value is not None else "unset"
-        )
-        logger.debug(
-            f"Request data: model={model}, max_tokens={max_tokens_str}, temperature={self.evaluator_config.temperature}, tool_choice=required"
-        )
-
-        # Log full request body for debugging
-        request_body_for_log = copy.deepcopy(request_data)
-        # Convert tool_choice to dict for logging
-        if "tool_choice" in request_body_for_log:
-            request_body_for_log["tool_choice"] = request_body_for_log[
-                "tool_choice"
-            ].model_dump()
-        # Convert tools to dict for logging
-        if "tools" in request_body_for_log:
-            request_body_for_log["tools"] = [
-                t.model_dump() for t in request_body_for_log["tools"]
-            ]
-        logger.info(
-            f"📤 Full request body:\n{json.dumps(request_body_for_log, indent=2)}"
-        )
-
-        try:
-            response = await self.llm_service(**request_data)
-        except Exception as e:
-            # Enhanced error logging with details
-            logger.error("=" * 80)
-            logger.error("❌ LLM REQUEST FAILED")
-            logger.error("=" * 80)
-            logger.error(f"Model: {model}")
-            logger.error("API Endpoint: Normalized API (/llm/api/chat/completions)")
-            logger.error(f"Error Type: {type(e).__name__}")
-            logger.error(f"Error Message: {str(e)}")
-
-            # Try to extract HTTP error details if available
-            if hasattr(e, "response"):
-                logger.error(
-                    f"HTTP Status Code: {e.response.status_code if hasattr(e.response, 'status_code') else 'N/A'}"
-                )
-                try:
-                    error_body = (
-                        e.response.json()
-                        if hasattr(e.response, "json")
-                        else str(e.response.content)
-                    )
-                    logger.error(
-                        f"Response Body: {json.dumps(error_body, indent=2) if isinstance(error_body, dict) else error_body}"
-                    )
-                except Exception:
-                    logger.error(
-                        f"Response Body: {str(e.response.content) if hasattr(e.response, 'content') else 'N/A'}"
-                    )
-
-            max_tokens_str = (
-                str(self.evaluator_config.max_tokens)
-                if self.evaluator_config.max_tokens is not None
-                else "unset"
-            )
-            logger.error(
-                f"Request Details: model={model}, max_tokens={max_tokens_str}, temperature={self.evaluator_config.temperature}, tool_choice=required"
-            )
-            logger.error("=" * 80)
-
-            raise UiPathEvaluationError(
-                code="FAILED_TO_GET_LLM_RESPONSE",
-                title="Failed to get LLM response",
-                detail=f"Model: {model}, Error: {type(e).__name__}: {str(e)}",
-                category=UiPathEvaluationErrorCategory.SYSTEM,
-            ) from e
-
-        # Log successful response
-        logger.info(f"✅ LLM response received successfully from {model}")
-        logger.debug(f"Response: {response}")

         # Extract structured output from tool call
         return self._extract_tool_call_response(response, model)
Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
"""Tests for evaluator_utils._call_llm_with_logging helper."""

import logging
from typing import Any
from unittest.mock import MagicMock

import pytest

from uipath.eval.evaluators.evaluator_utils import _call_llm_with_logging
from uipath.eval.models.models import UiPathEvaluationError

LOGGER_NAME = "uipath.eval.evaluators.evaluator_utils"


def _make_request_data() -> dict[str, Any]:
    """Create minimal request_data for tests."""
    return {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "test"}],
        "tools": [],
        "tool_choice": MagicMock(model_dump=lambda: {"type": "required"}),
    }


class TestCallLlmWithLogging:
    """Tests for _call_llm_with_logging."""

    @pytest.mark.asyncio
    async def test_success_returns_response(self) -> None:
        """Test that a successful LLM call returns the response unchanged."""
        expected_response = MagicMock()

        async def mock_llm_service(**kwargs: Any) -> Any:
            return expected_response

        result = await _call_llm_with_logging(
            mock_llm_service, _make_request_data(), "gpt-4o"
        )
        assert result is expected_response

    @pytest.mark.asyncio
    async def test_passes_request_data_to_llm_service(self) -> None:
        """Test that request_data kwargs are forwarded to the LLM service."""
        captured_kwargs: dict[str, Any] = {}

        async def mock_llm_service(**kwargs: Any) -> Any:
            captured_kwargs.update(kwargs)
            return MagicMock()

        request_data = _make_request_data()
        request_data["temperature"] = 0.5

        await _call_llm_with_logging(mock_llm_service, request_data, "gpt-4o")
        assert captured_kwargs["model"] == "gpt-4o"
        assert captured_kwargs["temperature"] == 0.5

    @pytest.mark.asyncio
    async def test_plain_exception_wraps_in_evaluation_error(self) -> None:
        """Test that a plain exception is wrapped in UiPathEvaluationError."""

        async def mock_llm_service(**kwargs: Any) -> Any:
            raise RuntimeError("connection refused")

        with pytest.raises(UiPathEvaluationError) as exc_info:
            await _call_llm_with_logging(
                mock_llm_service, _make_request_data(), "gpt-4o"
            )

        error = exc_info.value
        assert error.error_info.code == "Python.FAILED_TO_GET_LLM_RESPONSE"
        assert "gpt-4o" in error.error_info.detail
        assert "RuntimeError" in error.error_info.detail
        assert "connection refused" in error.error_info.detail
        assert isinstance(error.__cause__, RuntimeError)

    @pytest.mark.asyncio
    async def test_http_error_includes_status_in_logs(self) -> None:
        """Test that an exception with .response logs HTTP status code and body."""
        mock_response = MagicMock()
        mock_response.status_code = 429
        mock_response.json.return_value = {"error": "rate limited"}

        async def mock_llm_service(**kwargs: Any) -> Any:
            exc = Exception("Too Many Requests")
            exc.response = mock_response  # type: ignore[attr-defined]
            raise exc

        logger = logging.getLogger(LOGGER_NAME)
        logged_messages: list[str] = []
        handler = logging.Handler()
        handler.emit = lambda record: logged_messages.append(record.getMessage())  # type: ignore[assignment]
        logger.addHandler(handler)

        try:
            with pytest.raises(UiPathEvaluationError):
                await _call_llm_with_logging(
                    mock_llm_service, _make_request_data(), "gpt-4o"
                )
        finally:
            logger.removeHandler(handler)

        all_logs = "\n".join(logged_messages)
        assert "429" in all_logs
        assert "rate limited" in all_logs

    @pytest.mark.asyncio
    async def test_http_error_json_parse_failure_falls_back_to_content(self) -> None:
        """Test fallback to .content when .json() raises."""
        mock_response = MagicMock()
        mock_response.status_code = 500
        mock_response.json.side_effect = ValueError("not json")
        mock_response.content = b"Internal Server Error"

        async def mock_llm_service(**kwargs: Any) -> Any:
            exc = Exception("Server Error")
            exc.response = mock_response  # type: ignore[attr-defined]
            raise exc

        logger = logging.getLogger(LOGGER_NAME)
        logged_messages: list[str] = []
        handler = logging.Handler()
        handler.emit = lambda record: logged_messages.append(record.getMessage())  # type: ignore[assignment]
        logger.addHandler(handler)

        try:
            with pytest.raises(UiPathEvaluationError):
                await _call_llm_with_logging(
                    mock_llm_service, _make_request_data(), "gpt-4o"
                )
        finally:
            logger.removeHandler(handler)

        all_logs = "\n".join(logged_messages)
        assert "Internal Server Error" in all_logs

packages/uipath/uv.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default.
