2 changes: 1 addition & 1 deletion packages/uipath/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "uipath"
version = "2.10.34"
version = "2.10.35"
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
92 changes: 92 additions & 0 deletions packages/uipath/src/uipath/eval/evaluators/evaluator_utils.py
@@ -0,0 +1,92 @@
"""Internal utilities shared across evaluators."""

import copy
import json
import logging
from collections.abc import Callable
from typing import Any

from ..models.models import UiPathEvaluationError, UiPathEvaluationErrorCategory

logger = logging.getLogger(__name__)


async def _call_llm_with_logging(
llm_service: Callable[..., Any],
request_data: dict[str, Any],
model: str,
) -> Any:
"""Call the LLM service with detailed request/response logging and error handling.

Args:
llm_service: The LLM chat completions callable
request_data: The request payload to send
model: The model name (for logging)

Returns:
The raw LLM response

Raises:
UiPathEvaluationError: If the LLM call fails
"""
# Log the request details
logger.info(
f"🤖 Calling LLM evaluator with model: {model} (using function calling)"
)
logger.debug(f"Request data: model={model}, tool_choice=required")

# Log full request body for debugging
request_body_for_log = copy.deepcopy(request_data)
if "tool_choice" in request_body_for_log:
request_body_for_log["tool_choice"] = request_body_for_log[
"tool_choice"
].model_dump()
if "tools" in request_body_for_log:
request_body_for_log["tools"] = [
t.model_dump() for t in request_body_for_log["tools"]
]
logger.info(f"📤 Full request body:\n{json.dumps(request_body_for_log, indent=2)}")

try:
response = await llm_service(**request_data)
except Exception as e:
logger.error("=" * 80)
logger.error("❌ LLM REQUEST FAILED")
logger.error("=" * 80)
logger.error(f"Model: {model}")
logger.error("API Endpoint: Normalized API (/llm/api/chat/completions)")
logger.error(f"Error Type: {type(e).__name__}")
logger.error(f"Error Message: {str(e)}")

if hasattr(e, "response"):
logger.error(
f"HTTP Status Code: {e.response.status_code if hasattr(e.response, 'status_code') else 'N/A'}"
)
try:
error_body = (
e.response.json()
if hasattr(e.response, "json")
else str(e.response.content)
)
logger.error(
f"Response Body: {json.dumps(error_body, indent=2) if isinstance(error_body, dict) else error_body}"
)
except Exception:
logger.error(
f"Response Body: {str(e.response.content) if hasattr(e.response, 'content') else 'N/A'}"
)

logger.error(f"Request Details: model={model}, tool_choice=required")
logger.error("=" * 80)

raise UiPathEvaluationError(
code="FAILED_TO_GET_LLM_RESPONSE",
title="Failed to get LLM response",
detail=f"Model: {model}, Error: {type(e).__name__}: {str(e)}",
category=UiPathEvaluationErrorCategory.SYSTEM,
) from e

logger.info(f"✅ LLM response received successfully from {model}")
logger.debug(f"Response: {response}")

return response
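
For orientation, here is a minimal, hypothetical sketch (not part of this diff) of how a caller would invoke the new helper. The names `fake_chat_completions` and the request payload are illustrative stand-ins for an evaluator's real LLM service:

import asyncio

from uipath.eval.evaluators.evaluator_utils import _call_llm_with_logging


async def fake_chat_completions(**kwargs):
    # Stand-in for self.llm.chat_completions / self.llm_service in the evaluators.
    return {"choices": [{"message": {"tool_calls": []}}]}


async def main() -> None:
    request_data = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "Evaluate this output."}],
    }
    # The helper logs the request, awaits the callable with request_data as kwargs,
    # and wraps any failure in UiPathEvaluationError.
    response = await _call_llm_with_logging(fake_chat_completions, request_data, "gpt-4o")
    print(response)


asyncio.run(main())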
@@ -26,6 +26,7 @@
LegacyEvaluationCriteria,
LegacyEvaluatorConfig,
)
from .evaluator_utils import _call_llm_with_logging
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)
@@ -211,5 +212,7 @@ async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
}

assert self.llm, "LLM should be initialized before calling this method."
response = await self.llm.chat_completions(**request_data)
response = await _call_llm_with_logging(
self.llm.chat_completions, request_data, model
)
return extract_tool_call_response(response, model)
@@ -27,6 +27,7 @@
LegacyEvaluationCriteria,
LegacyEvaluatorConfig,
)
from .evaluator_utils import _call_llm_with_logging
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)
@@ -181,5 +182,7 @@ async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
"tool_choice": tool_choice,
}

response = await self.llm.chat_completions(**request_data)
response = await _call_llm_with_logging(
self.llm.chat_completions, request_data, model
)
return extract_tool_call_response(response, model)
@@ -1,7 +1,5 @@
"""LLM-as-a-judge evaluator for subjective quality assessment of agent outputs."""

import copy
import json
import logging
from abc import abstractmethod
from collections.abc import Callable
@@ -31,6 +29,7 @@
BaseEvaluatorConfig,
BaseEvaluatorJustification,
)
from .evaluator_utils import _call_llm_with_logging

T = TypeVar("T", bound=BaseEvaluationCriteria)

@@ -278,84 +277,7 @@ async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
category=UiPathEvaluationErrorCategory.SYSTEM,
)

# Log the request details (exclude non-JSON-serializable objects)
logger.info(
f"🤖 Calling LLM evaluator with model: {model} (using function calling)"
)
max_tokens_str = (
str(max_tokens_value) if max_tokens_value is not None else "unset"
)
logger.debug(
f"Request data: model={model}, max_tokens={max_tokens_str}, temperature={self.evaluator_config.temperature}, tool_choice=required"
)

# Log full request body for debugging
request_body_for_log = copy.deepcopy(request_data)
# Convert tool_choice to dict for logging
if "tool_choice" in request_body_for_log:
request_body_for_log["tool_choice"] = request_body_for_log[
"tool_choice"
].model_dump()
# Convert tools to dict for logging
if "tools" in request_body_for_log:
request_body_for_log["tools"] = [
t.model_dump() for t in request_body_for_log["tools"]
]
logger.info(
f"📤 Full request body:\n{json.dumps(request_body_for_log, indent=2)}"
)

try:
response = await self.llm_service(**request_data)
except Exception as e:
# Enhanced error logging with details
logger.error("=" * 80)
logger.error("❌ LLM REQUEST FAILED")
logger.error("=" * 80)
logger.error(f"Model: {model}")
logger.error("API Endpoint: Normalized API (/llm/api/chat/completions)")
logger.error(f"Error Type: {type(e).__name__}")
logger.error(f"Error Message: {str(e)}")

# Try to extract HTTP error details if available
if hasattr(e, "response"):
logger.error(
f"HTTP Status Code: {e.response.status_code if hasattr(e.response, 'status_code') else 'N/A'}"
)
try:
error_body = (
e.response.json()
if hasattr(e.response, "json")
else str(e.response.content)
)
logger.error(
f"Response Body: {json.dumps(error_body, indent=2) if isinstance(error_body, dict) else error_body}"
)
except Exception:
logger.error(
f"Response Body: {str(e.response.content) if hasattr(e.response, 'content') else 'N/A'}"
)

max_tokens_str = (
str(self.evaluator_config.max_tokens)
if self.evaluator_config.max_tokens is not None
else "unset"
)
logger.error(
f"Request Details: model={model}, max_tokens={max_tokens_str}, temperature={self.evaluator_config.temperature}, tool_choice=required"
)
logger.error("=" * 80)

raise UiPathEvaluationError(
code="FAILED_TO_GET_LLM_RESPONSE",
title="Failed to get LLM response",
detail=f"Model: {model}, Error: {type(e).__name__}: {str(e)}",
category=UiPathEvaluationErrorCategory.SYSTEM,
) from e

# Log successful response
logger.info(f"✅ LLM response received successfully from {model}")
logger.debug(f"Response: {response}")
response = await _call_llm_with_logging(self.llm_service, request_data, model)

# Extract structured output from tool call
return self._extract_tool_call_response(response, model)
134 changes: 134 additions & 0 deletions packages/uipath/tests/evaluators/test_evaluator_utils.py
@@ -0,0 +1,134 @@
"""Tests for evaluator_utils._call_llm_with_logging helper."""

import logging
from typing import Any
from unittest.mock import MagicMock

import pytest

from uipath.eval.evaluators.evaluator_utils import _call_llm_with_logging
from uipath.eval.models.models import UiPathEvaluationError

LOGGER_NAME = "uipath.eval.evaluators.evaluator_utils"


def _make_request_data() -> dict[str, Any]:
"""Create minimal request_data for tests."""
return {
"model": "gpt-4o",
"messages": [{"role": "user", "content": "test"}],
"tools": [],
"tool_choice": MagicMock(model_dump=lambda: {"type": "required"}),
}


class TestCallLlmWithLogging:
"""Tests for _call_llm_with_logging."""

@pytest.mark.asyncio
async def test_success_returns_response(self) -> None:
"""Test that a successful LLM call returns the response unchanged."""
expected_response = MagicMock()

async def mock_llm_service(**kwargs: Any) -> Any:
return expected_response

result = await _call_llm_with_logging(
mock_llm_service, _make_request_data(), "gpt-4o"
)
assert result is expected_response

@pytest.mark.asyncio
async def test_passes_request_data_to_llm_service(self) -> None:
"""Test that request_data kwargs are forwarded to the LLM service."""
captured_kwargs: dict[str, Any] = {}

async def mock_llm_service(**kwargs: Any) -> Any:
captured_kwargs.update(kwargs)
return MagicMock()

request_data = _make_request_data()
request_data["temperature"] = 0.5

await _call_llm_with_logging(mock_llm_service, request_data, "gpt-4o")
assert captured_kwargs["model"] == "gpt-4o"
assert captured_kwargs["temperature"] == 0.5

@pytest.mark.asyncio
async def test_plain_exception_wraps_in_evaluation_error(self) -> None:
"""Test that a plain exception is wrapped in UiPathEvaluationError."""

async def mock_llm_service(**kwargs: Any) -> Any:
raise RuntimeError("connection refused")

with pytest.raises(UiPathEvaluationError) as exc_info:
await _call_llm_with_logging(
mock_llm_service, _make_request_data(), "gpt-4o"
)

error = exc_info.value
assert error.error_info.code == "Python.FAILED_TO_GET_LLM_RESPONSE"
assert "gpt-4o" in error.error_info.detail
assert "RuntimeError" in error.error_info.detail
assert "connection refused" in error.error_info.detail
assert isinstance(error.__cause__, RuntimeError)

@pytest.mark.asyncio
async def test_http_error_includes_status_in_logs(self) -> None:
"""Test that an exception with .response logs HTTP status code and body."""
mock_response = MagicMock()
mock_response.status_code = 429
mock_response.json.return_value = {"error": "rate limited"}

async def mock_llm_service(**kwargs: Any) -> Any:
exc = Exception("Too Many Requests")
exc.response = mock_response # type: ignore[attr-defined]
raise exc

logger = logging.getLogger(LOGGER_NAME)
logged_messages: list[str] = []
handler = logging.Handler()
handler.emit = lambda record: logged_messages.append(record.getMessage()) # type: ignore[assignment]
logger.addHandler(handler)

try:
with pytest.raises(UiPathEvaluationError):
await _call_llm_with_logging(
mock_llm_service, _make_request_data(), "gpt-4o"
)
finally:
logger.removeHandler(handler)

all_logs = "\n".join(logged_messages)
assert "429" in all_logs
assert "rate limited" in all_logs

@pytest.mark.asyncio
async def test_http_error_json_parse_failure_falls_back_to_content(self) -> None:
"""Test fallback to .content when .json() raises."""
mock_response = MagicMock()
mock_response.status_code = 500
mock_response.json.side_effect = ValueError("not json")
mock_response.content = b"Internal Server Error"

async def mock_llm_service(**kwargs: Any) -> Any:
exc = Exception("Server Error")
exc.response = mock_response # type: ignore[attr-defined]
raise exc

logger = logging.getLogger(LOGGER_NAME)
logged_messages: list[str] = []
handler = logging.Handler()
handler.emit = lambda record: logged_messages.append(record.getMessage()) # type: ignore[assignment]
logger.addHandler(handler)

try:
with pytest.raises(UiPathEvaluationError):
await _call_llm_with_logging(
mock_llm_service, _make_request_data(), "gpt-4o"
)
finally:
logger.removeHandler(handler)

all_logs = "\n".join(logged_messages)
assert "Internal Server Error" in all_logs
4 changes: 2 additions & 2 deletions packages/uipath/uv.lock

Some generated files are not rendered by default.