Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/uipath/_cli/_evals/_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,22 @@

from uipath._cli._evals._runtime import UiPathEvalContext, UiPathEvalRuntime
from uipath._events._event_bus import EventBus
from uipath.tracing import LlmOpsHttpExporter


async def evaluate(
runtime_factory: UiPathRuntimeFactoryProtocol,
trace_manager: UiPathTraceManager,
eval_context: UiPathEvalContext,
event_bus: EventBus,
live_tracking_exporter: LlmOpsHttpExporter,
) -> UiPathRuntimeResult:
async with UiPathEvalRuntime(
factory=runtime_factory,
context=eval_context,
trace_manager=trace_manager,
event_bus=event_bus,
live_tracking_exporter=live_tracking_exporter,
) as eval_runtime:
results = await eval_runtime.execute()
await event_bus.wait_for_all(timeout=10)
Expand Down
17 changes: 7 additions & 10 deletions src/uipath/_cli/_evals/_progress_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,10 @@ async def update_eval_set_run(
)

async def handle_create_eval_set_run(self, payload: EvalSetRunCreatedEvent) -> None:
logger.info("[TraceID] handle_create_eval_set_run: Event handler triggered")
logger.info(
f"[TraceID] handle_create_eval_set_run: Received payload with eval_set_run_id={payload.eval_set_run_id}"
)
try:
self.evaluators = {eval.id: eval for eval in payload.evaluators}
self.evaluator_scores = {eval.id: [] for eval in payload.evaluators}
Expand All @@ -369,6 +373,9 @@ async def handle_create_eval_set_run(self, payload: EvalSetRunCreatedEvent) -> N
self.is_coded_eval[payload.execution_id] = is_coded

eval_set_run_id = payload.eval_set_run_id
logger.info(
f"[TraceID] handle_create_eval_set_run: eval_set_run_id from payload = {eval_set_run_id}"
)
if not eval_set_run_id:
eval_set_run_id = await self.create_eval_set_run_sw(
eval_set_id=payload.eval_set_id,
Expand Down Expand Up @@ -422,16 +429,6 @@ async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
try:
eval_run_id = self.eval_run_ids.get(payload.execution_id)

# Use evalRunId as the trace_id for agent execution spans
# This makes all agent spans children of the eval run trace
if eval_run_id:
self.spans_exporter.trace_id = eval_run_id
else:
# Fallback to evalSetRunId if eval_run_id not available yet
if self.eval_set_execution_id:
self.spans_exporter.trace_id = self.eval_set_run_ids.get(
self.eval_set_execution_id
)

self.spans_exporter.export(payload.spans)

Expand Down
22 changes: 20 additions & 2 deletions src/uipath/_cli/_evals/_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ def __init__(
factory: UiPathRuntimeFactoryProtocol,
trace_manager: UiPathTraceManager,
event_bus: EventBus,
live_tracking_exporter: LlmOpsHttpExporter,
):
self.context: UiPathEvalContext = context
# Wrap the factory to support model settings overrides
Expand All @@ -323,11 +324,15 @@ def __init__(
self.trace_manager.tracer_provider.add_span_processor(span_processor)

# Live tracking processor for real-time span updates
live_tracking_exporter = LlmOpsHttpExporter()
live_tracking_processor = LiveTrackingSpanProcessor(live_tracking_exporter)
self.live_tracking_exporter = live_tracking_exporter
live_tracking_processor = LiveTrackingSpanProcessor(self.live_tracking_exporter)
self.trace_manager.tracer_span_processors.append(live_tracking_processor)
self.trace_manager.tracer_provider.add_span_processor(live_tracking_processor)

logger.info(
f"[TraceID] UiPathEvalRuntime.__init__: live_tracking_exporter.trace_id = {getattr(self.live_tracking_exporter, 'trace_id', None)}"
)

self.logs_exporter: ExecutionLogsExporter = ExecutionLogsExporter()
# Use job_id if available (for single runtime runs), otherwise generate UUID
self.execution_id = context.job_id or str(uuid.uuid4())
Expand Down Expand Up @@ -386,6 +391,9 @@ async def initiate_evaluation(
)
evaluators = await self._load_evaluators(evaluation_set, runtime)

logger.info(
f"[TraceID] Publishing CREATE_EVAL_SET_RUN event with eval_set_run_id={self.context.eval_set_run_id}"
)
await self.event_bus.publish(
EvaluationEvents.CREATE_EVAL_SET_RUN,
EvalSetRunCreatedEvent(
Expand All @@ -397,6 +405,9 @@ async def initiate_evaluation(
evaluators=evaluators,
),
)
logger.info(
f"[TraceID] After event publish, exporter.trace_id = {getattr(self.live_tracking_exporter, 'trace_id', None)}"
)

return (
evaluation_set,
Expand Down Expand Up @@ -440,9 +451,16 @@ async def execute(self) -> UiPathRuntimeResult:
}
if self.context.eval_set_run_id:
span_attributes["eval_set_run_id"] = self.context.eval_set_run_id

logger.info(
f"[TraceID] About to create 'Evaluation Set Run' span. Current exporter.trace_id = {getattr(self.live_tracking_exporter, 'trace_id', None)}"
)
with tracer.start_as_current_span(
"Evaluation Set Run", attributes=span_attributes
) as span:
logger.info(
f"[TraceID] Inside 'Evaluation Set Run' span. Exporter.trace_id = {getattr(self.live_tracking_exporter, 'trace_id', None)}"
)
try:
(
evaluation_set,
Expand Down
22 changes: 20 additions & 2 deletions src/uipath/_cli/cli_eval.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import ast
import asyncio
import logging
import os
from typing import Any

import click
from uipath.core.tracing import UiPathTraceManager
from uipath.runtime import UiPathRuntimeContext, UiPathRuntimeFactoryRegistry

logger = logging.getLogger(__name__)

from uipath._cli._evals._console_progress_reporter import ConsoleProgressReporter
from uipath._cli._evals._evaluate import evaluate
from uipath._cli._evals._progress_reporter import StudioWebProgressReporter
Expand Down Expand Up @@ -203,8 +206,18 @@ def eval(
async def execute_eval():
event_bus = EventBus()

live_tracking_exporter = LlmOpsHttpExporter()

if eval_context.eval_set_run_id:
live_tracking_exporter.trace_id = eval_context.eval_set_run_id
logger.info(
f"[TraceID] Set live_tracking_exporter.trace_id = {eval_context.eval_set_run_id} (user-provided via --eval-set-run-id)"
)

if should_register_progress_reporter:
progress_reporter = StudioWebProgressReporter(LlmOpsHttpExporter())
progress_reporter = StudioWebProgressReporter(
live_tracking_exporter
)
await progress_reporter.subscribe_to_eval_runtime_events(event_bus)

console_reporter = ConsoleProgressReporter()
Expand Down Expand Up @@ -247,11 +260,16 @@ async def execute_eval():
trace_manager,
eval_context,
event_bus,
live_tracking_exporter,
)
else:
# Fall back to execution without overwrites
ctx.result = await evaluate(
runtime_factory, trace_manager, eval_context, event_bus
runtime_factory,
trace_manager,
eval_context,
event_bus,
live_tracking_exporter,
)
finally:
if runtime_factory:
Expand Down
Loading