Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions src/uipath/_cli/_evals/_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from uipath.core.tracing import UiPathTraceManager
from uipath.core.tracing.processors import UiPathExecutionBatchTraceProcessor
from uipath.runtime import (
UiPathExecuteOptions,
UiPathExecutionRuntime,
UiPathRuntimeFactoryProtocol,
UiPathRuntimeProtocol,
Expand Down Expand Up @@ -941,14 +942,20 @@ async def execute_runtime(
"span_type": "eval",
}

# Create a new runtime with unique runtime_id for this eval execution.
# This ensures each eval has its own LangGraph thread_id (clean state),
# preventing message accumulation across eval runs.
# Create a new runtime with runtime_id for this eval execution.
# Use eval_item.id to maintain consistent thread_id across suspend and resume.
# This ensures checkpoints can be found when resuming from suspended state.
runtime_id = eval_item.id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think someone changed this for a reason I can't remember. @akshaylive or @radu-mocanu do you guys remember why we no longer use eval_item.id as the runtime_id?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They changed it to avoid conflicts in the runtime: when multiple evals were started, the runtimes collided.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to fix the LangGraph aspect too. Is there a better way, @cristipufu? It does not resume properly for me without this change, as the mapping won't work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I remember, the concern was regarding running multiple eval runtimes in parallel targeting the same evaluation set. There would be collisions since eval_item.id is static.

if self.context.resume:
logger.info(
f"🟢 EVAL RUNTIME: Resume mode - using eval_item.id '{runtime_id}' to load checkpoint"
)

eval_runtime = None
try:
eval_runtime = await self.factory.new_runtime(
entrypoint=self.context.entrypoint or "",
runtime_id=execution_id,
runtime_id=runtime_id,
)
execution_runtime = UiPathExecutionRuntime(
delegate=eval_runtime,
Expand All @@ -966,9 +973,29 @@ async def execute_runtime(
input_overrides or {},
eval_id=eval_item.id,
)
result = await execution_runtime.execute(
input=inputs_with_overrides,
)

# In resume mode, pass None as input
# The UiPathResumableRuntime wrapper will automatically:
# 1. Fetch triggers from storage
# 2. Read resume data via trigger_manager.read_trigger()
# 3. Build resume map: {interrupt_id: resume_data}
# 4. Pass this map to the delegate runtime
if self.context.resume:
logger.info("🟢 EVAL RUNTIME: Resuming from checkpoint")
logger.info(f"🟢 EVAL RUNTIME: Using thread_id: {runtime_id}")
logger.info(
"🟢 EVAL RUNTIME: Passing None - wrapper will load resume data from storage"
)

options = UiPathExecuteOptions(resume=True)
result = await execution_runtime.execute(
input=None, # Let wrapper load resume data
options=options,
)
else:
result = await execution_runtime.execute(
input=inputs_with_overrides,
)
except Exception as e:
end_time = time()
spans, logs = self._get_and_clear_execution_data(execution_id)
Expand Down