Skip to content

Commit 1b72c4b

Browse files
authored
Merge pull request #174 from SentienceAPI/fix_agentruntime_step_id
Fix AgentRuntime step_id from UUID to step-N
2 parents 0997c4c + 5101e39 commit 1b72c4b

File tree

6 files changed

+86
-41
lines changed

6 files changed

+86
-41
lines changed

sentience/agent_runtime.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@
6666
import asyncio
6767
import difflib
6868
import time
69-
import uuid
7069
from dataclasses import dataclass
7170
from typing import TYPE_CHECKING, Any
7271

@@ -504,20 +503,20 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str:
504503
step_index: Optional explicit step index (otherwise auto-increments)
505504
506505
Returns:
507-
Generated step_id
506+
Generated step_id in format 'step-N' where N is the step index
508507
"""
509508
# Clear previous step state
510509
self._assertions_this_step = []
511510

512-
# Generate new step_id
513-
self.step_id = str(uuid.uuid4())
514-
515511
# Update step index
516512
if step_index is not None:
517513
self.step_index = step_index
518514
else:
519515
self.step_index += 1
520516

517+
# Generate step_id in 'step-N' format for Studio compatibility
518+
self.step_id = f"step-{self.step_index}"
519+
521520
return self.step_id
522521

523522
def assert_(
@@ -583,7 +582,7 @@ def assert_done(
583582
True if task is complete (assertion passed), False otherwise
584583
"""
585584
# Convenience wrapper for assert_ with required=True
586-
ok = self.assertTrue(predicate, label=label, required=True)
585+
ok = self.assert_(predicate, label=label, required=True)
587586
if ok:
588587
self._task_done = True
589588
self._task_done_label = label

sentience/schemas/trace_v1.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,12 @@
3737
},
3838
"step_id": {
3939
"type": ["string", "null"],
40-
"pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
41-
"description": "UUID for the step (present for step-scoped events)"
40+
"description": "Step identifier in 'step-N' format where N is the step index (present for step-scoped events)"
41+
},
42+
"step_index": {
43+
"type": ["integer", "null"],
44+
"minimum": 0,
45+
"description": "Step index (0-based), present for step-scoped events"
4246
},
4347
"data": {
4448
"type": "object",
@@ -67,6 +71,7 @@
6771
"description": "snapshot or snapshot_taken data",
6872
"properties": {
6973
"step_id": {"type": ["string", "null"]},
74+
"step_index": {"type": ["integer", "null"], "minimum": 0, "description": "Step index for Studio compatibility"},
7075
"snapshot_id": {"type": ["string", "null"]},
7176
"snapshot_digest": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
7277
"snapshot_digest_loose": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},

sentience/trace_event_builder.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class TraceEventBuilder:
2323
def build_snapshot_event(
2424
snapshot: Snapshot,
2525
include_all_elements: bool = True,
26+
step_index: int | None = None,
2627
) -> dict[str, Any]:
2728
"""
2829
Build snapshot_taken trace event data.
@@ -31,6 +32,8 @@ def build_snapshot_event(
3132
snapshot: Snapshot to build event from
3233
include_all_elements: If True, include all elements (for DOM tree display).
3334
If False, use filtered elements only.
35+
step_index: Optional step index (0-based) for Studio compatibility.
36+
Required when step_id is not in 'step-N' format (e.g., UUIDs).
3437
3538
Returns:
3639
Dictionary with snapshot event data
@@ -64,13 +67,19 @@ def build_snapshot_event(
6467
el_dict["importance_score"] = importance_score
6568
elements_data.append(el_dict)
6669

67-
return {
70+
result = {
6871
"url": snapshot.url,
6972
"element_count": len(snapshot.elements),
7073
"timestamp": snapshot.timestamp,
7174
"elements": elements_data, # Full element data for DOM tree
7275
}
7376

77+
# Include step_index if provided (required for UUID step_ids)
78+
if step_index is not None:
79+
result["step_index"] = step_index
80+
81+
return result
82+
7483
@staticmethod
7584
def build_step_end_event(
7685
step_id: str,

tests/test_agent_runtime.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,34 @@ class TestAgentRuntimeBeginStep:
152152
"""Tests for begin_step method."""
153153

154154
def test_begin_step_generates_step_id(self) -> None:
155-
"""Test begin_step generates a UUID step_id."""
155+
"""Test begin_step generates a step_id in 'step-N' format."""
156156
backend = MockBackend()
157157
tracer = MockTracer()
158158
runtime = AgentRuntime(backend=backend, tracer=tracer)
159159

160160
step_id = runtime.begin_step(goal="Test step")
161161

162162
assert step_id is not None
163-
assert len(step_id) == 36 # UUID length with dashes
163+
assert step_id == "step-1" # First step should be step-1
164+
165+
def test_begin_step_id_matches_index(self) -> None:
166+
"""Test step_id format matches step_index for Studio compatibility."""
167+
backend = MockBackend()
168+
tracer = MockTracer()
169+
runtime = AgentRuntime(backend=backend, tracer=tracer)
170+
171+
step_id_1 = runtime.begin_step(goal="Step 1")
172+
assert step_id_1 == "step-1"
173+
assert runtime.step_index == 1
174+
175+
step_id_2 = runtime.begin_step(goal="Step 2")
176+
assert step_id_2 == "step-2"
177+
assert runtime.step_index == 2
178+
179+
# With explicit index
180+
step_id_10 = runtime.begin_step(goal="Step 10", step_index=10)
181+
assert step_id_10 == "step-10"
182+
assert runtime.step_index == 10
164183

165184
def test_begin_step_increments_index(self) -> None:
166185
"""Test begin_step auto-increments step_index."""

tests/test_screenshot_storage.py

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,10 @@ def test_extract_screenshots_from_trace(self):
4242
}
4343
)
4444

45-
# Close to write file
46-
sink.close(blocking=False)
47-
48-
# Wait a bit for file to be written
49-
import time
50-
51-
time.sleep(0.1)
45+
# Finalize trace file synchronously (closes file handle properly)
46+
# Using _finalize_trace_file_for_upload instead of close(blocking=False)
47+
# to avoid Windows file locking issues in tests
48+
sink._finalize_trace_file_for_upload()
5249

5350
# Extract screenshots
5451
screenshots = sink._extract_screenshots_from_trace()
@@ -59,7 +56,7 @@ def test_extract_screenshots_from_trace(self):
5956
assert screenshots[1]["format"] == "png"
6057
assert screenshots[1]["step_id"] == "step-1"
6158

62-
# Cleanup
59+
# Cleanup - file handle is already closed
6360
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
6461
trace_path = cache_dir / f"{run_id}.jsonl"
6562
if trace_path.exists():
@@ -93,15 +90,15 @@ def test_extract_screenshots_handles_multiple(self):
9390
}
9491
)
9592

96-
sink.close(blocking=False)
97-
import time
98-
99-
time.sleep(0.1)
93+
# Finalize trace file synchronously (closes file handle properly)
94+
# Using _finalize_trace_file_for_upload instead of close(blocking=False)
95+
# to avoid Windows file locking issues in tests
96+
sink._finalize_trace_file_for_upload()
10097

10198
screenshots = sink._extract_screenshots_from_trace()
10299
assert len(screenshots) == 3
103100

104-
# Cleanup
101+
# Cleanup - file handle is already closed
105102
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
106103
trace_path = cache_dir / f"{run_id}.jsonl"
107104
if trace_path.exists():
@@ -130,15 +127,15 @@ def test_extract_screenshots_skips_events_without_screenshots(self):
130127
}
131128
)
132129

133-
sink.close(blocking=False)
134-
import time
135-
136-
time.sleep(0.1)
130+
# Finalize trace file synchronously (closes file handle properly)
131+
# Using _finalize_trace_file_for_upload instead of close(blocking=False)
132+
# to avoid Windows file locking issues in tests
133+
sink._finalize_trace_file_for_upload()
137134

138135
screenshots = sink._extract_screenshots_from_trace()
139136
assert len(screenshots) == 0
140137

141-
# Cleanup
138+
# Cleanup - file handle is already closed
142139
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
143140
trace_path = cache_dir / f"{run_id}.jsonl"
144141
if trace_path.exists():
@@ -174,10 +171,8 @@ def test_create_cleaned_trace_removes_screenshot_fields(self):
174171
}
175172
)
176173

177-
sink.close(blocking=False)
178-
import time
179-
180-
time.sleep(0.1)
174+
# Finalize trace file synchronously to avoid Windows file locking issues
175+
sink._finalize_trace_file_for_upload()
181176

182177
# Create cleaned trace
183178
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
@@ -223,10 +218,8 @@ def test_create_cleaned_trace_preserves_other_events(self):
223218
}
224219
)
225220

226-
sink.close(blocking=False)
227-
import time
228-
229-
time.sleep(0.1)
221+
# Finalize trace file synchronously to avoid Windows file locking issues
222+
sink._finalize_trace_file_for_upload()
230223

231224
# Create cleaned trace
232225
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
@@ -436,10 +429,8 @@ def test_upload_removes_screenshot_base64_from_trace(self):
436429
}
437430
)
438431

439-
sink.close(blocking=False)
440-
import time
441-
442-
time.sleep(0.1)
432+
# Finalize trace file synchronously to avoid Windows file locking issues
433+
sink._finalize_trace_file_for_upload()
443434

444435
# Mock gateway and upload responses
445436
mock_upload_urls = {

tests/test_trace_event_builder.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,3 +320,25 @@ def test_build_step_end_event_with_none_verify_data():
320320

321321
# Verify should be empty dict when verify_data is None
322322
assert result["verify"] == {}
323+
324+
325+
def test_build_snapshot_event_with_step_index():
326+
"""Test that build_snapshot_event includes step_index when provided.
327+
328+
This is required for AgentRuntime which uses UUID step_ids that can't be
329+
parsed by Studio's trace-parser to extract step_index.
330+
"""
331+
elements = [create_element(1, text="Test element")]
332+
snapshot = create_snapshot(elements)
333+
334+
# Without step_index
335+
result_without = TraceEventBuilder.build_snapshot_event(snapshot)
336+
assert "step_index" not in result_without
337+
338+
# With step_index=0
339+
result_with_zero = TraceEventBuilder.build_snapshot_event(snapshot, step_index=0)
340+
assert result_with_zero["step_index"] == 0
341+
342+
# With step_index=5
343+
result_with_five = TraceEventBuilder.build_snapshot_event(snapshot, step_index=5)
344+
assert result_with_five["step_index"] == 5

0 commit comments

Comments
 (0)