|
1 | 1 | """Tests for screenshot extraction and upload in CloudTraceSink""" |
2 | 2 |
|
3 | 3 | import base64 |
| 4 | +import gzip |
4 | 5 | import json |
5 | 6 | import os |
6 | 7 | from pathlib import Path |
@@ -184,7 +185,7 @@ def test_create_cleaned_trace_removes_screenshot_fields(self): |
184 | 185 | sink._create_cleaned_trace(cleaned_trace_path) |
185 | 186 |
|
186 | 187 | # Read cleaned trace |
187 | | - with open(cleaned_trace_path, "r") as f: |
| 188 | + with open(cleaned_trace_path) as f: |
188 | 189 | cleaned_event = json.loads(f.readline()) |
189 | 190 |
|
190 | 191 | # Verify screenshot fields are removed |
@@ -233,7 +234,7 @@ def test_create_cleaned_trace_preserves_other_events(self): |
233 | 234 | sink._create_cleaned_trace(cleaned_trace_path) |
234 | 235 |
|
235 | 236 | # Read cleaned trace |
236 | | - with open(cleaned_trace_path, "r") as f: |
| 237 | + with open(cleaned_trace_path) as f: |
237 | 238 | cleaned_event = json.loads(f.readline()) |
238 | 239 |
|
239 | 240 | # Verify action event is unchanged |
@@ -401,3 +402,111 @@ def test_complete_trace_includes_screenshot_count(self): |
401 | 402 | assert stats["screenshot_count"] == 2 |
402 | 403 |
|
403 | 404 | sink.close(blocking=False) |
| 405 | + |
def test_upload_removes_screenshot_base64_from_trace(self):
    """Test that uploaded trace data does not contain screenshot_base64.

    Emits one snapshot event carrying ``screenshot_base64`` and
    ``screenshot_format``, runs ``_do_upload()`` with ``requests.post`` and
    ``requests.put`` patched, then decompresses the body of the PUT request
    sent with ``Content-Type: application/x-gzip`` and checks that both
    screenshot fields are gone while ``url`` and ``element_count`` remain.

    The pending trace files for this run are removed in a ``finally`` block
    so a failing assertion does not leave them behind for later tests.
    """
    import time

    upload_url = "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz"
    run_id = "test-screenshot-upload-clean-1"
    api_key = "sk_test_123"

    test_image_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="

    # Files this test removes afterwards (presumably where the sink writes
    # its pending trace and the cleaned copy -- verify against CloudTraceSink).
    cache_dir = Path.home() / ".sentience" / "traces" / "pending"
    trace_path = cache_dir / f"{run_id}.jsonl"
    cleaned_trace_path = cache_dir / f"{run_id}.cleaned.jsonl"

    sink = CloudTraceSink(upload_url, run_id=run_id, api_key=api_key)

    try:
        # Emit snapshot event with screenshot
        sink.emit(
            {
                "v": 1,
                "type": "snapshot",
                "ts": "2026-01-01T00:00:00.000Z",
                "run_id": run_id,
                "seq": 1,
                "step_id": "step-1",
                "data": {
                    "url": "https://example.com",
                    "element_count": 10,
                    "screenshot_base64": test_image_base64,
                    "screenshot_format": "png",
                },
            }
        )

        # NOTE(review): close() runs before the requests patches below are
        # active; the short sleep looks like it gives the non-blocking close
        # time to settle -- confirm no real network call can happen here.
        sink.close(blocking=False)
        time.sleep(0.1)

        # Mock gateway and upload responses
        mock_upload_urls = {
            "1": "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/screenshots/step_0001.png?signature=...",
        }

        with (
            patch("sentience.cloud_tracing.requests.post") as mock_post,
            patch("sentience.cloud_tracing.requests.put") as mock_put,
        ):
            # Mock gateway response for screenshot URLs
            mock_gateway_response = Mock()
            mock_gateway_response.status_code = 200
            mock_gateway_response.json.return_value = {"upload_urls": mock_upload_urls}
            mock_post.return_value = mock_gateway_response

            # Mock screenshot upload response
            mock_screenshot_upload = Mock()
            mock_screenshot_upload.status_code = 200
            mock_put.return_value = mock_screenshot_upload

            # Call _do_upload to simulate the full upload process
            sink._do_upload()

            # Verify trace was uploaded (PUT was called)
            assert mock_put.called

            # Pick the trace upload out of all PUT calls: it is the one sent
            # with Content-Type: application/x-gzip (screenshot uploads are
            # PUT requests as well).
            trace_upload_call = next(
                (
                    put_call
                    for put_call in mock_put.call_args_list
                    if put_call.kwargs.get("headers", {}).get("Content-Type")
                    == "application/x-gzip"
                ),
                None,
            )

            assert trace_upload_call is not None, "Trace upload should have been called"

            # Decompress and verify the uploaded trace data
            compressed_data = trace_upload_call.kwargs["data"]
            trace_content = gzip.decompress(compressed_data).decode("utf-8")

            # Parse the trace events (one JSON document per non-blank line)
            events = [
                json.loads(line) for line in trace_content.strip().split("\n") if line.strip()
            ]

            # Find snapshot event
            snapshot_events = [e for e in events if e.get("type") == "snapshot"]
            assert len(snapshot_events) > 0, "Should have at least one snapshot event"

            # Verify screenshot_base64 is NOT in the uploaded trace
            for event in snapshot_events:
                data = event.get("data", {})
                assert (
                    "screenshot_base64" not in data
                ), "screenshot_base64 should be removed from uploaded trace"
                assert (
                    "screenshot_format" not in data
                ), "screenshot_format should be removed from uploaded trace"
                # Verify other fields are preserved
                assert "url" in data
                assert "element_count" in data
    finally:
        # Cleanup runs even when an assertion above fails.
        trace_path.unlink(missing_ok=True)
        cleaned_trace_path.unlink(missing_ok=True)
0 commit comments