Skip to content

Commit 44f1732

Browse files
timmarkhuff and Auto-format Bot authored
Add retry decorator to tests that are vulnerable to transient service issues (#421)
Some tests in python-sdk are vulnerable to bad responses from the cloud service. For example, an image query might get a result of STILL_PROCESSING, which means the cloud didn't have an answer in time. This is a transient error and will almost always be resolved with a retry. This PR adds a retry decorator to protect such tests. Any test that submits an image query and asserts anything about the result is protected with this decorator. I also found a few instances of tests that were not using our standard `detector_name` function for naming detectors. I fixed those too. --------- Co-authored-by: Auto-format Bot <autoformatbot@groundlight.ai>
1 parent bda1afb commit 44f1732

8 files changed

Lines changed: 108 additions & 32 deletions

test/integration/test_groundlight.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from urllib3.exceptions import ConnectTimeoutError, MaxRetryError, ReadTimeoutError
3030
from urllib3.util.retry import Retry
3131

32+
from test.retry_decorator import retry_on_failure
33+
3234
DEFAULT_CONFIDENCE_THRESHOLD = 0.9
3335
IQ_IMPROVEMENT_THRESHOLD = 0.75
3436

@@ -273,20 +275,23 @@ def test_get_detector_by_name(gl: Groundlight, detector: Detector):
273275
gl.get_detector_by_name(name="not a real name")
274276

275277

278+
@retry_on_failure()
276279
def test_ask_confident(gl: Groundlight, detector: Detector):
277280
_image_query = gl.ask_confident(detector=detector.id, image="test/assets/dog.jpeg", wait=10)
278281
assert str(_image_query)
279282
assert isinstance(_image_query, ImageQuery)
280283
assert is_valid_display_result(_image_query.result)
281284

282285

286+
@retry_on_failure()
283287
def test_ask_ml(gl: Groundlight, detector: Detector):
284288
_image_query = gl.ask_ml(detector=detector.id, image="test/assets/dog.jpeg", wait=10)
285289
assert str(_image_query)
286290
assert isinstance(_image_query, ImageQuery)
287291
assert is_valid_display_result(_image_query.result)
288292

289293

294+
@retry_on_failure()
290295
def test_submit_image_query(gl: Groundlight, detector: Detector):
291296
def validate_image_query(_image_query: ImageQuery):
292297
assert str(_image_query)
@@ -314,6 +319,7 @@ def validate_image_query(_image_query: ImageQuery):
314319
assert _image_query.result.confidence >= IQ_IMPROVEMENT_THRESHOLD
315320

316321

322+
@retry_on_failure()
317323
def test_submit_image_query_blocking(gl: Groundlight, detector: Detector):
318324
_image_query = gl.submit_image_query(
319325
detector=detector.id, image="test/assets/dog.jpeg", wait=10, human_review="NEVER"
@@ -323,13 +329,15 @@ def test_submit_image_query_blocking(gl: Groundlight, detector: Detector):
323329
assert is_valid_display_result(_image_query.result)
324330

325331

332+
@retry_on_failure()
326333
def test_submit_image_query_returns_yes(gl: Groundlight):
327334
# We use the "never-review" pipeline to guarantee a confident "yes" answer.
328335
detector = gl.get_or_create_detector(name="Always a dog", query="Is there a dog?", pipeline_config="never-review")
329336
image_query = gl.submit_image_query(detector=detector, image="test/assets/dog.jpeg", wait=10, human_review="NEVER")
330337
assert image_query.result.label == Label.YES
331338

332339

340+
@retry_on_failure()
333341
def test_submit_image_query_returns_text(gl: Groundlight):
334342
# We use the "never-review" pipeline to guarantee a confident "yes" answer.
335343
detector = gl.get_or_create_detector(
@@ -339,20 +347,23 @@ def test_submit_image_query_returns_text(gl: Groundlight):
339347
assert isinstance(image_query.text, str)
340348

341349

350+
@retry_on_failure()
342351
def test_submit_image_query_filename(gl: Groundlight, detector: Detector):
343352
_image_query = gl.submit_image_query(detector=detector.id, image="test/assets/dog.jpeg", human_review="NEVER")
344353
assert str(_image_query)
345354
assert isinstance(_image_query, ImageQuery)
346355
assert is_valid_display_result(_image_query.result)
347356

348357

358+
@retry_on_failure()
349359
def test_submit_image_query_png(gl: Groundlight, detector: Detector):
350360
_image_query = gl.submit_image_query(detector=detector.id, image="test/assets/cat.png", human_review="NEVER")
351361
assert str(_image_query)
352362
assert isinstance(_image_query, ImageQuery)
353363
assert is_valid_display_result(_image_query.result)
354364

355365

366+
@retry_on_failure()
356367
def test_submit_image_query_with_confidence_threshold(gl: Groundlight, detector: Detector):
357368
confidence_threshold = 0.5234 # Arbitrary specific value
358369
_image_query = gl.submit_image_query(
@@ -366,6 +377,7 @@ def test_submit_image_query_with_confidence_threshold(gl: Groundlight, detector:
366377

367378

368379
@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing an image query ID.")
380+
@retry_on_failure()
369381
def test_submit_image_query_with_id(gl: Groundlight, detector: Detector):
370382
# submit_image_query
371383
id = f"iq_{KsuidMs()}"
@@ -380,6 +392,7 @@ def test_submit_image_query_with_id(gl: Groundlight, detector: Detector):
380392
assert _image_query.metadata.get("is_from_edge")
381393

382394

395+
@retry_on_failure()
383396
def test_submit_image_query_with_human_review_param(gl: Groundlight, detector: Detector):
384397
# For now, this just tests that the image query is submitted successfully.
385398
# There should probably be a better way to check whether the image query was escalated for human review.
@@ -451,6 +464,7 @@ def test_create_detector_with_invalid_metadata(gl: Groundlight, metadata_list: A
451464

452465
@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing image query metadata.")
453466
@pytest.mark.parametrize("metadata", [None, {}, {"a": 1}, '{"a": 1}'])
467+
@retry_on_failure()
454468
def test_submit_image_query_with_metadata(
455469
gl: Groundlight, detector: Detector, image: str, metadata: Union[Dict, str, None]
456470
):
@@ -505,6 +519,7 @@ def test_submit_image_query_with_metadata_returns_user_error(gl: Groundlight, de
505519
assert is_user_error(exc_info.value.status)
506520

507521

522+
@retry_on_failure()
508523
def test_submit_image_query_jpeg_bytes(gl: Groundlight, detector: Detector):
509524
jpeg = open("test/assets/dog.jpeg", "rb").read()
510525
_image_query = gl.submit_image_query(detector=detector.id, image=jpeg, human_review="NEVER")
@@ -543,6 +558,7 @@ def test_submit_image_query_bad_jpeg_file(gl: Groundlight, detector: Detector):
543558

544559

545560
@pytest.mark.skipif(MISSING_PIL, reason="Needs pillow") # type: ignore
561+
@retry_on_failure()
546562
def test_submit_image_query_pil(gl: Groundlight, detector: Detector):
547563
# generates a pil image and submits it
548564
from PIL import Image
@@ -565,6 +581,7 @@ def test_submit_image_query_wait_and_want_async_causes_exception(gl: Groundlight
565581
)
566582

567583

584+
@retry_on_failure()
568585
def test_submit_image_query_with_want_async_workflow(gl: Groundlight, detector: Detector):
569586
"""
570587
Tests the workflow for submitting an image query with the want_async parameter set to True.
@@ -589,6 +606,7 @@ def test_submit_image_query_with_want_async_workflow(gl: Groundlight, detector:
589606
assert _image_query.result.label in VALID_DISPLAY_LABELS
590607

591608

609+
@retry_on_failure()
592610
def test_ask_async_workflow(gl: Groundlight, detector: Detector):
593611
"""
594612
Tests the workflow for submitting an image query with ask_async.
@@ -638,19 +656,22 @@ def test_list_image_queries_with_filter(gl: Groundlight, detector_name: Callable
638656
assert image_query.id in iq_ids
639657

640658

659+
@retry_on_failure()
641660
def test_get_image_query(gl: Groundlight, image_query_yes: ImageQuery):
642661
_image_query = gl.get_image_query(id=image_query_yes.id)
643662
assert str(_image_query)
644663
assert isinstance(_image_query, ImageQuery)
645664
assert is_valid_display_result(_image_query.result)
646665

647666

667+
@retry_on_failure()
648668
def test_get_image_query_label_yes(gl: Groundlight, image_query_yes: ImageQuery):
649669
gl.add_label(image_query_yes, Label.YES)
650670
retrieved_iq = gl.get_image_query(id=image_query_yes.id)
651671
assert retrieved_iq.result.label == Label.YES
652672

653673

674+
@retry_on_failure()
654675
def test_get_image_query_label_no(gl: Groundlight, image_query_no: ImageQuery):
655676
gl.add_label(image_query_no, Label.NO)
656677
retrieved_iq = gl.get_image_query(id=image_query_no.id)
@@ -709,6 +730,7 @@ def test_enum_string_equality():
709730

710731

711732
@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore
733+
@retry_on_failure()
712734
def test_submit_numpy_image(gl: Groundlight, detector: Detector):
713735
np_img = np.random.uniform(0, 255, (600, 800, 3)) # type: ignore
714736
_image_query = gl.submit_image_query(detector=detector.id, image=np_img, human_review="NEVER")
@@ -771,6 +793,7 @@ def test_update_inspection_metadata_invalid_inspection_id(gl: Groundlight):
771793

772794

773795
@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint doesn't support inspection_id")
796+
@retry_on_failure()
774797
def test_stop_inspection_pass(gl: Groundlight, detector: Detector):
775798
"""Starts an inspection, submits a query with the inspection ID that should pass, stops
776799
the inspection, checks the result.
@@ -820,6 +843,7 @@ def test_update_detector_confidence_threshold_failure(gl: Groundlight, detector:
820843

821844

822845
@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing detector metadata.")
846+
@retry_on_failure()
823847
def test_submit_image_query_with_inspection_id_metadata_and_want_async(gl: Groundlight, detector: Detector, image: str):
824848
inspection_id = gl.start_inspection()
825849
metadata = {"key": "value"}
@@ -852,6 +876,7 @@ def test_submit_image_query_with_empty_inspection_id(gl: Groundlight, detector:
852876
)
853877

854878

879+
@retry_on_failure()
855880
def test_binary_detector(gl: Groundlight, detector_name: Callable):
856881
"""
857882
verify that we can create and submit to a binary detector
@@ -863,6 +888,7 @@ def test_binary_detector(gl: Groundlight, detector_name: Callable):
863888
assert binary_iq.result.label is not None
864889

865890

891+
@retry_on_failure()
866892
def test_counting_detector(gl: Groundlight, detector_name: Callable):
867893
"""
868894
verify that we can create and submit to a counting detector
@@ -874,6 +900,7 @@ def test_counting_detector(gl: Groundlight, detector_name: Callable):
874900
assert count_iq.result.count is not None
875901

876902

903+
@retry_on_failure()
877904
def test_counting_detector_async(gl: Groundlight, detector_name: Callable):
878905
"""
879906
verify that we can create and submit to a counting detector
@@ -893,6 +920,7 @@ def test_counting_detector_async(gl: Groundlight, detector_name: Callable):
893920
assert _image_query.result is not None
894921

895922

923+
@retry_on_failure()
896924
def test_multiclass_detector(gl: Groundlight, detector_name: Callable):
897925
"""
898926
verify that we can create and submit to a multi-class detector

test/retry_decorator.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Test-only helpers for retrying tests affected by transient cloud timing."""
2+
3+
import functools
4+
import time
5+
from typing import Any, Callable, Tuple, Type
6+
7+
8+
def retry_on_failure(
    *,
    max_attempts: int = 2,
    exception_types: Tuple[Type[BaseException], ...] = (AssertionError,),
    retry_delay_seconds: float = 5.0,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Build a decorator that re-runs a callable when it raises a listed exception.

    The wrapped callable is invoked with the same positional and keyword
    arguments on every attempt. Exceptions that are not instances of
    `exception_types` propagate immediately without a retry.

    Args:
        max_attempts: Total number of times the callable may run (first try
            included). Must be at least 1.
        exception_types: Exception classes that trigger another attempt.
        retry_delay_seconds: Pause between a failed attempt and the next one.
            Must be non-negative.

    Returns:
        A decorator. The decorated callable returns whatever the wrapped
        callable returns on its first successful attempt.

    Raises:
        ValueError: If `max_attempts` is below 1 or `retry_delay_seconds` is
            negative. Both are checked when the decorator is built so a bad
            configuration cannot surface mid-retry and mask the real failure.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")
    if retry_delay_seconds < 0:
        # time.sleep() rejects negative values; failing here keeps that error
        # from replacing the test's own exception inside the retry handler.
        raise ValueError("retry_delay_seconds must be non-negative")

    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        @functools.wraps(fn)
        def wrapper(*args: object, **kwargs: object) -> Any:
            # Every attempt except the last swallows a listed exception,
            # waits, and tries again.
            for _ in range(max_attempts - 1):
                try:
                    return fn(*args, **kwargs)
                except exception_types:
                    time.sleep(retry_delay_seconds)
            # Final attempt: run unguarded so any failure reaches the caller
            # with its original traceback.
            return fn(*args, **kwargs)

        return wrapper

    return decorator
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
from datetime import datetime
1+
from typing import Callable
22

33
from groundlight import ExperimentalApi
44

55
gl = ExperimentalApi()
66

77

8-
def test_invalid_endpoint_config():
8+
def test_invalid_endpoint_config(detector_name: Callable):
99
print(gl.make_generic_api_request(endpoint="/v1/me", method="GET"))
1010
print(gl.make_generic_api_request(endpoint="/v1/detectors", method="GET"))
11-
name = f"Test {datetime.utcnow()}"
12-
print(gl.make_generic_api_request(endpoint="/v1/detector-groups", method="POST", body={"name": name}))
11+
print(gl.make_generic_api_request(endpoint="/v1/detector-groups", method="POST", body={"name": detector_name()}))

test/unit/test_experimental.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from groundlight import ExperimentalApi
66
from model import Detector, ImageQuery
77

8+
from test.retry_decorator import retry_on_failure
9+
810

911
def test_detector_groups(gl_experimental: ExperimentalApi, detector_name: Callable):
1012
"""
@@ -90,6 +92,7 @@ def test_submit_multiple_rois(gl_experimental: ExperimentalApi, image_query_one:
9092
gl_experimental.add_label(image_query_one, 3, [roi] * 3)
9193

9294

95+
@retry_on_failure()
9396
def test_text_recognition_detector(gl_experimental: ExperimentalApi, detector_name: Callable):
9497
"""
9598
verify that we can create and submit to a text recognition detector
@@ -103,6 +106,7 @@ def test_text_recognition_detector(gl_experimental: ExperimentalApi, detector_na
103106
assert mc_iq.result.text is not None
104107

105108

109+
@retry_on_failure()
106110
def test_bounding_box_detector(gl_experimental: ExperimentalApi, detector_name: Callable):
107111
"""
108112
Verify that we can create and submit to a bounding box detector
@@ -117,6 +121,7 @@ def test_bounding_box_detector(gl_experimental: ExperimentalApi, detector_name:
117121
assert bbox_iq.rois is not None
118122

119123

124+
@retry_on_failure()
120125
def test_bounding_box_detector_async(gl_experimental: ExperimentalApi, detector_name: Callable):
121126
"""
122127
Verify that we can create and submit to a bounding box detector with ask_async

test/unit/test_images.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
from datetime import datetime
1+
from typing import Callable
22

33
import PIL
44
from groundlight import ExperimentalApi
55

6+
from test.retry_decorator import retry_on_failure
67

7-
def test_get_image(gl_experimental: ExperimentalApi):
8-
name = f"Test {datetime.utcnow()}"
9-
det = gl_experimental.get_or_create_detector(name, "test_query")
8+
9+
@retry_on_failure()
10+
def test_get_image(gl_experimental: ExperimentalApi, detector_name: Callable):
11+
det = gl_experimental.get_or_create_detector(detector_name(), "test_query")
1012
iq = gl_experimental.submit_image_query(det, image="test/assets/dog.jpeg", wait=10)
1113
gl_experimental.get_image(iq.id)
1214
assert isinstance(PIL.Image.open(gl_experimental.get_image(iq.id)), PIL.Image.Image)

test/unit/test_internalapi.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
1+
from typing import Callable
2+
13
from groundlight import ExperimentalApi
24
from groundlight.internalapi import iq_is_answered, iq_is_confident
35
from model import ImageQuery
46

57

6-
def test_iq_is_confident(gl_experimental: ExperimentalApi, initial_iq: ImageQuery):
7-
det = gl_experimental.get_or_create_detector("Test", "test_query")
8+
def test_iq_is_confident(gl_experimental: ExperimentalApi, initial_iq: ImageQuery, detector_name: Callable):
9+
det = gl_experimental.get_or_create_detector(detector_name(), "test_query")
810
iq = gl_experimental.ask_async(det, image="test/assets/dog.jpeg")
911
assert not iq_is_confident(iq, 0.9)
1012

1113
assert not iq_is_confident(initial_iq, 0.9)
1214

1315

14-
def test_iq_is_answered(gl_experimental: ExperimentalApi, initial_iq: ImageQuery):
15-
det = gl_experimental.get_or_create_detector("Test", "test_query")
16+
def test_iq_is_answered(gl_experimental: ExperimentalApi, initial_iq: ImageQuery, detector_name: Callable):
17+
det = gl_experimental.get_or_create_detector(detector_name(), "test_query")
1618
iq = gl_experimental.ask_async(det, image="test/assets/dog.jpeg")
1719
assert not iq_is_answered(iq)
1820

0 commit comments

Comments
 (0)