Skip to content

Commit d89bc10

Browse files
Bernd Verst and Copilot committed
Reset sync client long-poll failure tracking
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 09697bb commit d89bc10

3 files changed

Lines changed: 23 additions & 9 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,11 @@ ADDED
2222
- Added optional `resiliency_options` parameters to `TaskHubGrpcClient`,
2323
`AsyncTaskHubGrpcClient`, and `TaskHubGrpcWorker` so applications can pass
2424
gRPC resiliency settings through constructor APIs.
25-
- Added `get_orchestration_history()` and `list_instance_ids()` to the sync and async gRPC clients.
26-
- Added in-memory backend support for `StreamInstanceHistory` and `ListInstanceIds` so local orchestration tests can retrieve history and page terminal instance IDs by completion window.
25+
- Added `get_orchestration_history()` and `list_instance_ids()` to the sync
26+
and async gRPC clients.
27+
- Added in-memory backend support for `StreamInstanceHistory` and
28+
`ListInstanceIds` so local orchestration tests can retrieve history and page
29+
terminal instance IDs by completion window.
2730

2831
FIXED
2932

@@ -32,8 +35,9 @@ FIXED
3235
SDK-owned channels are cleaned up on shutdown and full resets, and
3336
caller-owned channels are never recreated or closed during worker reconnects.
3437
- Fixed sync `TaskHubGrpcClient` transport resiliency so SDK-owned channels are
35-
recreated after repeated transport failures without counting long-poll
36-
timeout deadlines against the recreation threshold.
38+
recreated after repeated transport failures while long-poll timeout
39+
deadlines, successful replies, and application-level RPC errors reset the
40+
failure tracker.
3741

3842
## v1.4.0
3943

durabletask/client.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,7 @@ def _invoke_unary(
234234
should_recreate = self._client_failure_tracker.record_failure()
235235
if should_recreate:
236236
self._maybe_recreate_channel()
237-
elif status_code != grpc.StatusCode.DEADLINE_EXCEEDED:
237+
else:
238238
self._client_failure_tracker.record_success()
239239
raise
240240
else:

tests/durabletask/test_client.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -359,10 +359,20 @@ def test_sync_client_recreates_sdk_owned_channel_after_repeated_unavailable():
359359
timer.start.assert_called_once_with()
360360

361361

362-
def test_sync_client_does_not_count_long_poll_deadline():
362+
@pytest.mark.parametrize(
363+
("stub_method_name", "client_method_name"),
364+
[
365+
("WaitForInstanceStart", "wait_for_orchestration_start"),
366+
("WaitForInstanceCompletion", "wait_for_orchestration_completion"),
367+
],
368+
)
369+
def test_sync_client_resets_failure_tracking_after_long_poll_deadline(
370+
stub_method_name: str,
371+
client_method_name: str,
372+
):
363373
stub = MagicMock()
364374
stub.GetInstance.side_effect = FakeRpcError(grpc.StatusCode.UNAVAILABLE)
365-
stub.WaitForInstanceStart.side_effect = FakeRpcError(grpc.StatusCode.DEADLINE_EXCEEDED)
375+
getattr(stub, stub_method_name).side_effect = FakeRpcError(grpc.StatusCode.DEADLINE_EXCEEDED)
366376

367377
with patch("durabletask.client.shared.get_grpc_channel", return_value=MagicMock()), patch(
368378
"durabletask.client.stubs.TaskHubSidecarServiceStub", return_value=stub
@@ -373,8 +383,8 @@ def test_sync_client_does_not_count_long_poll_deadline():
373383
with pytest.raises(FakeRpcError):
374384
client.get_orchestration_state("abc")
375385
with pytest.raises(TimeoutError):
376-
client.wait_for_orchestration_start("abc")
377-
assert client._client_failure_tracker.consecutive_failures == 1
386+
getattr(client, client_method_name)("abc")
387+
assert client._client_failure_tracker.consecutive_failures == 0
378388

379389

380390
def test_sync_client_does_not_recreate_caller_owned_channel():

0 commit comments

Comments
 (0)