Add in-process WorkflowEnvironment testing harness

durable-workflow-ops · claude · durable-workflow-ops · commit b10ba6da3d18 · 2026-04-18T15:59:56.000Z
Closes zorporation/durable-workflow#393. New durable_workflow.testing module with: - WorkflowEnvironment: drives a workflow to completion in a single process, no server required. Resolves ScheduleActivity/StartTimer/ StartChildWorkflow/RecordSideEffect/UpsertSearchAttributes/ RecordVersionMarker commands against user-registered mocks. - register_activity_result() and register_activity() for canned or callable activity mocks. - register_child_workflow_result() for child workflow returns. - signal() to pre-queue signals that dispatch to @workflow.signal handlers before the first iteration. - replay_history() and replay_history_file() to replay a production history export (from Client.get_history) against current workflow code — standard pattern for regression-testing non-determinism. 12 unit tests cover single- and multi-activity pipelines, callable mocks, timer auto-fire, workflow failure surfacing, missing-mock errors, iteration limits, signal dispatch, and history-file replay in both list and dict-with-events shapes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/README.md b/README.md
@@ -77,6 +77,26 @@ result = yield ctx.schedule_activity(
 )
 ```
 
+Child workflow starts use the same retry policy shape and workflow-level
+execution/run timeout names:
+
+```python
+from durable_workflow import ChildWorkflowRetryPolicy
+
+receipt = yield ctx.start_child_workflow(
+    "payment.child",
+    [order],
+    retry_policy=ChildWorkflowRetryPolicy(
+        max_attempts=3,
+        initial_interval_seconds=2,
+        backoff_coefficient=2,
+        non_retryable_error_types=["ValidationError"],
+    ),
+    execution_timeout_seconds=600,
+    run_timeout_seconds=120,
+)
+```
+
 ## Features
 
 - **Async-first**: Built on `httpx` and `asyncio`
diff --git a/docs/index.md b/docs/index.md
@@ -18,7 +18,7 @@ pip install 'durable-workflow[prometheus]'
 
 - **[Client](reference/client.md)** — start workflows, signal, query, update, wait for results, manage schedules.
 - **[Worker](reference/worker.md)** — poll the server for workflow and activity tasks, dispatch to registered handlers.
-- **[Workflow](reference/workflow.md)** — workflow-side primitives: `ActivityRetryPolicy`, `ContinueAsNew`, `StartChildWorkflow`, and the workflow decorator.
+- **[Workflow](reference/workflow.md)** — workflow-side primitives: `ActivityRetryPolicy`, `ChildWorkflowRetryPolicy`, `ContinueAsNew`, `StartChildWorkflow`, and the workflow decorator.
 - **[Activity](reference/activity.md)** — activity decorator and execution context.
 - **[Errors](reference/errors.md)** — typed exceptions raised by the client and worker.
 - **[Retry policy](reference/retry_policy.md)** — HTTP transport retry configuration for the client.
diff --git a/docs/reference/testing.md b/docs/reference/testing.md
@@ -0,0 +1,3 @@
+# Testing
+
+::: durable_workflow.testing
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -72,6 +72,7 @@ nav:
       - Serializer: reference/serializer.md
       - Metrics: reference/metrics.md
       - Sync helpers: reference/sync.md
+      - Testing: reference/testing.md
 
 extra:
   social:
diff --git a/src/durable_workflow/__init__.py b/src/durable_workflow/__init__.py
@@ -6,7 +6,7 @@
 except PackageNotFoundError:  # source checkout without installed metadata
     __version__ = "0.0.0+unknown"
 
-from . import activity, sync, workflow
+from . import activity, sync, testing, workflow
 from .activity import ActivityContext, ActivityInfo
 from .client import (
     Client,
@@ -48,14 +48,15 @@
 )
 from .retry_policy import RetryPolicy, TransportRetryPolicy
 from .worker import Worker
-from .workflow import ActivityRetryPolicy, ContinueAsNew, StartChildWorkflow
+from .workflow import ActivityRetryPolicy, ChildWorkflowRetryPolicy, ContinueAsNew, StartChildWorkflow
 
 __all__ = [
     "__version__",
     "ActivityCancelled",
     "ActivityContext",
     "ActivityInfo",
     "ActivityRetryPolicy",
+    "ChildWorkflowRetryPolicy",
     "ChildWorkflowFailed",
     "Client",
     "ContinueAsNew",
@@ -76,6 +77,7 @@
     "WorkflowList",
     "activity",
     "sync",
+    "testing",
     "workflow",
     "DurableWorkflowError",
     "InvalidArgument",
diff --git a/src/durable_workflow/testing.py b/src/durable_workflow/testing.py
@@ -0,0 +1,267 @@
+"""In-process test harness for workflow authoring.
+
+:class:`WorkflowEnvironment` drives a workflow to completion in a single
+Python process, without a running server or worker. It reuses the same
+:func:`durable_workflow.workflow.replay` machinery the worker uses, but
+resolves yielded commands against user-registered activity mocks and
+auto-fires timers / side-effects / search-attribute upserts so tests do
+not need a real clock or Redis.
+
+Typical use::
+
+    def test_my_workflow():
+        env = WorkflowEnvironment()
+        env.register_activity_result("charge_card", {"id": "ch_1"})
+        env.register_activity_result("send_receipt", None)
+        result = env.execute_workflow(OrderWorkflow, "order-1", {"amount": 42})
+        assert result == {"status": "complete", "charge_id": "ch_1"}
+
+For regression-testing workflow code against production histories, use
+:func:`replay_history` — it hands the real durable history straight to
+the worker's replayer and surfaces any non-determinism as a raised
+exception.
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Callable, Iterable
+from pathlib import Path
+from typing import Any
+
+from . import serializer
+from .errors import WorkflowCancelled, WorkflowFailed, WorkflowTerminated
+from .workflow import (
+    CompleteWorkflow,
+    ContinueAsNew,
+    FailWorkflow,
+    RecordSideEffect,
+    RecordVersionMarker,
+    ReplayOutcome,
+    ScheduleActivity,
+    StartChildWorkflow,
+    StartTimer,
+    UpsertSearchAttributes,
+    replay,
+)
+
+
+class WorkflowEnvironment:
+    """Drives a workflow to completion against user-registered activity mocks."""
+
+    def __init__(self, *, iteration_limit: int = 1000) -> None:
+        self._activity_results: dict[str, Any] = {}
+        self._activity_fns: dict[str, Callable[..., Any]] = {}
+        self._child_workflow_results: dict[str, Any] = {}
+        self._pending_signals: list[tuple[str, list[Any]]] = []
+        self._iteration_limit = iteration_limit
+
+    def register_activity_result(self, name: str, result: Any) -> None:
+        """Canned response: every call to ``name`` returns ``result``."""
+        self._activity_results[name] = result
+
+    def register_activity(self, name: str, fn: Callable[..., Any]) -> None:
+        """Callable mock: ``fn(*arguments)`` is invoked for each scheduled call.
+
+        Use this when the test needs the mock to vary with arguments (e.g.
+        look up by order id) or to capture invocations.
+        """
+        self._activity_fns[name] = fn
+
+    def register_child_workflow_result(self, workflow_type: str, result: Any) -> None:
+        """Canned response for child workflow completions."""
+        self._child_workflow_results[workflow_type] = result
+
+    def signal(self, name: str, args: list[Any] | None = None) -> None:
+        """Queue a signal to be delivered before the next iteration.
+
+        Signals are drained in the order they were queued and injected into
+        the workflow history as ``SignalReceived`` events; the replayer then
+        dispatches each to its registered ``@workflow.signal`` handler.
+        """
+        self._pending_signals.append((name, list(args) if args is not None else []))
+
+    def execute_workflow(
+        self,
+        workflow_cls: type,
+        *args: Any,
+        run_id: str = "test-run",
+    ) -> Any:
+        """Drive ``workflow_cls`` to a terminal state and return its result.
+
+        Raises :class:`~durable_workflow.errors.WorkflowFailed` if the workflow
+        ended in the ``failed`` state. Activities that do not have a
+        registered mock raise :class:`KeyError` so tests fail loudly on
+        missing fixtures.
+        """
+        history: list[dict[str, Any]] = []
+
+        for _ in range(self._iteration_limit):
+            self._drain_pending_signals_into(history)
+            outcome = replay(workflow_cls, history, list(args), run_id=run_id)
+            terminal = self._apply_commands(outcome, history)
+            if terminal is not _SENTINEL:
+                return terminal
+
+        raise RuntimeError(
+            f"workflow did not terminate within {self._iteration_limit} iterations; "
+            "check for missing activity mocks or signals that never satisfy a wait."
+        )
+
+    def _drain_pending_signals_into(self, history: list[dict[str, Any]]) -> None:
+        while self._pending_signals:
+            name, sig_args = self._pending_signals.pop(0)
+            history.append(
+                {
+                    "event_type": "SignalReceived",
+                    "payload": {
+                        "signal_name": name,
+                        "value": serializer.envelope(sig_args),
+                        "payload_codec": serializer.AVRO_CODEC,
+                    },
+                }
+            )
+
+    def _apply_commands(
+        self, outcome: ReplayOutcome, history: list[dict[str, Any]]
+    ) -> Any:
+        for cmd in outcome.commands:
+            if isinstance(cmd, CompleteWorkflow):
+                return cmd.result
+            if isinstance(cmd, FailWorkflow):
+                raise WorkflowFailed(cmd.message, cmd.exception_type)
+            if isinstance(cmd, ContinueAsNew):
+                raise NotImplementedError(
+                    "continue_as_new is not yet supported by the test harness; "
+                    "drive each run explicitly with a separate execute_workflow call."
+                )
+            if isinstance(cmd, ScheduleActivity):
+                history.append(self._resolve_activity(cmd))
+            elif isinstance(cmd, StartTimer):
+                history.append({"event_type": "TimerFired", "payload": {}})
+            elif isinstance(cmd, StartChildWorkflow):
+                history.append(self._resolve_child_workflow(cmd))
+            elif isinstance(cmd, RecordSideEffect):
+                history.append(
+                    {
+                        "event_type": "SideEffectRecorded",
+                        "payload": {
+                            "result": serializer.envelope(cmd.result),
+                            "payload_codec": serializer.AVRO_CODEC,
+                        },
+                    }
+                )
+            elif isinstance(cmd, UpsertSearchAttributes):
+                history.append(
+                    {"event_type": "SearchAttributesUpserted", "payload": {}}
+                )
+            elif isinstance(cmd, RecordVersionMarker):
+                history.append(
+                    {
+                        "event_type": "VersionMarkerRecorded",
+                        "payload": {"version": cmd.version},
+                    }
+                )
+            else:
+                raise TypeError(f"unsupported command in test harness: {cmd!r}")
+        return _SENTINEL
+
+    def _resolve_activity(self, cmd: ScheduleActivity) -> dict[str, Any]:
+        if cmd.activity_type in self._activity_fns:
+            result = self._activity_fns[cmd.activity_type](*cmd.arguments)
+        elif cmd.activity_type in self._activity_results:
+            result = self._activity_results[cmd.activity_type]
+        else:
+            raise KeyError(
+                f"no mock registered for activity {cmd.activity_type!r}; "
+                "call env.register_activity_result() or env.register_activity()."
+            )
+        return {
+            "event_type": "ActivityCompleted",
+            "payload": {
+                "result": serializer.envelope(result),
+                "payload_codec": serializer.AVRO_CODEC,
+            },
+        }
+
+    def _resolve_child_workflow(self, cmd: StartChildWorkflow) -> dict[str, Any]:
+        if cmd.workflow_type not in self._child_workflow_results:
+            raise KeyError(
+                f"no mock registered for child workflow {cmd.workflow_type!r}; "
+                "call env.register_child_workflow_result()."
+            )
+        return {
+            "event_type": "ChildRunCompleted",
+            "payload": {
+                "result": serializer.envelope(self._child_workflow_results[cmd.workflow_type]),
+                "payload_codec": serializer.AVRO_CODEC,
+            },
+        }
+
+
+# Sentinel marking "no terminal command seen this iteration". A dedicated
+# object is used rather than ``None`` because a workflow's result can be any
+# value, ``None`` included; it is compared by identity (``is``).
+_SENTINEL = object()
+
+
+def replay_history(
+    workflow_cls: type,
+    history_events: Iterable[dict[str, Any]],
+    start_input: list[Any] | None = None,
+    *,
+    run_id: str = "",
+    payload_codec: str | None = None,
+) -> ReplayOutcome:
+    """Replay a production history against current workflow code.
+
+    Hands the durable history directly to the worker's replayer. Raises any
+    exception the workflow would raise during replay — for example a
+    non-determinism failure when ``run`` yields a different command sequence
+    from the one recorded in history.
+
+    This is the supported way to regression-test a workflow change against
+    real production traffic: dump the history from ``Client.get_history``,
+    save the JSON, and replay it on every PR.
+    """
+    return replay(
+        workflow_cls,
+        history_events,
+        list(start_input or []),
+        run_id=run_id,
+        payload_codec=payload_codec,
+    )
+
+
+def replay_history_file(
+    workflow_cls: type,
+    path: str | Path,
+    start_input: list[Any] | None = None,
+    *,
+    run_id: str = "",
+    payload_codec: str | None = None,
+) -> ReplayOutcome:
+    """Convenience wrapper: load a JSON history file and replay it.
+
+    Accepts either a list of events at the top level or a dict with an
+    ``events`` key (matching the shape of ``Client.get_history``).
+    """
+    data = json.loads(Path(path).read_text())
+    events = data["events"] if isinstance(data, dict) else data
+    return replay_history(
+        workflow_cls,
+        events,
+        start_input,
+        run_id=run_id,
+        payload_codec=payload_codec,
+    )
+
+
+# Names exported by ``from durable_workflow.testing import *``.
+__all__ = [
+    "WorkflowEnvironment",
+    "replay_history",
+    "replay_history_file",
+]
+
+
+# Hold references to the terminal exception types imported at the top of the
+# module. They can be imported from this module by name, but they are not
+# listed in ``__all__``, so a star-import does not pick them up. Of the three,
+# only ``WorkflowFailed`` is raised directly by code in this module.
+_TERMINAL_EXCEPTIONS = (WorkflowFailed, WorkflowCancelled, WorkflowTerminated)
diff --git a/src/durable_workflow/workflow.py b/src/durable_workflow/workflow.py
diff --git a/tests/test_replay.py b/tests/test_replay.py
diff --git a/tests/test_testing_harness.py b/tests/test_testing_harness.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Testing`
	`2`	`+`
	`3`	`+::: durable_workflow.testing`