samples: workflow_streams: rename chat -> llm in scenario 5

jssmith · jssmith · commit 81bf6051124c · 2026-05-02T18:56:34.000-07:00
"Chat" implies multi-turn conversation. The new scenario is a
one-shot LLM completion stream, not a chat. Rename to make the
scope clear:

- chat_shared.py             -> llm_shared.py
- workflows/chat_workflow.py -> workflows/llm_workflow.py
- activities/chat_activity.py -> activities/llm_activity.py
- run_chat.py                -> run_llm.py
- run_chat_worker.py         -> run_llm_worker.py
- ChatInput / ChatWorkflow   -> LLMInput / LLMWorkflow
- CHAT_TASK_QUEUE            -> LLM_TASK_QUEUE
  ("workflow-stream-chat-task-queue" -> "workflow-stream-llm-task-queue")
- chat-stream extra          -> llm-stream
- workflow id prefix
  workflow-stream-chat-...   -> workflow-stream-llm-...

The activity's `stream_completion` defn name and the topic
constants (`delta`, `complete`, `retry`) stay the same — those
already describe what they do without the "chat" framing.
README, docstrings, and run instructions updated to match.
diff --git a/pyproject.toml b/pyproject.toml
@@ -48,7 +48,7 @@ openai-agents = [
 pydantic-converter = ["pydantic>=2.10.6,<3"]
 sentry = ["sentry-sdk>=2.13.0"]
 trio-async = ["trio>=0.28.0,<0.29", "trio-asyncio>=0.15.0,<0.16"]
-chat-stream = ["openai>=1.0"]
+llm-stream = ["openai>=1.0"]
 cloud-export-to-parquet = [
     "pandas>=2.2.2,<3 ; python_version >= '3.10' and python_version < '4.0'",
     "numpy>=1.26.0,<2 ; python_version >= '3.10' and python_version < '3.13'",
diff --git a/workflow_streams/README.md b/workflow_streams/README.md
@@ -71,49 +71,49 @@ it's separate).
 
 ## Scenario 5 — LLM streaming
 
-* `workflows/chat_workflow.py` — a workflow that hosts a
+* `workflows/llm_workflow.py` — a workflow that hosts a
   `WorkflowStream` and runs `stream_completion` as a single activity.
   The workflow itself does no streaming; the activity owns the
   non-deterministic OpenAI call.
-* `activities/chat_activity.py` — calls
+* `activities/llm_activity.py` — calls
   `openai.AsyncOpenAI().chat.completions.create(stream=True)`,
   publishes each token chunk as a `TextDelta` on the `delta` topic,
   the final accumulated text on the `complete` topic, and a
   `RetryEvent` on the `retry` topic when running on attempt > 1.
-* `run_chat.py` — subscribes to all three topics, renders deltas to
+* `run_llm.py` — subscribes to all three topics, renders deltas to
   the terminal as they arrive, and on a `retry` event uses ANSI
   escapes to rewind the printed output before the retried attempt
   starts re-publishing.
-* `run_chat_worker.py` — separate worker on its own task queue
-  (`workflow-stream-chat-task-queue`), registering only `ChatWorkflow`
+* `run_llm_worker.py` — separate worker on its own task queue
+  (`workflow-stream-llm-task-queue`), registering only `LLMWorkflow`
   and `stream_completion`. This isolates the `openai` dependency and
   the `OPENAI_API_KEY` requirement to this one scenario.
 
 This scenario is split out for two reasons. First, it needs an extra
 dependency (`openai`) and a secret (`OPENAI_API_KEY`) — putting it on
 the main worker would force every other scenario to set up an OpenAI
-key. Second, killing the chat worker mid-stream is the easiest way to
+key. Second, killing the LLM worker mid-stream is the easiest way to
 demonstrate retry handling, and you don't want the same `Ctrl-C` to
 interrupt the other four scenarios' worker.
 
 Setup:
 
 ```bash
-uv sync --group chat-stream
+uv sync --group llm-stream
 export OPENAI_API_KEY=...
 ```
 
 Run:
 
 ```bash
-# Terminal 1: chat worker (its own task queue)
-uv run workflow_streams/run_chat_worker.py
+# Terminal 1: LLM worker (its own task queue)
+uv run workflow_streams/run_llm_worker.py
 
 # Terminal 2:
-uv run workflow_streams/run_chat.py
+uv run workflow_streams/run_llm.py
 ```
 
-To trigger the retry path, kill the chat worker in Terminal 1
+To trigger the retry path, kill the LLM worker in Terminal 1
 (`Ctrl-C`) while output is streaming, then start it again. The
 activity's next attempt sends a `RetryEvent` first; the consumer
 clears its on-screen output via ANSI escapes and re-renders from
diff --git a/workflow_streams/activities/llm_activity.py b/workflow_streams/activities/llm_activity.py
@@ -6,20 +6,20 @@
 from temporalio import activity
 from temporalio.contrib.workflow_streams import WorkflowStreamClient
 
-from workflow_streams.chat_shared import (
+from workflow_streams.llm_shared import (
     TOPIC_COMPLETE,
     TOPIC_DELTA,
     TOPIC_RETRY,
-    ChatInput,
+    LLMInput,
     RetryEvent,
     TextComplete,
     TextDelta,
 )
 
 
 @activity.defn
-async def stream_completion(input: ChatInput) -> str:
-    """Stream a chat completion to the parent workflow's stream.
+async def stream_completion(input: LLMInput) -> str:
+    """Stream an LLM completion to the parent workflow's stream.
 
     Activity-as-publisher: each delta from the OpenAI streaming API is
     pushed to the workflow's stream as a ``TextDelta`` event on the
diff --git a/workflow_streams/llm_shared.py b/workflow_streams/llm_shared.py
@@ -1,8 +1,8 @@
 """Types and constants for the LLM-streaming scenario.
 
 Kept separate from ``shared.py`` because the other scenarios don't
-use these — and the chat scenario runs on its own worker and task
-queue so the ``openai`` dependency stays out of everyone else's path.
+use these — and this scenario runs on its own worker and task queue
+so the ``openai`` dependency stays out of everyone else's path.
 """
 
 from __future__ import annotations
@@ -11,9 +11,9 @@
 
 from temporalio.contrib.workflow_streams import WorkflowStreamState
 
-# Scenario 5 (LLM streaming) runs on its own worker so the openai
-# dependency only matters for that scenario.
-CHAT_TASK_QUEUE = "workflow-stream-chat-task-queue"
+# Scenario 5 runs on its own worker so the openai dependency only
+# matters for that scenario.
+LLM_TASK_QUEUE = "workflow-stream-llm-task-queue"
 
 # Topics published by the activity.
 TOPIC_DELTA = "delta"
@@ -22,7 +22,7 @@
 
 
 @dataclass
-class ChatInput:
+class LLMInput:
     prompt: str
     model: str = "gpt-5-mini"
     # Carries stream state across continue-as-new. None on a fresh start.
diff --git a/workflow_streams/run_llm.py b/workflow_streams/run_llm.py
@@ -1,24 +1,24 @@
 """Stream LLM output to the terminal, handling retries.
 
-Starts a ``ChatWorkflow``, subscribes to its delta / complete / retry
+Starts an ``LLMWorkflow``, subscribes to its delta / complete / retry
 topics, and renders the model's output to stdout as it arrives. On a
 ``RETRY`` event (the activity is on attempt > 1), the consumer rewinds
 its rendered output with ANSI escapes and starts fresh — so a killed
 worker doesn't leave a half-finished response stuck on screen
 followed by the retried attempt's full output.
 
-Requires ``OPENAI_API_KEY`` in the environment and the ``chat-stream``
+Requires ``OPENAI_API_KEY`` in the environment and the ``llm-stream``
 extra::
 
-    uv sync --group chat-stream
+    uv sync --group llm-stream
     export OPENAI_API_KEY=...
 
-Run the chat worker first (``uv run workflow_streams/run_chat_worker.py``),
+Run the LLM worker first (``uv run workflow_streams/run_llm_worker.py``),
 then::
 
-    uv run workflow_streams/run_chat.py
+    uv run workflow_streams/run_llm.py
 
-To see retry handling in action, kill the chat worker mid-stream
+To see retry handling in action, kill the LLM worker mid-stream
 (Ctrl-C in its terminal) and start it again. The consumer will clear
 its accumulated output on the ``RETRY`` event and re-render the
 retried attempt's output from scratch.
@@ -34,17 +34,17 @@
 from temporalio.common import RawValue
 from temporalio.contrib.workflow_streams import WorkflowStreamClient
 
-from workflow_streams.chat_shared import (
-    CHAT_TASK_QUEUE,
+from workflow_streams.llm_shared import (
+    LLM_TASK_QUEUE,
     TOPIC_COMPLETE,
     TOPIC_DELTA,
     TOPIC_RETRY,
-    ChatInput,
+    LLMInput,
     RetryEvent,
     TextComplete,
     TextDelta,
 )
-from workflow_streams.workflows.chat_workflow import ChatWorkflow
+from workflow_streams.workflows.llm_workflow import LLMWorkflow
 
 # Long enough that you can comfortably kill the worker mid-stream and
 # watch the retry render. Adjust to taste.
@@ -71,20 +71,20 @@ async def main() -> None:
     client = await Client.connect("localhost:7233")
     converter = client.data_converter.payload_converter
 
-    workflow_id = f"workflow-stream-chat-{uuid.uuid4().hex[:8]}"
-    chat_input = ChatInput(prompt=DEFAULT_PROMPT)
+    workflow_id = f"workflow-stream-llm-{uuid.uuid4().hex[:8]}"
+    llm_input = LLMInput(prompt=DEFAULT_PROMPT)
     handle = await client.start_workflow(
-        ChatWorkflow.run,
-        chat_input,
+        LLMWorkflow.run,
+        llm_input,
         id=workflow_id,
-        task_queue=CHAT_TASK_QUEUE,
+        task_queue=LLM_TASK_QUEUE,
     )
 
     # Print a header so the user sees something immediately. The
     # response will start streaming below it once the first delta
     # arrives — until then this is the only line on screen.
     print(
-        f"[chat {workflow_id}] streaming response from {chat_input.model}, "
+        f"[llm {workflow_id}] streaming response from {llm_input.model}, "
         f"awaiting first token..."
     )
     print()
diff --git a/workflow_streams/run_llm_worker.py b/workflow_streams/run_llm_worker.py
@@ -5,7 +5,7 @@
 scenario. Different task queue too — the other four samples won't
 route work to this worker.
 
-Kill this worker mid-stream while ``run_chat.py`` is running to
+Kill this worker mid-stream while ``run_llm.py`` is running to
 trigger a retry: Temporal restarts the activity on the next worker
 to come up, the activity publishes a ``RetryEvent`` on its second
 attempt, and the consumer resets its rendered output.
@@ -19,18 +19,18 @@
 from temporalio.client import Client
 from temporalio.worker import Worker
 
-from workflow_streams.activities.chat_activity import stream_completion
-from workflow_streams.chat_shared import CHAT_TASK_QUEUE
-from workflow_streams.workflows.chat_workflow import ChatWorkflow
+from workflow_streams.activities.llm_activity import stream_completion
+from workflow_streams.llm_shared import LLM_TASK_QUEUE
+from workflow_streams.workflows.llm_workflow import LLMWorkflow
 
 
 async def main() -> None:
     logging.basicConfig(level=logging.INFO)
     client = await Client.connect("localhost:7233")
     worker = Worker(
         client,
-        task_queue=CHAT_TASK_QUEUE,
-        workflows=[ChatWorkflow],
+        task_queue=LLM_TASK_QUEUE,
+        workflows=[LLMWorkflow],
         activities=[stream_completion],
     )
     await worker.run()
diff --git a/workflow_streams/workflows/llm_workflow.py b/workflow_streams/workflows/llm_workflow.py
@@ -6,14 +6,14 @@
 from temporalio.common import RetryPolicy
 from temporalio.contrib.workflow_streams import WorkflowStream
 
-from workflow_streams.chat_shared import ChatInput
+from workflow_streams.llm_shared import LLMInput
 
 with workflow.unsafe.imports_passed_through():
-    from workflow_streams.activities.chat_activity import stream_completion
+    from workflow_streams.activities.llm_activity import stream_completion
 
 
 @workflow.defn
-class ChatWorkflow:
+class LLMWorkflow:
     """Wrapper for an LLM-streaming activity.
 
     The workflow does no streaming of its own; it hosts the
@@ -26,18 +26,18 @@ class ChatWorkflow:
     retries it (up to ``max_attempts``); the retried attempt
     re-publishes from the start, so the consumer must reset on the
     activity's ``RETRY`` event. See
-    `activities/chat_activity.py` and `run_chat.py`.
+    `activities/llm_activity.py` and `run_llm.py`.
     """
 
     @workflow.init
-    def __init__(self, input: ChatInput) -> None:
+    def __init__(self, input: LLMInput) -> None:
         # Construct the stream from `@workflow.init` so the
         # publish-Signal handler is registered before any external
         # publisher (the activity, here) tries to publish.
         self.stream = WorkflowStream(prior_state=input.stream_state)
 
     @workflow.run
-    async def run(self, input: ChatInput) -> str:
+    async def run(self, input: LLMInput) -> str:
         result = await workflow.execute_activity(
             stream_completion,
             input,