HumeAI · fern-api · Apr 24, 2026
diff --git a/.fern/metadata.json b/.fern/metadata.json
@@ -1,5 +1,5 @@
 {
-  "cliVersion": "4.62.5",
+  "cliVersion": "4.65.2",
   "generatorName": "fernapi/fern-python-sdk",
   "generatorVersion": "4.61.0",
   "generatorConfig": {
@@ -64,6 +64,6 @@
       }
     ]
   },
-  "originGitCommit": "a07a0deaa640e8b286f9c8e4e47426b37a083a67",
-  "sdkVersion": "0.13.11"
+  "originGitCommit": "689cf69b64d5e981ce706b580bc2d669fab8ebc2",
+  "sdkVersion": "0.13.12"
 }
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ dynamic = ["version"]
 
 [tool.poetry]
 name = "hume"
-version = "0.13.11"
+version = "0.13.12"
 description = "A Python SDK for Hume AI"
 readme = "README.md"
 authors = []

diff --git a/src/hume/core/client_wrapper.py b/src/hume/core/client_wrapper.py
@@ -28,12 +28,12 @@ def get_headers(self) -> typing.Dict[str, str]:
         import platform
 
         headers: typing.Dict[str, str] = {
-            "User-Agent": "hume/0.13.11",
+            "User-Agent": "hume/0.13.12",
             "X-Fern-Language": "Python",
             "X-Fern-Runtime": f"python/{platform.python_version()}",
             "X-Fern-Platform": f"{platform.system().lower()}/{platform.release()}",
             "X-Fern-SDK-Name": "hume",
-            "X-Fern-SDK-Version": "0.13.11",
+            "X-Fern-SDK-Version": "0.13.12",
             **(self.get_custom_headers() or {}),
         }
         if self.api_key is not None:

diff --git a/src/hume/tts/__init__.py b/src/hume/tts/__init__.py
@@ -9,6 +9,7 @@
     from .types import (
         AudioEncoding,
         AudioFormatType,
+        Context,
         ErrorResponse,
         Format,
         FormatMp3,
@@ -21,6 +22,7 @@
         PostedContextWithGenerationId,
         PostedContextWithUtterances,
         PostedTts,
+        PostedTtsStream,
         PostedUtterance,
         PostedUtteranceVoice,
         PostedUtteranceVoiceWithId,
@@ -46,6 +48,7 @@
     "AudioEncoding": ".types",
     "AudioFormatType": ".types",
     "BadRequestError": ".errors",
+    "Context": ".types",
     "ErrorResponse": ".types",
     "Format": ".types",
     "FormatMp3": ".types",
@@ -58,6 +61,7 @@
     "PostedContextWithGenerationId": ".types",
     "PostedContextWithUtterances": ".types",
     "PostedTts": ".types",
+    "PostedTtsStream": ".types",
     "PostedUtterance": ".types",
     "PostedUtteranceVoice": ".types",
     "PostedUtteranceVoiceWithId": ".types",
@@ -107,6 +111,7 @@ def __dir__():
     "AudioEncoding",
     "AudioFormatType",
     "BadRequestError",
+    "Context",
     "ErrorResponse",
     "Format",
     "FormatMp3",
@@ -119,6 +124,7 @@ def __dir__():
     "PostedContextWithGenerationId",
     "PostedContextWithUtterances",
     "PostedTts",
+    "PostedTtsStream",
     "PostedUtterance",
     "PostedUtteranceVoice",
     "PostedUtteranceVoiceWithId",

diff --git a/src/hume/tts/raw_client.py b/src/hume/tts/raw_client.py
diff --git a/src/hume/tts/types/__init__.py b/src/hume/tts/types/__init__.py
@@ -8,6 +8,7 @@
 if typing.TYPE_CHECKING:
     from .audio_encoding import AudioEncoding
     from .audio_format_type import AudioFormatType
+    from .context import Context
     from .error_response import ErrorResponse
     from .format import Format
     from .format_mp_3 import FormatMp3
@@ -20,6 +21,7 @@
     from .posted_context_with_generation_id import PostedContextWithGenerationId
     from .posted_context_with_utterances import PostedContextWithUtterances
     from .posted_tts import PostedTts
+    from .posted_tts_stream import PostedTtsStream
     from .posted_utterance import PostedUtterance
     from .posted_utterance_voice import PostedUtteranceVoice
     from .posted_utterance_voice_with_id import PostedUtteranceVoiceWithId
@@ -41,6 +43,7 @@
 _dynamic_imports: typing.Dict[str, str] = {
     "AudioEncoding": ".audio_encoding",
     "AudioFormatType": ".audio_format_type",
+    "Context": ".context",
     "ErrorResponse": ".error_response",
     "Format": ".format",
     "FormatMp3": ".format_mp_3",
@@ -53,6 +56,7 @@
     "PostedContextWithGenerationId": ".posted_context_with_generation_id",
     "PostedContextWithUtterances": ".posted_context_with_utterances",
     "PostedTts": ".posted_tts",
+    "PostedTtsStream": ".posted_tts_stream",
     "PostedUtterance": ".posted_utterance",
     "PostedUtteranceVoice": ".posted_utterance_voice",
     "PostedUtteranceVoiceWithId": ".posted_utterance_voice_with_id",
@@ -98,6 +102,7 @@ def __dir__():
 __all__ = [
     "AudioEncoding",
     "AudioFormatType",
+    "Context",
     "ErrorResponse",
     "Format",
     "FormatMp3",
@@ -110,6 +115,7 @@ def __dir__():
     "PostedContextWithGenerationId",
     "PostedContextWithUtterances",
     "PostedTts",
+    "PostedTtsStream",
     "PostedUtterance",
     "PostedUtteranceVoice",
     "PostedUtteranceVoiceWithId",

diff --git a/src/hume/tts/types/context.py b/src/hume/tts/types/context.py
@@ -0,0 +1,8 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from .posted_context_with_generation_id import PostedContextWithGenerationId
+from .posted_context_with_utterances import PostedContextWithUtterances
+
+Context = typing.Union[PostedContextWithGenerationId, PostedContextWithUtterances]
diff --git a/src/hume/tts/types/posted_tts.py b/src/hume/tts/types/posted_tts.py
@@ -66,8 +66,6 @@ class PostedTts(UniversalBaseModel):
     For a comparison of Octave versions, see the [Octave versions](/docs/text-to-speech-tts/overview#octave-versions) section in the TTS overview.
     """
 
-    instant_mode: typing.Optional[bool] = None
-
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else:

diff --git a/src/hume/tts/types/posted_tts_stream.py b/src/hume/tts/types/posted_tts_stream.py
@@ -0,0 +1,84 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+from .context import Context
+from .format import Format
+from .octave_version import OctaveVersion
+from .posted_utterance import PostedUtterance
+from .timestamp_type import TimestampType
+
+
+class PostedTtsStream(UniversalBaseModel):
+    context: typing.Optional[Context] = pydantic.Field(default=None)
+    """
+    Utterances to use as context for generating consistent speech style and prosody across multiple requests. These will not be converted to speech output.
+    """
+
+    format: typing.Optional[Format] = pydantic.Field(default=None)
+    """
+    Specifies the output audio file format.
+    """
+
+    include_timestamp_types: typing.Optional[typing.List[TimestampType]] = pydantic.Field(default=None)
+    """
+    The set of timestamp types to include in the response. Only supported for Octave 2 requests.
+    """
+
+    instant_mode: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    Enables ultra-low latency streaming, significantly reducing the time until the first audio chunk is received. Recommended for real-time applications requiring immediate audio playback. For further details, see our documentation on [instant mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode). 
+    - A [voice](/reference/text-to-speech-tts/synthesize-json-streaming#request.body.utterances.voice) must be specified when instant mode is enabled. Dynamic voice generation is not supported with this mode.
+    - Instant mode is only supported for streaming endpoints (e.g., [/v0/tts/stream/json](/reference/text-to-speech-tts/synthesize-json-streaming), [/v0/tts/stream/file](/reference/text-to-speech-tts/synthesize-file-streaming)).
+    - Ensure only a single generation is requested ([num_generations](/reference/text-to-speech-tts/synthesize-json-streaming#request.body.num_generations) must be `1` or omitted).
+    """
+
+    num_generations: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Number of audio generations to produce from the input utterances.
+
+    Using `num_generations` enables faster processing than issuing multiple sequential requests. Additionally, specifying `num_generations` allows prosody continuation across all generations without repeating context, ensuring each generation sounds slightly different while maintaining contextual consistency.
+    """
+
+    split_utterances: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    Controls how audio output is segmented in the response.
+
+    - When **enabled** (`true`), input utterances are automatically split into natural-sounding speech segments.
+
+    - When **disabled** (`false`), the response maintains a strict one-to-one mapping between input utterances and output snippets. 
+
+    This setting affects how the `snippets` array is structured in the response, which may be important for applications that need to track the relationship between input text and generated audio segments. When set to `false`, avoid including utterances with long `text`, as this can result in distorted output.
+    """
+
+    strip_headers: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    If enabled, the audio for all the chunks of a generation, once concatenated together, will constitute a single audio file. If disabled, each chunk's audio will be its own audio file, each with its own headers (if applicable).
+    """
+
+    utterances: typing.List[PostedUtterance] = pydantic.Field()
+    """
+    A list of **Utterances** to be converted to speech output.
+
+    An **Utterance** is a unit of input for [Octave](/docs/text-to-speech-tts/overview), and includes input `text`, an optional `description` to serve as the prompt for how the speech should be delivered, an optional `voice` specification, and additional controls to guide delivery for `speed` and `trailing_silence`.
+    """
+
+    version: typing.Optional[OctaveVersion] = pydantic.Field(default=None)
+    """
+    Selects the Octave model version used to synthesize speech for this request. If you omit this field, Hume automatically routes the request to the most appropriate model. Setting a specific version ensures stable and repeatable behavior across requests.
+
+    Use `2` to opt into the latest Octave capabilities. When you specify version `2`, you must also provide a `voice`. Requests that set `version: 2` without a voice will be rejected.
+
+    For a comparison of Octave versions, see the [Octave versions](/docs/text-to-speech-tts/overview#octave-versions) section in the TTS overview.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/tests/wire/test_empathicVoice_tools.py b/tests/wire/test_empathicVoice_tools.py
@@ -73,8 +73,8 @@ def test_empathicVoice_tools_delete_tool_version() -> None:
     """Test delete-tool-version endpoint with WireMock"""
     test_id = "empathic_voice.tools.delete_tool_version.0"
     client = get_client(test_id)
-    client.empathic_voice.tools.delete_tool_version(id="your-tool-id", version=1)
-    verify_request_count(test_id, "DELETE", "/v0/evi/tools/your-tool-id/version/1", None, 1)
+    client.empathic_voice.tools.delete_tool_version(id="", version=1)
+    verify_request_count(test_id, "DELETE", "/v0/evi/tools/{id}/version/1", None, 1)
 
 
 def test_empathicVoice_tools_update_tool_description() -> None: