-
Notifications
You must be signed in to change notification settings - Fork 2.7k
update inference models to match the latest #4597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,9 +20,15 @@ | |
|
|
||
| CartesiaModels = Literal[ | ||
| "cartesia", | ||
| "cartesia/sonic", | ||
| "cartesia/sonic-3", | ||
| "cartesia/sonic-2", | ||
| "cartesia/sonic-turbo", | ||
| "cartesia/sonic", | ||
| ] | ||
| DeepgramModels = Literal[ | ||
| "deepgram", | ||
| "deepgram/aura", | ||
| "deepgram/aura-2", | ||
| ] | ||
| ElevenlabsModels = Literal[ | ||
| "elevenlabs", | ||
|
|
@@ -34,16 +40,18 @@ | |
| ] | ||
| RimeModels = Literal[ | ||
| "rime", | ||
| "rime/mist", | ||
| "rime/mistv2", | ||
| "rime/arcana", | ||
| "rime/mistv2", | ||
| ] | ||
davidzhao marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| InworldModels = Literal[ | ||
| "inworld", | ||
| "inworld/inworld-tts-1.5-max", | ||
| "inworld/inworld-tts-1.5-mini", | ||
| "inworld/inworld-tts-1-max", | ||
| "inworld/inworld-tts-1", | ||
| ] | ||
|
|
||
| TTSModels = Union[CartesiaModels, ElevenlabsModels, RimeModels, InworldModels] | ||
| TTSModels = Union[CartesiaModels, DeepgramModels, ElevenlabsModels, RimeModels, InworldModels] | ||
|
|
||
|
|
||
| def _parse_model_string(model: str) -> tuple[str, str | None]: | ||
|
|
@@ -98,8 +106,13 @@ def _make_fallback(model: FallbackModelType) -> FallbackModel: | |
|
|
||
|
|
||
| class CartesiaOptions(TypedDict, total=False): | ||
| duration: float # max duration of audio in seconds | ||
| speed: Literal["slow", "normal", "fast"] # default: not specified | ||
| emotion: str | ||
| speed: Literal["slow", "normal", "fast"] | ||
| volume: float | ||
|
|
||
|
Comment on lines
108
to
+112
Contributor
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
# First, let's examine the file context to understand the CartesiaOptions usage
cat -n livekit-agents/livekit/agents/inference/tts.py | head -130 | tail -40
Repository: livekit/agents
Length of output: 172
🏁 Script executed:
# Also check if there are any other references to CartesiaOptions in the codebase
rg "CartesiaOptions" --type py -B 2 -A 5
Repository: livekit/agents
Length of output: 4238
🌐 Web query:
💡 Result: Below are the main Cartesia TTS API request options/parameters (from the official docs). Common (all TTS endpoints)
|
||
|
|
||
| class DeepgramOptions(TypedDict, total=False): | ||
| pass | ||
|
|
||
|
|
||
| class ElevenlabsOptions(TypedDict, total=False): | ||
|
|
@@ -157,6 +170,25 @@ def __init__( | |
| ) -> None: | ||
| pass | ||
|
|
||
| @overload | ||
| def __init__( | ||
| self, | ||
| model: DeepgramModels, | ||
| *, | ||
| voice: NotGivenOr[str] = NOT_GIVEN, | ||
| language: NotGivenOr[str] = NOT_GIVEN, | ||
| encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN, | ||
| sample_rate: NotGivenOr[int] = NOT_GIVEN, | ||
| base_url: NotGivenOr[str] = NOT_GIVEN, | ||
| api_key: NotGivenOr[str] = NOT_GIVEN, | ||
| api_secret: NotGivenOr[str] = NOT_GIVEN, | ||
| http_session: aiohttp.ClientSession | None = None, | ||
| extra_kwargs: NotGivenOr[DeepgramOptions] = NOT_GIVEN, | ||
| fallback: NotGivenOr[list[FallbackModelType] | FallbackModelType] = NOT_GIVEN, | ||
| conn_options: NotGivenOr[APIConnectOptions] = NOT_GIVEN, | ||
| ) -> None: | ||
| pass | ||
|
|
||
| @overload | ||
| def __init__( | ||
| self, | ||
|
|
@@ -246,7 +278,12 @@ def __init__( | |
| api_secret: NotGivenOr[str] = NOT_GIVEN, | ||
| http_session: aiohttp.ClientSession | None = None, | ||
| extra_kwargs: NotGivenOr[ | ||
| dict[str, Any] | CartesiaOptions | ElevenlabsOptions | RimeOptions | InworldOptions | ||
| dict[str, Any] | ||
| | CartesiaOptions | ||
| | DeepgramOptions | ||
| | ElevenlabsOptions | ||
| | RimeOptions | ||
| | InworldOptions | ||
| ] = NOT_GIVEN, | ||
| fallback: NotGivenOr[list[FallbackModelType] | FallbackModelType] = NOT_GIVEN, | ||
| conn_options: NotGivenOr[APIConnectOptions] = NOT_GIVEN, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we clarify they are preview versions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think our docs page should be the authority here.