Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
Mapping of the ISO 639-3 language codes used by Scribe to the ISO 639-1 codes
used by the LiveKit multilingual turn-detector model.
Note: ISO 639-3 language codes that Scribe supports but that have no ISO 639-1
equivalent are mapped to None.
https://elevenlabs.io/docs/overview/capabilities/speech-to-text#supported-languages
"""

# Lookup table: three-letter ISO 639-3 code -> two-letter ISO 639-1 code.
# A value of None marks a language that is present in the table but has no
# two-letter equivalent; such keys are kept (rather than omitted) so the table
# still enumerates every code it was built from.
ISO_639_3_TO_1 = {
    "afr": "af",
    "amh": "am",
    "ara": "ar",
    "hye": "hy",
    "asm": "as",
    "ast": None,  # no ISO 639-1 code
    "aze": "az",
    "bel": "be",
    "ben": "bn",
    "bos": "bs",
    "bul": "bg",
    "mya": "my",
    "yue": None,  # no ISO 639-1 code
    "cat": "ca",
    "ceb": None,  # no ISO 639-1 code
    "nya": "ny",
    "hrv": "hr",
    "ces": "cs",
    "dan": "da",
    "nld": "nl",
    "eng": "en",
    "est": "et",
    "fil": None,  # no ISO 639-1 code
    "fin": "fi",
    "fra": "fr",
    "ful": "ff",
    "glg": "gl",
    "lug": "lg",
    "kat": "ka",
    "deu": "de",
    "ell": "el",
    "guj": "gu",
    "hau": "ha",
    "heb": "he",
    "hin": "hi",
    "hun": "hu",
    "isl": "is",
    "ibo": "ig",
    "ind": "id",
    "gle": "ga",
    "ita": "it",
    "jpn": "ja",
    "jav": "jv",
    "kea": None,  # no ISO 639-1 code
    "kan": "kn",
    "kaz": "kk",
    "khm": "km",
    "kor": "ko",
    "kur": "ku",
    "kir": "ky",
    "lao": "lo",
    "lav": "lv",
    "lin": "ln",
    "lit": "lt",
    "luo": None,  # no ISO 639-1 code
    "ltz": "lb",
    "mkd": "mk",
    "msa": "ms",
    "mal": "ml",
    "mlt": "mt",
    "zho": "zh",
    "mri": "mi",
    "mar": "mr",
    "mon": "mn",
    "nep": "ne",
    "nso": None,  # no ISO 639-1 code
    "nor": "no",
    "oci": "oc",
    "ori": "or",
    "pus": "ps",
    "fas": "fa",
    "pol": "pl",
    "por": "pt",
    "pan": "pa",
    "ron": "ro",
    "rus": "ru",
    "srp": "sr",
    "sna": "sn",
    "snd": "sd",
    "slk": "sk",
    "slv": "sl",
    "som": "so",
    "spa": "es",
    "swa": "sw",
    "swe": "sv",
    "tam": "ta",
    "tgk": "tg",
    "tel": "te",
    "tha": "th",
    "tur": "tr",
    "ukr": "uk",
    "umb": None,  # no ISO 639-1 code
    "urd": "ur",
    "uzb": "uz",
    "vie": "vi",
    "cym": "cy",
    "wol": "wo",
    "xho": "xh",
    "zul": "zu",
}


def iso639_3_to_1(code: str | None) -> str | None:
    """Translate an ISO 639-3 language code into its ISO 639-1 equivalent.

    The lookup is case-insensitive: ``code`` is lower-cased before it is
    searched for in ``ISO_639_3_TO_1``.

    Returns:
        The two-letter code, or ``None`` when ``code`` is ``None`` or empty,
        when it is not a key of the table, or when the table maps it to
        ``None``.
    """
    # Guard first so that .lower() is never called on None or "".
    if not code:
        return None
    return ISO_639_3_TO_1.get(code.lower())
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from livekit.agents.utils import AudioBuffer, http_context, is_given
from livekit.agents.voice.io import TimedString

from .languages import iso639_3_to_1
from .log import logger
from .models import STTRealtimeSampleRates

Expand Down Expand Up @@ -220,8 +221,9 @@ async def _recognize_impl(
except Exception as e:
raise APIConnectionError() from e

normalized_language = iso639_3_to_1(language_code) or language_code
return self._transcription_to_speech_event(
language_code=language_code,
language_code=normalized_language,
Comment on lines +224 to +226
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

find . -type f -name "stt.py" | grep elevenlabs

Repository: livekit/agents

Length of output: 136


🏁 Script executed:

head -1 livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py | wc -l
wc -l livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py

Repository: livekit/agents

Length of output: 140


🏁 Script executed:

sed -n '210,240p' livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py

Repository: livekit/agents

Length of output: 1055


🏁 Script executed:

sed -n '180,240p' livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py

Repository: livekit/agents

Length of output: 2548


🏁 Script executed:

grep -n "class SpeechData" livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py
rg -A 10 "class SpeechData"

Repository: livekit/agents

Length of output: 772


🏁 Script executed:

grep -n "def.*batch\|async def" livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py | head -20

Repository: livekit/agents

Length of output: 441


🏁 Script executed:

sed -n '170,250p' livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py

Repository: livekit/agents

Length of output: 3237


🏁 Script executed:

sed -n '326,450p' livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py

Repository: livekit/agents

Length of output: 5154


🏁 Script executed:

sed -n '470,550p' livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py

Repository: livekit/agents

Length of output: 3746


🏁 Script executed:

grep -n "_process_stream_event" livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py

Repository: livekit/agents

Length of output: 177


Guard against missing language_code in batch path.

If ElevenLabs omits language_code, None will propagate into SpeechData.language (annotated as str) and violate the type contract, potentially breaking downstream consumers. The streaming path has protection with a fallback chain (self._language or "en"), but the batch path lacks this. Consider aligning the batch path with the streaming pattern.

-        normalized_language = iso639_3_to_1(language_code) or language_code
+        normalized_language = (
+            iso639_3_to_1(language_code)
+            or language_code
+            or self._opts.language_code
+            or "en"
+        )
🤖 Prompt for AI Agents
In `@livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/stt.py`
around lines 224 - 226, The batch path can propagate a None language_code into
SpeechData.language; update the logic that computes normalized_language
(currently using iso639_3_to_1(language_code) or language_code) to fall back to
self._language or "en" when language_code is missing, e.g. derive a
safe_language = language_code or self._language or "en" then call
iso639_3_to_1(safe_language) or safe_language before passing to
_transcription_to_speech_event so SpeechData.language always receives a str.

text=extracted_text,
start_time=start_time,
end_time=end_time,
Expand Down Expand Up @@ -483,9 +485,10 @@ def _process_stream_event(self, data: dict) -> None:
start_time = words[0].get("start", 0) if words else 0
end_time = words[-1].get("end", 0) if words else 0

normalized_language = iso639_3_to_1(self._language) or self._language or "en"
# 11labs only sends word timestamps for final transcripts
speech_data = stt.SpeechData(
language=self._language or "en",
language=normalized_language,
text=text,
start_time=start_time + self.start_time_offset,
end_time=end_time + self.start_time_offset,
Expand Down