Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions __tests__/elevenlabs-language-mapping.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/**
* Tests for ElevenLabs language mapping fix (issue #51)
*
* Problem:
* 1. labels.accent ("american", "british") is not a BCP-47 code
* 2. Multilingual voices only got one language code instead of all supported languages
*
* Fix:
* - Fetch /v1/models to get language lists per model
* - Map voice.high_quality_base_model_ids → union of languages
* - Use language_id ("en", "es") as bcp47, language name as display
*/

import { ElevenLabsTTSClient } from "../src/engines/elevenlabs";

// Raw voice fixtures fed to _getVoicesWithModels by the tests in this file.
const MOCK_VOICES = [
  // v1: references two models whose language lists overlap (exercises dedup).
  {
    voice_id: "v1",
    name: "Rachel",
    labels: { gender: "female", accent: "american" },
    high_quality_base_model_ids: ["eleven_multilingual_v2", "eleven_flash_v2_5"],
  },
  // v2: references a single model.
  {
    voice_id: "v2",
    name: "Bella",
    labels: { gender: "female", accent: "british" },
    high_quality_base_model_ids: ["eleven_multilingual_v2"],
  },
  // v3: no labels and no model ids (exercises the fallback path).
  { voice_id: "v3", name: "OldVoice", labels: {}, high_quality_base_model_ids: [] },
];

// Model fixtures fed to _getVoicesWithModels by the tests in this file.
const MOCK_MODELS = [
  // TTS-capable model with four languages.
  {
    model_id: "eleven_multilingual_v2",
    can_do_text_to_speech: true,
    languages: [
      { language_id: "en", name: "English" },
      { language_id: "es", name: "Spanish" },
      { language_id: "fr", name: "French" },
      { language_id: "de", name: "German" },
    ],
  },
  // TTS-capable model sharing "en"/"es" with the one above and adding "ja".
  {
    model_id: "eleven_flash_v2_5",
    can_do_text_to_speech: true,
    languages: [
      { language_id: "en", name: "English" },
      { language_id: "es", name: "Spanish" },
      { language_id: "ja", name: "Japanese" },
    ],
  },
  // Flagged as not TTS-capable, so its languages should be ignored.
  {
    model_id: "eleven_tts_v1",
    can_do_text_to_speech: false,
    languages: [{ language_id: "en", name: "English" }],
  },
];

/**
 * Build a jest mock that can stand in for `fetch`.
 *
 * Any URL containing "/models" resolves to a successful response whose
 * `json()` yields `modelsPayload`; every other URL resolves to a successful
 * response whose `json()` yields `voicesPayload`.
 */
function mockFetch(voicesPayload: object, modelsPayload: object[]) {
  // Minimal response-like object: only `ok` and `json()` are provided.
  const okResponse = (payload: object) =>
    Promise.resolve({
      ok: true,
      json: () => Promise.resolve(payload),
    });

  return jest
    .fn()
    .mockImplementation((url: string) =>
      url.includes("/models") ? okResponse(modelsPayload) : okResponse(voicesPayload)
    );
}

describe("ElevenLabs _mapVoicesToUnified — language mapping from models", () => {
  // Typed as `any` so the tests can reach the client's protected helpers.
  let client: any;

  beforeEach(() => {
    client = new ElevenLabsTTSClient({ apiKey: "fake" });
  });

  /**
   * Run the two-step pipeline under test: merge voices with model language
   * data, then map the merged voices to the unified voice shape.
   */
  async function mapVoices(
    rawVoices: object[] = MOCK_VOICES,
    models: object[] = MOCK_MODELS
  ): Promise<any[]> {
    const merged = await client._getVoicesWithModels(rawVoices, models);
    return client._mapVoicesToUnified(merged);
  }

  it("maps a multilingual voice to all languages from its models (deduped)", async () => {
    // Rachel supports eleven_multilingual_v2 (en, es, fr, de) + eleven_flash_v2_5 (en, es, ja)
    // → union = en, es, fr, de, ja (en and es deduped)
    const voices = await mapVoices();
    const rachel = voices.find((v: any) => v.id === "v1");

    const bcp47s = rachel.languageCodes.map((lc: any) => lc.bcp47);
    expect(bcp47s).toContain("en");
    expect(bcp47s).toContain("es");
    expect(bcp47s).toContain("fr");
    expect(bcp47s).toContain("de");
    expect(bcp47s).toContain("ja");
    expect(bcp47s).toHaveLength(5); // exactly the union, nothing extra
    expect(new Set(bcp47s).size).toBe(bcp47s.length); // no duplicates
  });

  it("uses human-readable language name as display", async () => {
    const voices = await mapVoices();
    const rachel = voices.find((v: any) => v.id === "v1");
    const en = rachel.languageCodes.find((lc: any) => lc.bcp47 === "en");

    expect(en.display).toBe("English");
  });

  it("falls back to English when voice has no model ids", async () => {
    const voices = await mapVoices();
    const old = voices.find((v: any) => v.id === "v3");

    expect(old.languageCodes).toHaveLength(1);
    expect(old.languageCodes[0].bcp47).toBe("en");
  });

  it("ignores models where can_do_text_to_speech is false", async () => {
    // Give the non-TTS model a language that no TTS-capable model offers, so
    // that a leak from it would be observable in the result.
    const models = [
      ...MOCK_MODELS,
      {
        model_id: "non_tts_model",
        can_do_text_to_speech: false,
        languages: [{ language_id: "pt", name: "Portuguese" }],
      },
    ];
    const rawVoices = [
      {
        voice_id: "mixed",
        name: "Mixed",
        labels: {},
        high_quality_base_model_ids: ["eleven_multilingual_v2", "non_tts_model"],
      },
      {
        voice_id: "non-tts-only",
        name: "NonTtsOnly",
        labels: {},
        high_quality_base_model_ids: ["non_tts_model"],
      },
    ];

    const voices = await mapVoices(rawVoices, models);

    // A voice that also references a TTS model gets only that model's languages.
    const mixed = voices.find((v: any) => v.id === "mixed");
    const mixedCodes: string[] = mixed.languageCodes.map((lc: any) => lc.bcp47);
    expect(mixedCodes).not.toContain("pt");
    expect([...mixedCodes].sort()).toEqual(["de", "en", "es", "fr"]);

    // A voice that references only the non-TTS model resolves no languages
    // and therefore gets the English fallback.
    const nonTtsOnly = voices.find((v: any) => v.id === "non-tts-only");
    expect(nonTtsOnly.languageCodes).toHaveLength(1);
    expect(nonTtsOnly.languageCodes[0].bcp47).toBe("en");
  });
});
73 changes: 50 additions & 23 deletions src/engines/elevenlabs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -456,28 +456,54 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
}

/**
* Get available voices from the provider
* @returns Promise resolving to an array of voice objects
*/
* Merge raw voices with resolved language data from the models endpoint.
* Extracted as a separate method so tests can inject mock data directly.
*/
/**
 * Attach to each raw voice the de-duplicated list of languages offered by
 * the models it references.
 *
 * Tolerates malformed input: a non-array `rawVoices` or `models` is treated
 * as empty, a non-array `high_quality_base_model_ids` is treated as "no
 * models", and language entries without a string `language_id` are skipped.
 *
 * @param rawVoices Raw voice objects; `high_quality_base_model_ids` is read from each.
 * @param models Model objects; only entries with a truthy `can_do_text_to_speech`
 *   and an array `languages` contribute languages.
 * @returns A new array of shallow copies of the voices, each with an added
 *   `_resolvedLanguages` array (empty when nothing could be resolved).
 */
protected _getVoicesWithModels(rawVoices: any[], models: any[]): any[] {
  type ModelLanguage = { language_id: string; name: string };

  // Build model_id → languages map (TTS-capable models only)
  const modelLanguageMap = new Map<string, ModelLanguage[]>();
  for (const model of Array.isArray(models) ? models : []) {
    if (model?.can_do_text_to_speech && Array.isArray(model.languages)) {
      modelLanguageMap.set(model.model_id, model.languages);
    }
  }

  const voices = Array.isArray(rawVoices) ? rawVoices : [];
  return voices.map((voice) => {
    const modelIds: unknown = voice?.high_quality_base_model_ids;
    const seen = new Set<string>();
    const resolvedLanguages: ModelLanguage[] = [];
    for (const modelId of Array.isArray(modelIds) ? modelIds : []) {
      for (const lang of modelLanguageMap.get(modelId) ?? []) {
        // Skip malformed entries and languages already contributed by an earlier model.
        if (typeof lang?.language_id !== "string" || seen.has(lang.language_id)) {
          continue;
        }
        seen.add(lang.language_id);
        resolvedLanguages.push(lang);
      }
    }
    return { ...voice, _resolvedLanguages: resolvedLanguages };
  });
}

protected async _getVoices(): Promise<any[]> {
try {
const response = await fetch(`${this.baseUrl}/voices`, {
method: "GET",
headers: {
"xi-api-key": this.apiKey,
},
});

if (!response.ok) {
const errorText = await response.text();
const headers = { "xi-api-key": this.apiKey };
const [voicesResp, modelsResp] = await Promise.all([
fetch(`${this.baseUrl}/voices`, { method: "GET", headers }),
fetch(`${this.baseUrl}/models`, { method: "GET", headers }),
]);

if (!voicesResp.ok) {
const errorText = await voicesResp.text();
console.error(
`ElevenLabs API error: ${response.status} ${response.statusText}\nResponse: ${errorText}`
`ElevenLabs API error: ${voicesResp.status} ${voicesResp.statusText}\nResponse: ${errorText}`
);
throw new Error(`Failed to get voices: ${response.statusText}`);
throw new Error(`Failed to get voices: ${voicesResp.statusText}`);
}

const data = await response.json();
return data.voices;
const voiceData = await voicesResp.json();
const modelData = modelsResp.ok ? await modelsResp.json() : [];

return this._getVoicesWithModels(voiceData.voices, modelData);
} catch (error) {
console.error("Error getting ElevenLabs voices:", error);
return [];
Expand Down Expand Up @@ -877,13 +903,14 @@ export class ElevenLabsTTSClient extends AbstractTTSClient {
: voice.labels?.gender === "male"
? "Male"
: undefined,
languageCodes: [
{
bcp47: voice.labels?.accent || "en-US",
iso639_3: (voice.labels?.accent || "en-US").split("-")[0] || "eng",
display: voice.labels?.accent || "English",
},
],
languageCodes:
Array.isArray(voice._resolvedLanguages) && voice._resolvedLanguages.length > 0
? voice._resolvedLanguages.map((lang: { language_id: string; name: string }) => ({
bcp47: lang.language_id,
iso639_3: lang.language_id,
display: lang.name,
}))
: [{ bcp47: "en", iso639_3: "en", display: "English" }],
provider: "elevenlabs",
}));
}
Expand Down
Loading