Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/__tests__/azure-mstts-namespace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,26 @@ describe("Azure MSTTS Namespace Handling", () => {
const xmlnsMatches = result.match(/xmlns="http:\/\/www\.w3\.org\/2001\/10\/synthesis"/g);
expect(xmlnsMatches?.length).toBe(1);
});

it("should nest <prosody> inside <voice>, not as a direct child of <speak>", async () => {
  // Regression guard for https://github.com/willwade/js-tts-wrapper/issues/38.
  // Passing rate/pitch/volume used to put <prosody> directly under <speak>,
  // which Azure refuses with:
  // "Node [speak] with type [RootSpeak] should not contain node [prosody] with type [Others]"
  const prosodyOptions = { rate: "fast", pitch: "high", volume: 80 };
  const output = (client as any).ensureAzureSSMLStructure(
    `<speak>Hello world</speak>`,
    "en-US-JennyNeural",
    prosodyOptions
  );

  // Ordering: a <voice> tag exists and the first <prosody> opens after it.
  const voiceAt = output.indexOf("<voice");
  const prosodyAt = output.indexOf("<prosody");
  expect(voiceAt).toBeGreaterThan(-1);
  expect(prosodyAt).toBeGreaterThan(voiceAt);

  // Nesting: <speak><voice><prosody>...</prosody></voice></speak>
  expect(output).toMatch(/<voice[^>]*>\s*<prosody[^>]*>/);
  expect(output).toMatch(/<\/prosody>\s*<\/voice>/);
});
});
});
22 changes: 16 additions & 6 deletions src/engines/azure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -646,12 +646,22 @@ export class AzureTTSClient extends AbstractTTSClient {
if (options.volume !== undefined) attrs.push(`volume="${options.volume}%"`);

if (attrs.length > 0) {
// Extract content
const match = ssml.match(/<speak[^>]*>(.*?)<\/speak>/s);
if (match) {
const content = match[1];
const prosodyContent = `<prosody ${attrs.join(" ")}>${content}</prosody>`;
ssml = ssml.replace(content, prosodyContent);
// Wrap the inner content of <voice> when one is present, otherwise of <speak>.
// Prosody must be nested inside <voice>, not placed as a direct child of <speak>.
const prosodyOpenTag = `<prosody ${attrs.join(" ")}>`;
const wrapTarget = ssml.includes("<voice")
  ? /(<voice[^>]*>)(.*?)(<\/voice>)/s
  : /(<speak[^>]*>)(.*?)(<\/speak>)/s;
// Rewrite the matched element in place through a replacer function. Looking the
// captured text up again as a plain string would hit its first occurrence anywhere
// in the document (inside an attribute value, or index 0 when the content is
// empty), and a string replacement would expand `$&`, `$$`, "$`" and `$'` if they
// appear in the spoken text. When the pattern does not match, ssml is unchanged.
ssml = ssml.replace(
  wrapTarget,
  (_whole: string, openTag: string, inner: string, closeTag: string) =>
    `${openTag}${prosodyOpenTag}${inner}</prosody>${closeTag}`
);
}
}
Expand Down
Loading