msgbyte · tianheil3 · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026
diff --git a/README.md b/README.md
@@ -72,6 +72,46 @@ UPSTASH_REDIS_REST_TOKEN="cutia_redis_token"
 NODE_ENV="development"
 ```
 
+Optional TTS env values:
+
+```bash
+EXTERNAL_TTS_API_BASE_URL="https://your-tts-provider.example.com/v1"
+EXTERNAL_TTS_API_MODEL="gpt-4o-mini-tts"
+EXTERNAL_TTS_API_KEY="your_tts_api_key"
+```
+
+Cutia prefers `EXTERNAL_TTS_API_*` for external speech synthesis. The legacy
+`API_BASE_URL` / `API_MODEL` / `API_KEY` names are still accepted as
+compatibility aliases when the namespaced variables are absent.
+
+Use a provider-supported TTS model for `EXTERNAL_TTS_API_MODEL` (for example
+`gpt-4o-mini-tts` or another audio-output model that your provider actually
+supports). The shared `API_MODEL` alias is only a migration fallback and may
+already point at a non-TTS chat model in your environment.
+
+Before treating a failed probe as a code regression, confirm the provider
+itself is TTS-capable for the current credentials:
+
+- `/models` should list the configured TTS model or another audio-capable model
+- either `/audio/speech` must return audio directly, or `/responses` must accept
+  audio output requests for the configured model
+- if `/audio/speech` returns `404` and `/models` contains only chat/text models,
+  the provider is not exposing a usable TTS surface for this environment
+- legacy fallback is best-effort only; if the legacy upstream is unavailable,
+  route probes will still return `502`
+
+To verify that the configured provider can actually return audio, run this from the app directory (`apps/web`):
+
+```bash
+bun --eval 'import { getExternalTtsConfig, synthesizeSpeechWithOpenAiCompatible } from "./src/lib/tts/openai-compatible.ts"; const config = getExternalTtsConfig({ env: process.env }); const audio = await synthesizeSpeechWithOpenAiCompatible({ config, text: "Cutia TTS probe", voice: "default" }); console.log(audio.byteLength);'
+```
+
+To verify the route end-to-end, run this from the app directory (`apps/web`):
+
+```bash
+NODE_ENV=development NEXT_PUBLIC_SITE_URL=http://localhost:3000 UPSTASH_REDIS_REST_URL=http://localhost:8079 UPSTASH_REDIS_REST_TOKEN=cutia_redis_token bun --eval 'import { NextRequest } from "next/server"; import { POST } from "./src/app/api/tts/generate/route.ts"; const request = new NextRequest("http://localhost/api/tts/generate", { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ text: "Cutia route probe", voice: "default" }) }); const response = await POST(request); console.log(response.status); console.log(await response.text());'
+```
+
 To enable authentication, also start PostgreSQL and add these env values:
 
 ```bash

diff --git a/apps/web/.env.example b/apps/web/.env.example
@@ -16,9 +16,23 @@ UPSTASH_REDIS_REST_TOKEN=example_token_here
 FREESOUND_CLIENT_ID=your_client_id_here
 FREESOUND_API_KEY=your_api_key_here
 
+# Optional: external OpenAI-compatible TTS provider
+# Preferred namespaced variables:
+# EXTERNAL_TTS_API_BASE_URL=https://your-tts-provider.example.com/v1
+# EXTERNAL_TTS_API_MODEL=gpt-4o-mini-tts
+# EXTERNAL_TTS_API_KEY=your_tts_api_key
+# Use a provider-supported audio/TTS model here. Shared API_MODEL values are
+# often general chat models and may not work for speech generation.
+# The provider should also expose that model from /models and support either
+# /audio/speech or /responses audio output for the same credentials.
+# Compatibility aliases used when the EXTERNAL_TTS_API_* variables are absent:
+# API_BASE_URL=https://your-shared-api.example.com/v1
+# API_MODEL=your_tts_model
+# API_KEY=your_tts_api_key
+
 # Cloudflare R2 (for reference image uploads)
 R2_ACCOUNT_ID=your_r2_account_id
 R2_ACCESS_KEY_ID=your_r2_access_key_id
 R2_SECRET_ACCESS_KEY=your_r2_secret_access_key
 R2_BUCKET_NAME=your_r2_bucket_name
-R2_PUBLIC_URL=https://your-r2-public-url.example.com
+R2_PUBLIC_URL=https://your-r2-public-url.example.com
diff --git a/apps/web/src/app/api/tts/generate/route.test.ts b/apps/web/src/app/api/tts/generate/route.test.ts
@@ -0,0 +1,113 @@
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+import { TtsError } from "@/lib/tts/errors";
+import { NextRequest } from "next/server";
+
+let synthesizeImpl: typeof import("@/lib/tts/provider").synthesizeSpeechWithFallback;
+const originalConsoleError = console.error;
+// Swap the env module for fixed values that use only the legacy API_* names.
+mock.module("@cutia/env/web", () => ({
+	webEnv: {
+		API_BASE_URL: "https://example.com/v1",
+		API_MODEL: "tts-1",
+		API_KEY: "secret",
+	},
+}));
+// Route the provider through `synthesizeImpl` so each test can reassign it.
+mock.module("@/lib/tts/provider", () => ({
+	synthesizeSpeechWithFallback: (args: Parameters<typeof synthesizeImpl>[0]) =>
+		synthesizeImpl(args),
+}));
+// Load the route only after both mocks are registered.
+const { POST } = await import("./route");
+
+/** Builds a JSON POST request aimed at the TTS generate route. */
+function createRequest(body: unknown): NextRequest {
+	const init = {
+		method: "POST",
+		headers: { "content-type": "application/json" },
+		body: JSON.stringify(body),
+	};
+	return new NextRequest("http://localhost/api/tts/generate", init);
+}
+
+describe("POST /api/tts/generate", () => {
+	beforeEach(() => {
+		console.error = mock(() => {}); // keep the route's error logging out of test output
+		synthesizeImpl = async () => Uint8Array.from([1, 2, 3]).buffer;
+	});
+	// Put the real console.error back once each test finishes.
+	afterEach(() => {
+		console.error = originalConsoleError;
+	});
+	// Bytes 1, 2, 3 from the default stub encode to base64 "AQID".
+	test("returns base64 audio for successful synthesis", async () => {
+		const response = await POST(createRequest({ text: "hello" }));
+
+		expect(response.status).toBe(200);
+		expect(await response.json()).toEqual({
+			audio: "AQID",
+		});
+	});
+	// A TtsError coded LEGACY_TTS_UPSTREAM must surface as 502 with its message.
+	test("returns 502 for structured legacy upstream errors without relying on message prefixes", async () => {
+		synthesizeImpl = async () => {
+			throw new TtsError({
+				code: "LEGACY_TTS_UPSTREAM",
+				message: "legacy fallback audio download failed",
+			});
+		};
+
+		const response = await POST(createRequest({ text: "hello" }));
+
+		expect(response.status).toBe(502);
+		expect(await response.json()).toEqual({
+			error: "legacy fallback audio download failed",
+		});
+	});
+	// Same mapping for EXTERNAL_TTS_UPSTREAM: status 502, message passed through.
+	test("returns 502 for structured external upstream errors without relying on message prefixes", async () => {
+		synthesizeImpl = async () => {
+			throw new TtsError({
+				code: "EXTERNAL_TTS_UPSTREAM",
+				message: "upstream gateway timeout",
+			});
+		};
+
+		const response = await POST(createRequest({ text: "hello" }));
+
+		expect(response.status).toBe(502);
+		expect(await response.json()).toEqual({
+			error: "upstream gateway timeout",
+		});
+	});
+	// EXTERNAL_TTS_CONFIG is reported as 500 while keeping the thrown message.
+	test("returns the original config error message for structured config failures", async () => {
+		synthesizeImpl = async () => {
+			throw new TtsError({
+				code: "EXTERNAL_TTS_CONFIG",
+				message: "external config missing",
+			});
+		};
+
+		const response = await POST(createRequest({ text: "hello" }));
+
+		expect(response.status).toBe(500);
+		expect(await response.json()).toEqual({
+			error: "external config missing",
+		});
+	});
+	// A plain Error gets the generic 500 body, with its message under "detail".
+	test("returns 500 for unexpected non-TtsError exceptions", async () => {
+		synthesizeImpl = async () => {
+			throw new Error("unexpected failure");
+		};
+
+		const response = await POST(createRequest({ text: "hello" }));
+
+		expect(response.status).toBe(500);
+		expect(await response.json()).toEqual({
+			error: "Internal server error",
+			detail: "unexpected failure",
+		});
+	});
+});
diff --git a/apps/web/src/app/api/tts/generate/route.ts b/apps/web/src/app/api/tts/generate/route.ts
@@ -1,18 +1,14 @@
+import { webEnv } from "@cutia/env/web";
 import { type NextRequest, NextResponse } from "next/server";
 import { z } from "zod";
-
-const TTS_API_BASE = "https://api.milorapart.top/apis/mbAIsc";
+import { isTtsError } from "@/lib/tts/errors";
+import { synthesizeSpeechWithFallback } from "@/lib/tts/provider";
 
 const requestSchema = z.object({
 	text: z.string().min(1, "Text is required").max(2000, "Text too long"),
 	voice: z.string().optional(),
 });
 
-const upstreamResponseSchema = z.object({
-	code: z.number(),
-	url: z.string().url(),
-});
-
 export async function POST(request: NextRequest) {
 	try {
 		const body = await request.json();
@@ -28,42 +24,33 @@ export async function POST(request: NextRequest) {
 			);
 		}
 
-		const { text } = validation.data;
-		const upstreamUrl = `${TTS_API_BASE}?${new URLSearchParams({ text, format: "mp3" })}`;
-		const upstreamResponse = await fetch(upstreamUrl);
-
-		if (!upstreamResponse.ok) {
-			return NextResponse.json(
-				{ error: `Upstream error: ${upstreamResponse.status}` },
-				{ status: 502 },
-			);
-		}
-
-		const upstreamData = await upstreamResponse.json();
-		const parsed = upstreamResponseSchema.safeParse(upstreamData);
-
-		if (!parsed.success || parsed.data.code !== 200) {
-			return NextResponse.json(
-				{ error: "TTS generation failed" },
-				{ status: 502 },
-			);
-		}
-
-		const audioResponse = await fetch(parsed.data.url);
-		if (!audioResponse.ok) {
-			return NextResponse.json(
-				{ error: `Failed to download audio: ${audioResponse.status}` },
-				{ status: 502 },
-			);
-		}
-
-		const audioArrayBuffer = await audioResponse.arrayBuffer();
+		const { text, voice } = validation.data;
+		const audioArrayBuffer = await synthesizeSpeechWithFallback({
+			env: webEnv,
+			text,
+			voice,
+		});
 		const base64 = Buffer.from(audioArrayBuffer).toString("base64");
 
 		return NextResponse.json({ audio: base64 });
 	} catch (error) {
 		const message = error instanceof Error ? error.message : "Unknown error";
 		console.error("TTS generate error:", error);
+
+		if (isTtsError(error)) {
+			switch (error.code) {
+				case "EXTERNAL_TTS_CONFIG":
+					return NextResponse.json({ error: message }, { status: 500 });
+				case "EXTERNAL_TTS_UPSTREAM":
+				case "LEGACY_TTS_UPSTREAM":
+					return NextResponse.json({ error: message }, { status: 502 });
+				default: {
+					const exhaustiveCode: never = error.code;
+					throw new Error(`Unhandled TTS error code: ${exhaustiveCode}`);
+				}
+			}
+		}
+
 		return NextResponse.json(
 			{ error: "Internal server error", detail: message },
 			{ status: 500 },

diff --git a/apps/web/src/constants/tts-constants.ts b/apps/web/src/constants/tts-constants.ts
@@ -3,8 +3,7 @@ export interface VoicePack {
 	name: string;
 }
 
-export const VOICE_PACKS: VoicePack[] = [
-	{ id: "default", name: "Default" },
-];
+export const VOICE_PACKS: VoicePack[] = [{ id: "default", name: "Default" }];
 
 export const DEFAULT_VOICE_PACK = "default";
+export const DEFAULT_EXTERNAL_TTS_VOICE = "alloy";
diff --git a/apps/web/src/lib/tts/errors.ts b/apps/web/src/lib/tts/errors.ts
@@ -0,0 +1,39 @@
+export const TTS_ERROR_CODES = [
+	"EXTERNAL_TTS_CONFIG",
+	"EXTERNAL_TTS_UPSTREAM",
+	"LEGACY_TTS_UPSTREAM",
+] as const;
+/** Union of the string literals above; the array itself is what `isTtsError` checks at runtime. */
+export type TtsErrorCode = (typeof TTS_ERROR_CODES)[number];
+
+/**
+ * Error type for TTS failures, tagged with a machine-readable `code`.
+ * `retryable` and `status` are optional extras copied from the caller as-is.
+ */
+export class TtsError extends Error {
+	code: TtsErrorCode;
+	retryable?: boolean;
+	status?: number;
+
+	constructor(options: {
+		code: TtsErrorCode;
+		message: string;
+		retryable?: boolean;
+		status?: number;
+	}) {
+		super(options.message);
+		// Override the inherited "Error" name.
+		this.name = "TtsError";
+		this.code = options.code;
+		this.retryable = options.retryable;
+		this.status = options.status;
+	}
+}
+
+/** Type guard: an `Error` whose `code` is listed in `TTS_ERROR_CODES`. */
+export function isTtsError(error: unknown): error is TtsError {
+	// Non-Error values never qualify; Errors are matched by code, not by class.
+	const maybeCode =
+		error instanceof Error ? (error as TtsError).code : undefined;
+	return maybeCode !== undefined && TTS_ERROR_CODES.includes(maybeCode);
+}