Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/green-colts-kiss.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/ai-groq': minor
---

Add tree-shakeable Text-to-Speech (TTS) adapter for Groq API with English and Arabic voices, multiple output formats (default WAV), configurable speed and sample rate, new types, model metadata, and unit tests.
159 changes: 159 additions & 0 deletions packages/typescript/ai-groq/src/adapters/tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import { BaseTTSAdapter } from '@tanstack/ai/adapters'
import { createGroqClient, generateId, getGroqApiKeyFromEnv } from '../utils'
import { validateAudioInput } from '../audio/audio-provider-options'
import type { GroqTTSModel } from '../model-meta'
import type {
GroqTTSFormat,
GroqTTSProviderOptions,
GroqTTSVoice,
} from '../audio/tts-provider-options'
import type { TTSOptions, TTSResult } from '@tanstack/ai'
import type Groq_SDK from 'groq-sdk'
import type { GroqClientConfig } from '../utils'

/**
 * Configuration accepted by the Groq TTS adapter.
 *
 * Currently adds no fields of its own: it is exactly `GroqClientConfig`,
 * and is forwarded unchanged to `createGroqClient` by the adapter.
 */
export interface GroqTTSConfig extends GroqClientConfig {}

/**
 * Groq Text-to-Speech Adapter
 *
 * Tree-shakeable adapter for Groq TTS functionality.
 * Supports canopylabs/orpheus-v1-english and canopylabs/orpheus-arabic-saudi models.
 *
 * Features:
 * - English voices: autumn(f), diana(f), hannah(f), austin(m), daniel(m), troy(m)
 * - Arabic voices: fahad(m), sultan(m), lulwa(f), noura(f)
 * - Output formats: flac, mp3, mulaw, ogg, wav (only wav currently supported)
 * - Speed control
 * - Configurable sample rate
 * - Vocal direction support (English voices only)
 *
 * When the caller does not pass a `voice`, the default is chosen per model
 * so that the Arabic model is never paired with an English-only voice.
 */
export class GroqTTSAdapter<TModel extends GroqTTSModel> extends BaseTTSAdapter<
  TModel,
  GroqTTSProviderOptions
> {
  readonly name = 'groq' as const

  private readonly client: Groq_SDK

  /**
   * @param config - Groq client configuration, forwarded to `createGroqClient`
   * @param model - The TTS model this adapter instance is bound to
   */
  constructor(config: GroqTTSConfig, model: TModel) {
    super(config, model)
    this.client = createGroqClient(config)
  }

  /**
   * Synthesizes speech for `options.text` and returns it base64-encoded.
   *
   * @param options - Text, model and optional voice/format/speed; anything in
   *   `modelOptions` is spread last into the request and therefore wins over
   *   the top-level fields.
   * @returns The encoded audio together with its format and MIME content type
   * @throws Error if the input text exceeds the maximum length enforced by
   *   `validateAudioInput`
   */
  async generateSpeech(
    options: TTSOptions<GroqTTSProviderOptions>,
  ): Promise<TTSResult> {
    const { model, text, voice, format = 'wav', speed, modelOptions } = options

    validateAudioInput({ input: text, model })

    const voiceFormat = format as GroqTTSFormat
    // The English and Arabic voice sets are disjoint, so the fallback voice
    // has to follow the model rather than being a single hard-coded name.
    const resolvedVoice = (voice ?? this.getDefaultVoice(model)) as GroqTTSVoice

    const request: Groq_SDK.Audio.Speech.SpeechCreateParams = {
      model,
      input: text,
      voice: resolvedVoice,
      response_format: voiceFormat,
      speed,
      ...modelOptions,
    }

    const response = await this.client.audio.speech.create(request)
    const arrayBuffer = await response.arrayBuffer()
    const base64 = this.toBase64(arrayBuffer)

    return {
      id: generateId(this.name),
      model,
      audio: base64,
      format: voiceFormat,
      contentType: this.getContentType(voiceFormat),
    }
  }

  /**
   * Picks the fallback voice for a model when the caller supplied none.
   * The Arabic model gets the first listed Arabic voice; everything else
   * keeps the previous default, `autumn`.
   */
  private getDefaultVoice(model: string): GroqTTSVoice {
    return model === 'canopylabs/orpheus-arabic-saudi' ? 'fahad' : 'autumn'
  }

  /**
   * Base64-encodes raw audio bytes.
   * Uses `Buffer` where it exists (Node.js) and falls back to `btoa` in
   * runtimes without it, such as browsers.
   */
  private toBase64(data: ArrayBuffer): string {
    if (typeof Buffer !== 'undefined') {
      return Buffer.from(data).toString('base64')
    }

    const bytes = new Uint8Array(data)
    // Convert in chunks: spreading a very large array into
    // String.fromCharCode can exceed the engine's argument limit.
    const chunkSize = 0x8000
    let binary = ''
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
      binary += String.fromCharCode(
        ...bytes.subarray(offset, offset + chunkSize),
      )
    }
    return btoa(binary)
  }

  /**
   * Maps an output format name to its MIME type.
   * Unknown formats fall back to `audio/wav`.
   */
  private getContentType(format: string): string {
    const contentTypes: Record<string, string> = {
      flac: 'audio/flac',
      mp3: 'audio/mpeg',
      mulaw: 'audio/basic',
      ogg: 'audio/ogg',
      wav: 'audio/wav',
    }
    return contentTypes[format] || 'audio/wav'
  }
}

/**
 * Builds a Groq TTS adapter from an explicitly supplied API key.
 * The model type parameter is inferred from the `model` argument at the call site.
 *
 * @param model - The model name (e.g., 'canopylabs/orpheus-v1-english')
 * @param apiKey - Your Groq API key
 * @param config - Optional extra client configuration; it is spread after
 *   `apiKey` into the config object handed to the adapter
 * @returns A `GroqTTSAdapter` bound to `model`
 *
 * @example
 * ```typescript
 * const adapter = createGroqSpeech('canopylabs/orpheus-v1-english', "gsk_...");
 *
 * const result = await generateSpeech({
 *   adapter,
 *   text: 'Hello, world!',
 *   voice: 'autumn'
 * });
 * ```
 */
export function createGroqSpeech<TModel extends GroqTTSModel>(
  model: TModel,
  apiKey: string,
  config?: Omit<GroqTTSConfig, 'apiKey'>,
): GroqTTSAdapter<TModel> {
  const adapterConfig = { apiKey, ...config }
  const adapter = new GroqTTSAdapter(adapterConfig, model)
  return adapter
}

/**
 * Builds a Groq TTS adapter, reading the API key from the environment
 * instead of taking it as an argument.
 * The model type parameter is inferred from the `model` argument at the call site.
 *
 * The key is obtained through `getGroqApiKeyFromEnv`, which looks for
 * `GROQ_API_KEY` in:
 * - `process.env` (Node.js)
 * - `window.env` (Browser with injected env)
 *
 * @param model - The model name (e.g., 'canopylabs/orpheus-v1-english')
 * @param config - Optional configuration (excluding apiKey, which is auto-detected)
 * @returns A `GroqTTSAdapter` bound to `model`
 * @throws Error if GROQ_API_KEY is not found in environment
 *
 * @example
 * ```typescript
 * // Automatically uses GROQ_API_KEY from environment
 * const adapter = groqSpeech('canopylabs/orpheus-v1-english');
 *
 * const result = await generateSpeech({
 *   adapter,
 *   text: 'Welcome to TanStack AI!',
 *   voice: 'autumn',
 *   format: 'wav'
 * });
 * ```
 */
export function groqSpeech<TModel extends GroqTTSModel>(
  model: TModel,
  config?: Omit<GroqTTSConfig, 'apiKey'>,
): GroqTTSAdapter<TModel> {
  // Resolve the key first, then delegate to the explicit-key factory.
  return createGroqSpeech(model, getGroqApiKeyFromEnv(), config)
}
25 changes: 25 additions & 0 deletions packages/typescript/ai-groq/src/audio/audio-provider-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * Common audio provider options for Groq audio endpoints.
 * This is the shape checked by `validateAudioInput` before a request is sent.
 */
export interface AudioProviderOptions {
/**
 * The text to generate audio for.
 * Maximum length is 200 characters; longer input is rejected by
 * `validateAudioInput`.
 * Use [directions] for vocal control (English voices only).
 */
input: string
/**
 * The audio model to use for generation.
 */
model: string
}

/**
 * Guards against over-long TTS input: accepts text of up to 200 characters
 * (as measured by `String.prototype.length`) and rejects anything longer.
 *
 * @param options - Audio request options; only `input` is inspected
 * @throws Error if input text exceeds 200 characters
 */
export const validateAudioInput = (options: AudioProviderOptions) => {
  const maxInputLength = 200
  const withinLimit = options.input.length <= maxInputLength
  if (withinLimit) {
    return
  }
  throw new Error('Input text exceeds maximum length of 200 characters.')
}
49 changes: 49 additions & 0 deletions packages/typescript/ai-groq/src/audio/tts-provider-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
 * Voice names accepted for Groq's English TTS model.
 * One entry per voice; extend this union when a new English voice is added.
 */
export type GroqTTSEnglishVoice =
| 'autumn'
| 'diana'
| 'hannah'
| 'austin'
| 'daniel'
| 'troy'

/**
 * Voice names accepted for Groq's Arabic TTS model.
 * This set shares no names with `GroqTTSEnglishVoice`.
 */
export type GroqTTSArabicVoice = 'fahad' | 'sultan' | 'lulwa' | 'noura'

/**
 * Union of all Groq TTS voice options (English and Arabic).
 * Note that this type alone does not tie a voice to the model it belongs to.
 */
export type GroqTTSVoice = GroqTTSEnglishVoice | GroqTTSArabicVoice

/**
 * Groq TTS output format options.
 * All five names are part of the type, but only `wav` is currently supported.
 */
export type GroqTTSFormat = 'flac' | 'mp3' | 'mulaw' | 'ogg' | 'wav'

/**
 * Groq TTS sample rate options.
 * Used as the type of `GroqTTSProviderOptions.sample_rate`, which is
 * documented as a rate in Hz.
 */
export type GroqTTSSampleRate =
| 8000
| 16000
| 22050
| 24000
| 32000
| 44100
| 48000

/**
 * Provider-specific options for Groq TTS.
 * These options are passed via `modelOptions` when calling `generateSpeech`.
 * Field names are snake_case, not camelCase.
 */
export interface GroqTTSProviderOptions {
/**
 * The sample rate of the generated audio in Hz.
 * Optional.
 */
sample_rate?: GroqTTSSampleRate
}
22 changes: 20 additions & 2 deletions packages/typescript/ai-groq/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* @module @tanstack/ai-groq
*
* Groq provider adapter for TanStack AI.
* Provides tree-shakeable adapters for Groq's Chat Completions API.
* Provides tree-shakeable adapters for Groq's Chat Completions API and TTS API.
*/

// Text (Chat) adapter
Expand All @@ -14,15 +14,33 @@ export {
type GroqTextProviderOptions,
} from './adapters/text'

// TTS adapter - for text-to-speech
export {
GroqTTSAdapter,
createGroqSpeech,
groqSpeech,
type GroqTTSConfig,
} from './adapters/tts'
export type {
GroqTTSProviderOptions,
GroqTTSVoice,
GroqTTSEnglishVoice,
GroqTTSArabicVoice,
GroqTTSFormat,
GroqTTSSampleRate,
} from './audio/tts-provider-options'

// Types
export type {
GroqChatModelProviderOptionsByName,
GroqTTSModelProviderOptionsByName,
GroqModelInputModalitiesByName,
ResolveProviderOptions,
ResolveInputModalities,
GroqChatModels,
GroqTTSModel,
} from './model-meta'
export { GROQ_CHAT_MODELS } from './model-meta'
export { GROQ_CHAT_MODELS, GROQ_TTS_MODELS } from './model-meta'
export type {
GroqTextMetadata,
GroqImageMetadata,
Expand Down
65 changes: 61 additions & 4 deletions packages/typescript/ai-groq/src/model-meta.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { GroqTextProviderOptions } from './text/text-provider-options'
import type { GroqTTSProviderOptions } from './audio/tts-provider-options'

/**
* Internal metadata structure describing a Groq model's capabilities and pricing.
Expand Down Expand Up @@ -351,14 +352,23 @@ export type GroqChatModelProviderOptionsByName = {
[K in (typeof GROQ_CHAT_MODELS)[number]]: GroqTextProviderOptions
}

/**
 * Type-only map from Groq TTS model name to its provider options type.
 * Every member of `GroqTTSModel` currently maps to the same
 * `GroqTTSProviderOptions`.
 */
export type GroqTTSModelProviderOptionsByName = {
[K in GroqTTSModel]: GroqTTSProviderOptions
}

/**
* Resolves the provider options type for a specific Groq model.
* Falls back to generic GroqTextProviderOptions for unknown models.
* Checks TTS models first, then chat models, then falls back to generic options.
*/
export type ResolveProviderOptions<TModel extends string> =
TModel extends keyof GroqChatModelProviderOptionsByName
? GroqChatModelProviderOptionsByName[TModel]
: GroqTextProviderOptions
TModel extends GroqTTSModel
? GroqTTSProviderOptions
: TModel extends keyof GroqChatModelProviderOptionsByName
? GroqChatModelProviderOptionsByName[TModel]
: GroqTextProviderOptions

/**
* Resolve input modalities for a specific model.
Expand All @@ -368,3 +378,50 @@ export type ResolveInputModalities<TModel extends string> =
TModel extends keyof GroqModelInputModalitiesByName
? GroqModelInputModalitiesByName[TModel]
: readonly ['text']

// ============================================================================
// TTS Models
// ============================================================================

/**
 * Metadata for the English Orpheus TTS model: text in, audio out, exposed
 * through the 'tts' endpoint, with no extra feature flags.
 * `as const` keeps `name` as a string literal so it can feed the
 * `GroqTTSModel` union; `satisfies` checks the shape against `ModelMeta`.
 */
const ORPHEUS_V1_ENGLISH = {
name: 'canopylabs/orpheus-v1-english',
pricing: {
input: {
// NOTE(review): pricing unit/currency is not visible here — confirm against ModelMeta.
normal: 22,
},
},
supports: {
input: ['text'],
output: ['audio'],
endpoints: ['tts'],
features: [],
},
} as const satisfies ModelMeta<GroqTTSProviderOptions>

/**
 * Metadata for the Saudi Arabic Orpheus TTS model: text in, audio out,
 * exposed through the 'tts' endpoint, with no extra feature flags.
 * `as const` keeps `name` as a string literal so it can feed the
 * `GroqTTSModel` union; `satisfies` checks the shape against `ModelMeta`.
 */
const ORPHEUS_ARABIC_SAUDI = {
name: 'canopylabs/orpheus-arabic-saudi',
pricing: {
input: {
// NOTE(review): pricing unit/currency is not visible here — confirm against ModelMeta.
normal: 40,
},
},
supports: {
input: ['text'],
output: ['audio'],
endpoints: ['tts'],
features: [],
},
} as const satisfies ModelMeta<GroqTTSProviderOptions>

/**
 * All supported Groq TTS model identifiers.
 * Built from the `name` of each TTS metadata constant and frozen with
 * `as const` so the entries stay string literals.
 */
export const GROQ_TTS_MODELS = [
ORPHEUS_V1_ENGLISH.name,
ORPHEUS_ARABIC_SAUDI.name,
] as const

/**
 * Union type of all supported Groq TTS model names.
 * Derived from `GROQ_TTS_MODELS`, so adding a model to that array extends
 * this union automatically.
 */
export type GroqTTSModel = (typeof GROQ_TTS_MODELS)[number]
6 changes: 2 additions & 4 deletions packages/typescript/ai-groq/src/utils/schema-converter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,7 @@ export function makeGroqStructuredOutputCompatible(
): Record<string, any> {
const result = { ...schema }

if (result.type === 'object') {
if (!result.properties) {
result.properties = {}
}
if (result.type === 'object' && result.properties) {
const properties = { ...result.properties }
const allPropertyNames = Object.keys(properties)

Expand Down Expand Up @@ -96,6 +93,7 @@ export function makeGroqStructuredOutputCompatible(
} else {
delete result.required
}
result.required = allPropertyNames
result.additionalProperties = false
}

Expand Down
Loading
Loading