Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
WorkerOptions,
cli,
defineAgent,
+ inference,
metrics,
voice,
} from '@livekit/agents';
Expand Down Expand Up @@ -56,15 +57,23 @@ export default defineAgent({
const session = new voice.AgentSession({
// Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
// See all available models at https://docs.livekit.io/agents/models/stt/
- stt: 'assemblyai/universal-streaming:en',
+ stt: new inference.STT({
+ model: 'assemblyai/universal-streaming',
+ language: 'en',
+ }),

// A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
// See all providers at https://docs.livekit.io/agents/models/llm/
- llm: 'openai/gpt-4.1-mini',
+ llm: new inference.LLM({
+ model: 'openai/gpt-4.1-mini',
+ }),

// Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
// See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
- tts: 'cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc',
+ tts: new inference.TTS({
+ model: 'cartesia/sonic-3',
+ voice: '9626c31c-bec5-4cca-baa8-f8ba9e84c8bc',
+ }),

// VAD and turn detection are used to determine when the user is speaking and when the agent should respond
// See more at https://docs.livekit.io/agents/build/turns
Expand Down