livekit-examples · bcherry · Oct 29, 2025 · Oct 25, 2025 · Oct 29, 2025
diff --git a/src/agent.ts b/src/agent.ts
@@ -4,6 +4,7 @@ import {
   WorkerOptions,
   cli,
   defineAgent,
+  inference,
   metrics,
   voice,
 } from '@livekit/agents';
@@ -56,15 +57,23 @@ export default defineAgent({
     const session = new voice.AgentSession({
       // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
       // See all available models at https://docs.livekit.io/agents/models/stt/
-      stt: 'assemblyai/universal-streaming:en',
+      stt: new inference.STT({
+        model: 'assemblyai/universal-streaming',
+        language: 'en',
+      }),
 
       // A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
       // See all providers at https://docs.livekit.io/agents/models/llm/
-      llm: 'openai/gpt-4.1-mini',
+      llm: new inference.LLM({
+        model: 'openai/gpt-4.1-mini',
+      }),
 
       // Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
       // See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
-      tts: 'cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc',
+      tts: new inference.TTS({
+        model: 'cartesia/sonic-3',
+        voice: '9626c31c-bec5-4cca-baa8-f8ba9e84c8bc',
+      }),
 
       // VAD and turn detection are used to determine when the user is speaking and when the agent should respond
       // See more at https://docs.livekit.io/agents/build/turns