livekit · davidzhao · Apr 8, 2025 · Mar 17, 2025 · Mar 18, 2025 · Mar 19, 2025
diff --git a/.changeset/famous-beds-call.md b/.changeset/famous-beds-call.md
@@ -0,0 +1,5 @@
+---
+"@livekit/agents-plugin-openai": patch
+---
+
+ignore apiKey when both isAzure and entraToken are set
diff --git a/.changeset/fix-synchronizer-race.md b/.changeset/fix-synchronizer-race.md
@@ -0,0 +1,5 @@
+---
+"@livekit/agents": patch
+---
+
+fix: resolve race condition in TextAudioSynchronizer that caused "TextAudioSynchronizer is closed" errors in AgentPlayout
diff --git a/.changeset/metal-keys-run.md b/.changeset/metal-keys-run.md
@@ -0,0 +1,5 @@
+---
+"@livekit/agents-plugin-deepgram": patch
+---
+
+fix(deepgram): add keyterm for nova-3-general #337
diff --git a/.changeset/rude-games-allow.md b/.changeset/rude-games-allow.md
@@ -0,0 +1,6 @@
+---
+'@livekit/agents-plugin-neuphonic': minor
+'@livekit/agents-plugin-resemble': minor
+---
+
+initial version
diff --git a/.changeset/selfish-ducks-compare.md b/.changeset/selfish-ducks-compare.md
@@ -0,0 +1,5 @@
+---
+"@livekit/agents-plugin-openai": patch
+---
+
+feat(openai): add new TTS model and voices, include optional instructions parameter
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -49,6 +49,8 @@ jobs:
           ELEVEN_API_KEY: ${{ secrets.ELEVEN_API_KEY }}
           DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }}
           CARTESIA_API_KEY: ${{ secrets.CARTESIA_API_KEY }}
+          NEUPHONIC_API_KEY: ${{ secrets.NEUPHONIC_API_KEY }}
+          RESEMBLE_API_KEY: ${{ secrets.RESEMBLE_API_KEY }}
         run: pnpm test plugins
       - name: Test specific plugins
         if: steps.filter.outputs.agents-or-tests == 'false' && steps.filter.outputs.plugins == 'true' && github.event_name == 'pull_request'
@@ -57,6 +59,8 @@ jobs:
           ELEVEN_API_KEY: ${{ secrets.ELEVEN_API_KEY }}
           DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }}
           CARTESIA_API_KEY: ${{ secrets.CARTESIA_API_KEY }}
+          NEUPHONIC_API_KEY: ${{ secrets.NEUPHONIC_API_KEY }}
+          RESEMBLE_API_KEY: ${{ secrets.RESEMBLE_API_KEY }}
         run: |
           plugins=$(git diff-tree --name-only --no-commit-id -r ${{ github.sha }} | grep '^plugins.*\.ts$' | cut -d/ -f2 | sort -u | tr '\n' ' ')
           read -ra plugins <<< "$plugins"

diff --git a/README.md b/README.md
@@ -66,6 +66,9 @@ The following plugins are available today:
 | [@livekit/agents-plugin-openai](https://www.npmjs.com/package/@livekit/agents-plugin-openai)         | STT, LLM, TTS, Realtime API |
 | [@livekit/agents-plugin-deepgram](https://www.npmjs.com/package/@livekit/agents-plugin-deepgram)     | STT                         |
 | [@livekit/agents-plugin-elevenlabs](https://www.npmjs.com/package/@livekit/agents-plugin-elevenlabs) | TTS                         |
+| [@livekit/agents-plugin-cartesia](https://www.npmjs.com/package/@livekit/agents-plugin-cartesia)     | TTS                         |
+| [@livekit/agents-plugin-resemble](https://www.npmjs.com/package/@livekit/agents-plugin-resemble)     | TTS                         |
+| [@livekit/agents-plugin-neuphonic](https://www.npmjs.com/package/@livekit/agents-plugin-neuphonic)   | TTS                         |
 | [@livekit/agents-plugin-silero](https://www.npmjs.com/package/@livekit/agents-plugin-silero)         | VAD                         |
 | [@livekit/agents-plugin-livekit](https://www.npmjs.com/package/@livekit/agents-plugin-livekit)       | End-of-turn detection       |
 

diff --git a/agents/src/multimodal/agent_playout.ts b/agents/src/multimodal/agent_playout.ts
@@ -161,7 +161,9 @@ export class AgentPlayout extends EventEmitter {
                     }
                     handle.synchronizer.pushText(text);
                   }
-                  handle.synchronizer.markTextSegmentEnd();
+                  if (!cancelled) {
+                    handle.synchronizer.markTextSegmentEnd();
+                  }
                   resolveText();
                 } catch (error) {
                   rejectText(error);
@@ -231,23 +233,24 @@ export class AgentPlayout extends EventEmitter {
               await gracefullyCancel(captureTask);
             }
 
+            if (!readTextTask.isCancelled) {
+              await gracefullyCancel(readTextTask);
+            }
+
             handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
 
             if (handle.interrupted || captureTask.error) {
-              await handle.synchronizer.close(true);
               this.#audioSource.clearQueue(); // make sure to remove any queued frames
             }
 
-            if (!readTextTask.isCancelled) {
-              await gracefullyCancel(readTextTask);
-            }
-
             if (!firstFrame) {
               this.emit('playout_stopped', handle.interrupted);
             }
 
             handle.doneFut.resolve();
-            await handle.synchronizer.close(false);
+
+            const isInterrupted = handle.interrupted || !!captureTask.error;
+            await handle.synchronizer.close(isInterrupted);
           }
 
           resolve();

diff --git a/plugins/deepgram/src/stt.ts b/plugins/deepgram/src/stt.ts
@@ -22,6 +22,7 @@ export interface STTOptions {
   sampleRate: number;
   numChannels: number;
   keywords: [string, number][];
+  keyterm: string[];
   profanityFilter: boolean;
   dictation: boolean;
   diarize: boolean;
@@ -42,6 +43,7 @@ const defaultSTTOptions: STTOptions = {
   sampleRate: 16000,
   numChannels: 1,
   keywords: [],
+  keyterm: [],
   profanityFilter: false,
   dictation: false,
   diarize: false,
@@ -138,6 +140,7 @@ export class SpeechStream extends stt.SpeechStream {
         endpointing: this.#opts.endpointing || false,
         filler_words: this.#opts.fillerWords,
         keywords: this.#opts.keywords.map((x) => x.join(':')),
+        keyterm: this.#opts.keyterm,
         profanity_filter: this.#opts.profanityFilter,
         language: this.#opts.language,
       };
@@ -146,7 +149,7 @@ export class SpeechStream extends stt.SpeechStream {
           if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {
             streamURL.searchParams.append(k, encodeURIComponent(v));
           } else {
-            v.forEach((x) => streamURL.searchParams.append('keywords', encodeURIComponent(x)));
+            v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));
           }
         }
       });

diff --git a/plugins/neuphonic/README.md b/plugins/neuphonic/README.md
@@ -0,0 +1,16 @@
+<!--
+SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+
+SPDX-License-Identifier: Apache-2.0
+-->
+# Neuphonic plugin for LiveKit Agents
+
+The Agents Framework is designed for building realtime, programmable
+participants that run on servers. Use it to create conversational, multi-modal
+voice agents that can see, hear, and understand.
+
+This package contains the Neuphonic plugin, which allows for voice synthesis.
+Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
+information on how to use it.
+See the [repository](https://github.com/livekit/agents-js) for more information
+about the framework as a whole.
diff --git a/plugins/neuphonic/api-extractor.json b/plugins/neuphonic/api-extractor.json
@@ -0,0 +1,20 @@
+/**
+ * Config file for API Extractor.  For more info, please visit: https://api-extractor.com
+ */
+{
+  "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
+
+  /**
+   * Optionally specifies another JSON config file that this file extends from.  This provides a way for
+   * standard settings to be shared across multiple projects.
+   *
+   * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
+   * the "extends" field.  Otherwise, the first path segment is interpreted as an NPM package name, and will be
+   * resolved using NodeJS require().
+   *
+   * SUPPORTED TOKENS: none
+   * DEFAULT VALUE: ""
+   */
+  "extends": "../../api-extractor-shared.json",
+  "mainEntryPointFilePath": "./dist/index.d.ts"
+}
diff --git a/plugins/neuphonic/package.json b/plugins/neuphonic/package.json
@@ -0,0 +1,49 @@
+{
+  "name": "@livekit/agents-plugin-neuphonic",
+  "version": "0.0.0",
+  "description": "Neuphonic plugin for LiveKit Node Agents",
+  "main": "dist/index.js",
+  "require": "dist/index.cjs",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js",
+      "require": "./dist/index.cjs"
+    }
+  },
+  "author": "LiveKit",
+  "type": "module",
+  "repository": "git@github.com:livekit/agents-js.git",
+  "license": "Apache-2.0",
+  "files": [
+    "dist",
+    "src",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
+    "clean": "rm -rf dist",
+    "clean:build": "pnpm clean && pnpm build",
+    "lint": "eslint -f unix \"src/**/*.{ts,js}\"",
+    "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
+    "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
+  },
+  "devDependencies": {
+    "@livekit/agents": "workspace:^x",
+    "@livekit/agents-plugin-openai": "workspace:^x",
+    "@livekit/agents-plugins-test": "workspace:^x",
+    "@livekit/rtc-node": "^0.13.4",
+    "@microsoft/api-extractor": "^7.35.0",
+    "@types/ws": "^8.5.10",
+    "tsup": "^8.3.5",
+    "typescript": "^5.0.0"
+  },
+  "dependencies": {
+    "ws": "^8.16.0"
+  },
+  "peerDependencies": {
+    "@livekit/agents": "workspace:^x",
+    "@livekit/rtc-node": "^0.13.4"
+  }
+}
diff --git a/plugins/neuphonic/src/index.ts b/plugins/neuphonic/src/index.ts
@@ -0,0 +1,5 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+export * from './tts.js';
diff --git a/plugins/neuphonic/src/models.ts b/plugins/neuphonic/src/models.ts
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+export type TTSEncodings = 'pcm_linear' | 'pcm_mulaw';
+
+export type TTSModels = 'neu-fast' | 'neu-hq';
+
+export type TTSLangCodes = 'en' | 'nl' | 'es' | 'de' | 'hi' | 'en-hi' | 'ar';
diff --git a/plugins/neuphonic/src/tts.test.ts b/plugins/neuphonic/src/tts.test.ts
@@ -0,0 +1,11 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { STT } from '@livekit/agents-plugin-openai';
+import { tts } from '@livekit/agents-plugins-test';
+import { describe } from 'vitest';
+import { TTS } from './tts.js';
+
+describe('Neuphonic', async () => {
+  await tts(new TTS(), new STT());
+});