Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/famous-beds-call.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents-plugin-openai": patch
---

ignore apiKey when isAzure is set and an entraToken is provided
5 changes: 5 additions & 0 deletions .changeset/fix-synchronizer-race.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents": patch
---

fix: resolve race condition in TextAudioSynchronizer that caused "TextAudioSynchronizer is closed" errors in AgentPlayout
5 changes: 5 additions & 0 deletions .changeset/metal-keys-run.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents-plugin-deepgram": patch
---

fix(deepgram): add keyterm for nova-3-general #337
6 changes: 6 additions & 0 deletions .changeset/rude-games-allow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'@livekit/agents-plugin-neuphonic': minor
'@livekit/agents-plugin-resemble': minor
---

initial version
5 changes: 5 additions & 0 deletions .changeset/selfish-ducks-compare.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents-plugin-openai": patch
---

feat(openai): add new TTS model and voices, include optional instruct…
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ jobs:
ELEVEN_API_KEY: ${{ secrets.ELEVEN_API_KEY }}
DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }}
CARTESIA_API_KEY: ${{ secrets.CARTESIA_API_KEY }}
NEUPHONIC_API_KEY: ${{ secrets.NEUPHONIC_API_KEY }}
RESEMBLE_API_KEY: ${{ secrets.RESEMBLE_API_KEY }}
run: pnpm test plugins
- name: Test specific plugins
if: steps.filter.outputs.agents-or-tests == 'false' && steps.filter.outputs.plugins == 'true' && github.event_name == 'pull_request'
Expand All @@ -57,6 +59,8 @@ jobs:
ELEVEN_API_KEY: ${{ secrets.ELEVEN_API_KEY }}
DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }}
CARTESIA_API_KEY: ${{ secrets.CARTESIA_API_KEY }}
NEUPHONIC_API_KEY: ${{ secrets.NEUPHONIC_API_KEY }}
RESEMBLE_API_KEY: ${{ secrets.RESEMBLE_API_KEY }}
run: |
plugins=$(git diff-tree --name-only --no-commit-id -r ${{ github.sha }} | grep '^plugins.*\.ts$' | cut -d/ -f2 | sort -u | tr '\n' ' ')
read -ra plugins <<< "$plugins"
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ The following plugins are available today:
| [@livekit/agents-plugin-openai](https://www.npmjs.com/package/@livekit/agents-plugin-openai) | STT, LLM, TTS, Realtime API |
| [@livekit/agents-plugin-deepgram](https://www.npmjs.com/package/@livekit/agents-plugin-deepgram) | STT |
| [@livekit/agents-plugin-elevenlabs](https://www.npmjs.com/package/@livekit/agents-plugin-elevenlabs) | TTS |
| [@livekit/agents-plugin-cartesia](https://www.npmjs.com/package/@livekit/agents-plugin-cartesia) | TTS |
| [@livekit/agents-plugin-resemble](https://www.npmjs.com/package/@livekit/agents-plugin-resemble) | TTS |
| [@livekit/agents-plugin-neuphonic](https://www.npmjs.com/package/@livekit/agents-plugin-neuphonic) | TTS |
| [@livekit/agents-plugin-silero](https://www.npmjs.com/package/@livekit/agents-plugin-silero) | VAD |
| [@livekit/agents-plugin-livekit](https://www.npmjs.com/package/@livekit/agents-plugin-livekit) | End-of-turn detection |

Expand Down
17 changes: 10 additions & 7 deletions agents/src/multimodal/agent_playout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ export class AgentPlayout extends EventEmitter {
}
handle.synchronizer.pushText(text);
}
handle.synchronizer.markTextSegmentEnd();
if (!cancelled) {
handle.synchronizer.markTextSegmentEnd();
}
resolveText();
} catch (error) {
rejectText(error);
Expand Down Expand Up @@ -231,23 +233,24 @@ export class AgentPlayout extends EventEmitter {
await gracefullyCancel(captureTask);
}

if (!readTextTask.isCancelled) {
await gracefullyCancel(readTextTask);
}

handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;

if (handle.interrupted || captureTask.error) {
await handle.synchronizer.close(true);
this.#audioSource.clearQueue(); // make sure to remove any queued frames
}

if (!readTextTask.isCancelled) {
await gracefullyCancel(readTextTask);
}

if (!firstFrame) {
this.emit('playout_stopped', handle.interrupted);
}

handle.doneFut.resolve();
await handle.synchronizer.close(false);

const isInterrupted = handle.interrupted || !!captureTask.error;
await handle.synchronizer.close(isInterrupted);
}

resolve();
Expand Down
5 changes: 4 additions & 1 deletion plugins/deepgram/src/stt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export interface STTOptions {
sampleRate: number;
numChannels: number;
keywords: [string, number][];
keyterm: string[];
profanityFilter: boolean;
dictation: boolean;
diarize: boolean;
Expand All @@ -42,6 +43,7 @@ const defaultSTTOptions: STTOptions = {
sampleRate: 16000,
numChannels: 1,
keywords: [],
keyterm: [],
profanityFilter: false,
dictation: false,
diarize: false,
Expand Down Expand Up @@ -138,6 +140,7 @@ export class SpeechStream extends stt.SpeechStream {
endpointing: this.#opts.endpointing || false,
filler_words: this.#opts.fillerWords,
keywords: this.#opts.keywords.map((x) => x.join(':')),
keyterm: this.#opts.keyterm,
profanity_filter: this.#opts.profanityFilter,
language: this.#opts.language,
};
Expand All @@ -146,7 +149,7 @@ export class SpeechStream extends stt.SpeechStream {
if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {
streamURL.searchParams.append(k, encodeURIComponent(v));
} else {
v.forEach((x) => streamURL.searchParams.append('keywords', encodeURIComponent(x)));
v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));
}
}
});
Expand Down
16 changes: 16 additions & 0 deletions plugins/neuphonic/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!--
SPDX-FileCopyrightText: 2024 LiveKit, Inc.

SPDX-License-Identifier: Apache-2.0
-->
# Neuphonic plugin for LiveKit Agents

The Agents Framework is designed for building realtime, programmable
participants that run on servers. Use it to create conversational, multi-modal
voice agents that can see, hear, and understand.

This package contains the Neuphonic plugin, which allows for voice synthesis.
Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
information on how to use it.
See the [repository](https://github.com/livekit/agents-js) for more information
about the framework as a whole.
20 changes: 20 additions & 0 deletions plugins/neuphonic/api-extractor.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Config file for API Extractor. For more info, please visit: https://api-extractor.com
*/
{
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",

/**
* Optionally specifies another JSON config file that this file extends from. This provides a way for
* standard settings to be shared across multiple projects.
*
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be
* resolved using NodeJS require().
*
* SUPPORTED TOKENS: none
* DEFAULT VALUE: ""
*/
"extends": "../../api-extractor-shared.json",
"mainEntryPointFilePath": "./dist/index.d.ts"
}
49 changes: 49 additions & 0 deletions plugins/neuphonic/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"name": "@livekit/agents-plugin-neuphonic",
"version": "0.0.0",
"description": "Neuphonic plugin for LiveKit Node Agents",
"main": "dist/index.js",
"require": "dist/index.cjs",
"types": "dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"require": "./dist/index.cjs"
}
},
"author": "LiveKit",
"type": "module",
"repository": "git@github.com:livekit/agents-js.git",
"license": "Apache-2.0",
"files": [
"dist",
"src",
"README.md"
],
"scripts": {
"build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
"clean": "rm -rf dist",
"clean:build": "pnpm clean && pnpm build",
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
},
"devDependencies": {
"@livekit/agents": "workspace:^x",
"@livekit/agents-plugin-openai": "workspace:^x",
"@livekit/agents-plugins-test": "workspace:^x",
"@livekit/rtc-node": "^0.13.4",
"@microsoft/api-extractor": "^7.35.0",
"@types/ws": "^8.5.10",
"tsup": "^8.3.5",
"typescript": "^5.0.0"
},
"dependencies": {
"ws": "^8.16.0"
},
"peerDependencies": {
"@livekit/agents": "workspace:^x",
"@livekit/rtc-node": "^0.13.4"
}
}
5 changes: 5 additions & 0 deletions plugins/neuphonic/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0

export * from './tts.js';
9 changes: 9 additions & 0 deletions plugins/neuphonic/src/models.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0

/**
 * Encoding identifiers recognised by the Neuphonic TTS plugin.
 *
 * NOTE(review): the names suggest linear PCM and mu-law PCM; confirm the exact
 * semantics against the Neuphonic API documentation.
 */
export type TTSEncodings =
  | 'pcm_linear'
  | 'pcm_mulaw';

/**
 * Model identifiers recognised by the Neuphonic TTS plugin.
 *
 * NOTE(review): presumably a speed-oriented and a quality-oriented model;
 * confirm against the Neuphonic API documentation.
 */
export type TTSModels =
  | 'neu-fast'
  | 'neu-hq';

/**
 * Language codes recognised by the Neuphonic TTS plugin.
 *
 * NOTE(review): `'en-hi'` looks like a mixed English/Hindi option; verify
 * against the Neuphonic API documentation.
 */
export type TTSLangCodes =
  | 'en'
  | 'nl'
  | 'es'
  | 'de'
  | 'hi'
  | 'en-hi'
  | 'ar';
11 changes: 11 additions & 0 deletions plugins/neuphonic/src/tts.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { STT } from '@livekit/agents-plugin-openai';
import { tts } from '@livekit/agents-plugins-test';
import { describe } from 'vitest';
import { TTS } from './tts.js';

// Runs the shared plugin TTS test helper against the Neuphonic TTS,
// passing the OpenAI STT instance as its second argument.
describe('Neuphonic', async () => {
  // Construct the TTS first, then the STT, both with default options.
  const synthesizer = new TTS();
  const transcriber = new STT();
  await tts(synthesizer, transcriber);
});
Loading
Loading