Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ The current WebUI ships with 23 WebUI pages and 42 authenticated WebUI API route
| **V2 platform features** | Semantic vector memory, associative memory graph, memory prioritization, prediction engine, predictive cache, anomaly detection, agent registry, task delegation, pipeline execution, self-correction loop, temporal context, zero-trust execution, audit trail, integrations, event bus/webhooks, dynamic dashboards, AI widget generator, feedback learning, adaptive prompting, and multi-agent network. |
| **WebUI** | Dashboard widgets, tools controls, plugins marketplace, Soul editor, Memory, Workspace, Tasks, Workflows, Pipelines, Events, MCP, Integrations, Network, Hooks, Sessions, Analytics, Feedback, Security, Self-Improve, Autonomous Mode, Configuration, setup/login flow, and bilingual user guide. |
| **Network and agents** | Managed agent runtimes with personal/bot auth, signed inter-agent messages, trust levels, replay protection, allowlist/recipient enforcement, manual inbox fallback, remote setup docs, and local-agent visibility in Network status totals. |
| **Telegram and providers** | MTProto proxy recovery, Bot API proxy support, startup resilience when Telegram is unavailable, ffmpeg-free Telegram voice notes, Groq STT/TTS fixes, NVIDIA NIM provider support, OpenRouter free models, and 16-provider model catalog. |
| **Telegram and providers** | MTProto proxy recovery, Bot API proxy support, startup resilience when Telegram is unavailable, ffmpeg-free Telegram voice notes (pure-JS OGG/Opus encoder; no system ffmpeg required), Groq STT/TTS support including streaming WAV header fix (`0xFFFFFFFF` placeholder), NVIDIA NIM provider support, OpenRouter free models, and 16-provider model catalog. |
| **Security and reliability** | Wallet encryption, plugin isolation, exec allowlist mode, shell-free allowlist execution, TON-proxy checksum verification, SSRF guards, path/symlink hardening, auth-token hashing, CSRF fixes, restart locks, transcript caps, session TTL enforcement, CI restoration, and audit reports. |

---
Expand Down
58 changes: 58 additions & 0 deletions src/utils/__tests__/audio.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { describe, it, expect } from "vitest";
import { wavToOggOpus } from "../audio.js";

/**
 * Assemble an in-memory WAV file for tests: a 44-byte RIFF/WAVE header
 * describing mono, 48 kHz, 16-bit PCM, followed directly by the sample bytes.
 *
 * @param dataSize  Number of zero-filled payload bytes to generate when
 *                  `pcmData` is not supplied (defaults to 4).
 * @param chunkSize Value written into the `data` chunk's size field. Defaults
 *                  to the real payload length; pass something else (for
 *                  example `0xffffffff`) to simulate a header whose declared
 *                  size does not match the bytes that follow.
 * @param pcmData   Explicit payload bytes; takes precedence over `dataSize`.
 * @returns A buffer of `44 + payload length` bytes.
 */
function buildWavBuffer({
  dataSize = 4,
  chunkSize,
  pcmData,
}: {
  dataSize?: number;
  chunkSize?: number;
  pcmData?: Buffer;
} = {}): Buffer {
  const samples = pcmData ?? Buffer.alloc(dataSize);
  const declaredDataSize = chunkSize ?? samples.length;

  // Body of the "fmt " chunk for plain PCM.
  const fmtBodyBytes = 16;
  // RIFF size counts everything after the 8-byte "RIFF"+size preamble:
  // "WAVE" tag, the fmt chunk (header + body) and the data chunk (header + payload).
  const riffBodyBytes = 4 + (8 + fmtBodyBytes) + (8 + samples.length);

  const out = Buffer.alloc(8 + riffBodyBytes);

  // Sequential little-endian writer; each helper advances the cursor so the
  // header layout reads top to bottom without hand-maintained offsets.
  let cursor = 0;
  const tag = (fourCc: string): void => {
    cursor += out.write(fourCc, cursor, "ascii");
  };
  const u16 = (value: number): void => {
    cursor = out.writeUInt16LE(value, cursor);
  };
  const u32 = (value: number): void => {
    cursor = out.writeUInt32LE(value, cursor);
  };

  tag("RIFF");
  u32(riffBodyBytes);
  tag("WAVE");

  tag("fmt ");
  u32(fmtBodyBytes);
  u16(1); // audio format: PCM
  u16(1); // channels: mono
  u32(48000); // sample rate
  u32(96000); // byte rate
  u16(2); // block align
  u16(16); // bits per sample

  tag("data");
  u32(declaredDataSize);

  // The cursor now sits at byte 44, immediately after the data chunk header.
  samples.copy(out, cursor);
  return out;
}

// Test suite for wavToOggOpus: a well-formed WAV, a WAV with a streaming
// placeholder data size, and a non-WAV input that must be rejected.
describe("wavToOggOpus", () => {
  it("converts a valid WAV buffer to OGG/Opus", () => {
    const wav = buildWavBuffer({ pcmData: Buffer.alloc(960 * 2) }); // 960 samples, 16-bit
    const ogg = wavToOggOpus(wav);
    // Every Ogg page begins with the "OggS" capture pattern.
    // `subarray` replaces the deprecated `Buffer#slice`; both return a view.
    expect(ogg.subarray(0, 4).toString("ascii")).toBe("OggS");
  });

  it("handles streaming WAV where data chunk size is 0xFFFFFFFF (Groq TTS format)", () => {
    // Groq TTS generates WAV with streaming placeholder size 0xFFFFFFFF
    // because the total audio length is not known at the time the header is written.
    const pcmData = Buffer.alloc(960 * 2); // 960 mono 16-bit samples = one Opus frame
    const wav = buildWavBuffer({ pcmData, chunkSize: 0xffffffff });
    // Must NOT throw "WAV parse error: chunk 'data' size 4294967295 exceeds buffer"
    const ogg = wavToOggOpus(wav);
    expect(ogg.subarray(0, 4).toString("ascii")).toBe("OggS");
  });

  it("rejects a buffer that is not a WAV file", () => {
    const buf = Buffer.from("not a wav file");
    expect(() => wavToOggOpus(buf)).toThrow("WAV parse error: missing RIFF/WAVE header");
  });
});
6 changes: 5 additions & 1 deletion src/utils/audio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,12 @@ function parseWav(buf: Buffer): ParsedWav {

while (offset + 8 <= buf.length) {
const id = buf.toString("ascii", offset, offset + 4);
const size = buf.readUInt32LE(offset + 4);
const start = offset + 8;
// 0xFFFFFFFF is a streaming/unknown-size placeholder used by some TTS providers
// (e.g. Groq) that write the WAV header before the total audio length is known.
// Treat it as "rest of buffer" rather than rejecting the file.
const rawSize = buf.readUInt32LE(offset + 4);
const size = rawSize === 0xffffffff ? buf.length - start : rawSize;
if (start + size > buf.length) {
throw new Error(`WAV parse error: chunk '${id}' size ${size} exceeds buffer`);
}
Expand Down
Loading