DeepLcom · DeeJayTC · Mar 31, 2026 · Apr 8, 2026 · Copilot · Mar 31, 2026
diff --git a/DeepL/DeepL.csproj b/DeepL/DeepL.csproj
@@ -34,6 +34,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="5.0.1"/>
     <PackageReference Include="System.Text.Json" Version="5.0.2"/>
+    <PackageReference Include="System.Net.WebSockets.Client" Version="4.3.2" Condition="'$(TargetFramework)' == 'netstandard2.0'"/>
   </ItemGroup>
 
 

diff --git a/DeepL/DeepLClient.cs b/DeepL/DeepLClient.cs
@@ -6,6 +6,7 @@
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
+using System.Net.WebSockets;
 using System.Text.Json;
 using System.Text.Json.Serialization;
 using System.Threading;
@@ -54,7 +55,7 @@ Task<WriteResult> RephraseTextAsync(
   ///   Client for the DeepL API. To use the DeepL API, initialize an instance of this class using your DeepL
   ///   Authentication Key. All functions are thread-safe, aside from <see cref="DeepLClient.Dispose" />.
   /// </summary>
-  public sealed class DeepLClient : Translator, IWriter, IGlossaryManager, IStyleRuleManager {
+  public sealed class DeepLClient : Translator, IWriter, IGlossaryManager, IStyleRuleManager, IVoiceManager {
     /// <summary>Initializes a new instance of the <see cref="AuthorizationException" /> class.</summary>
     /// <param name="message">The message that describes the error.</param>
     public DeepLClient(string authKey, DeepLClientOptions? options = null) : base(authKey, options) { }
@@ -939,6 +940,79 @@ private static (string Key, string Value)[] CreateLanguageQueryParams(
       DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
     };
 
+    /// <inheritdoc />
+    public async Task<IVoiceSession> CreateVoiceSessionAsync(
+          VoiceSessionOptions options,
+          CancellationToken cancellationToken = default) {
+      if (options == null) {
+        throw new ArgumentNullException(nameof(options));
+      }
+
+      if (options.TargetLanguages == null || options.TargetLanguages.Length == 0) {
+        throw new ArgumentException("At least one target language must be specified", nameof(options));
+      }
+
+      if (options.TargetLanguages.Length > 5) {
+        throw new ArgumentException("Maximum 5 target languages per session", nameof(options));
+      }
+
+      var requestData = new Dictionary<string, object> {
+        ["source_media_content_type"] = options.SourceMediaContentType,
+        ["target_languages"] = options.TargetLanguages
+      };
+
+      if (options.MessageFormat != null) {
+        requestData["message_format"] = options.MessageFormat.Value.ToApiValue();
+      }
+
+      if (options.SourceLanguage != null) {
+        requestData["source_language"] = options.SourceLanguage;
+      }
+
+      if (options.SourceLanguageMode != null) {
+        requestData["source_language_mode"] = options.SourceLanguageMode.Value.ToApiValue();
+      }
+
+      if (options.TargetMediaLanguages != null) {
+        requestData["target_media_languages"] = options.TargetMediaLanguages;
+      }
+
+      if (options.TargetMediaContentType != null) {
+        requestData["target_media_content_type"] = options.TargetMediaContentType;
+      }
+
+      if (options.TargetMediaVoice != null) {
+        requestData["target_media_voice"] = options.TargetMediaVoice.Value.ToApiValue();
+      }
+
+      if (options.GlossaryId != null) {
+        requestData["glossary_id"] = options.GlossaryId;
+      }
+
+      if (options.Formality != null) {
+        requestData["formality"] = options.Formality;
+      }
+
+      using var responseMessage = await _client
+            .ApiPostJsonAsync("v3/voice/realtime", cancellationToken, requestData, SerializationOptions)
+            .ConfigureAwait(false);
+
+      await DeepLHttpClient.CheckStatusCodeAsync(responseMessage).ConfigureAwait(false);
+      var sessionInfo = await JsonUtils.DeserializeAsync<VoiceSessionInfo>(responseMessage).ConfigureAwait(false);
+
+      // Open the WebSocket; the URI is built inside the try so a malformed session response is wrapped too.
+      var webSocket = new ClientWebSocket();
+      try {
+        var wsUri = new Uri($"{sessionInfo.StreamingUrl}?token={Uri.EscapeDataString(sessionInfo.Token)}");
+        await webSocket.ConnectAsync(wsUri, cancellationToken).ConfigureAwait(false);
+      } catch (Exception ex) {
+        webSocket.Dispose();
+        throw new DeepLException("Failed to establish Voice API WebSocket connection", ex);
+      }
+
+      return new VoiceSession(_client, webSocket, sessionInfo);
+    }
+
     /// <summary>Class used for JSON-deserialization of style rule list results.</summary>
     private readonly struct StyleRuleListResult {
       /// <summary>Initializes a new instance of <see cref="StyleRuleListResult" />, used for JSON deserialization.</summary>

diff --git a/DeepL/IVoiceManager.cs b/DeepL/IVoiceManager.cs
@@ -0,0 +1,28 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace DeepL {
+  /// <summary>Interface for creating Voice API streaming sessions (see <see cref="IVoiceSession" />).</summary>
+  public interface IVoiceManager : IDisposable {
+    /// <summary>
+    ///   Creates a new Voice API streaming session for real-time speech transcription and translation.
+    ///   This requests a session from the DeepL API and then establishes a WebSocket connection to it.
+    /// </summary>
+    /// <param name="options">Session configuration (audio format, languages, etc.). Must not be null.</param>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <returns>An <see cref="IVoiceSession" /> for streaming audio and receiving transcripts; dispose it when done.</returns>
+    /// <exception cref="ArgumentException">If <paramref name="options" /> is null or any option is invalid.</exception>
+    /// <exception cref="DeepLException">
+    ///   If any error occurs while communicating with the DeepL API, a
+    ///   <see cref="DeepLException" /> or a derived class will be thrown.
+    /// </exception>
+    Task<IVoiceSession> CreateVoiceSessionAsync(
+          VoiceSessionOptions options,
+          CancellationToken cancellationToken = default);
+  }
+}
diff --git a/DeepL/IVoiceSession.cs b/DeepL/IVoiceSession.cs
@@ -0,0 +1,77 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using DeepL.Model;
+
+namespace DeepL {
+  /// <summary>
+  ///   Represents an active Voice API streaming session. Provides methods for sending audio data and receiving
+  ///   real-time transcriptions and translations via events.
+  /// </summary>
+  /// <remarks>
+  ///   Events fire on a background thread. Consumers are responsible for marshaling to the appropriate
+  ///   synchronization context if needed. Dispose the session to close the WebSocket connection.
+  /// </remarks>
+  public interface IVoiceSession : IDisposable {
+    /// <summary>Raised when a source transcript update is received from the server.</summary>
+    event EventHandler<TranscriptUpdate>? SourceTranscriptUpdated;
+
+    /// <summary>Raised when a target transcript update is received; see <see cref="TranscriptUpdate.Language" />.</summary>
+    event EventHandler<TranscriptUpdate>? TargetTranscriptUpdated;
+
+    /// <summary>
+    ///   Raised when a target media audio chunk is received from the server. This feature is in closed beta.
+    /// </summary>
+    event EventHandler<TargetMediaChunk>? TargetMediaChunkReceived;
+
+    /// <summary>Raised when an error message is received over the WebSocket connection.</summary>
+    event EventHandler<VoiceStreamError>? ErrorReceived;
+
+    /// <summary>Raised when the end-of-stream message is received, indicating that all outputs are complete.</summary>
+    event EventHandler? StreamEnded;
+
+    /// <summary>The unique session identifier, or <see langword="null" /> if none is available.</summary>
+    string? SessionId { get; }
+
+    /// <summary>Whether the WebSocket connection is currently open.</summary>
+    bool IsConnected { get; }
+
+    /// <summary>
+    ///   Sends a chunk of audio data to the server. The audio encoding must match the
+    ///   <see cref="VoiceSessionOptions.SourceMediaContentType" /> specified when creating the session.
+    /// </summary>
+    /// <param name="audioData">Audio data to send. Must not exceed 100 KB or 1 second duration.</param>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If the session is not connected or sending fails.</exception>
+    Task SendAudioAsync(byte[] audioData, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    ///   Sends a chunk of audio data to the server; memory-efficient overload taking a segment of an existing buffer.
+    /// </summary>
+    /// <param name="audioData">Audio data to send. Must not exceed 100 KB or 1 second duration.</param>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If the session is not connected or sending fails.</exception>
+    Task SendAudioAsync(ArraySegment<byte> audioData, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    ///   Signals the end of the audio stream. Causes finalization of tentative transcript segments and
+    ///   triggers emission of final transcript updates, end-of-transcript, and end-of-stream messages.
+    ///   No more audio data can be sent after calling this method.
+    /// </summary>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If the session is not connected or sending fails.</exception>
+    Task EndAudioAsync(CancellationToken cancellationToken = default);
+
+    /// <summary>
+    ///   Requests a reconnection token and establishes a new WebSocket connection, resuming the session.
+    ///   Call this when the WebSocket connection is lost unexpectedly.
+    /// </summary>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If reconnection fails.</exception>
+    Task ReconnectAsync(CancellationToken cancellationToken = default);
+  }
+}
diff --git a/DeepL/Model/TargetMediaChunk.cs b/DeepL/Model/TargetMediaChunk.cs
@@ -0,0 +1,68 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>
+  ///   Represents a translated audio chunk from the Voice API. This feature is currently in closed beta.
+  ///   Audio data is provided as an array of base64-encoded indivisible chunks.
+  /// </summary>
+  public sealed class TargetMediaChunk {
+    /// <summary>Initializes a new instance of <see cref="TargetMediaChunk" />.</summary>
+    /// <param name="contentType">The content type of the audio data. Present in the first message.</param>
+    /// <param name="headers">Number of header packets at the start of the data array, or null if all are audio.</param>
+    /// <param name="data">Array of base64-encoded audio data packets; null is stored as an empty array.</param>
+    /// <param name="text">Text corresponding to this audio chunk, for subtitle synchronization.</param>
+    /// <param name="language">The target language of this audio chunk.</param>
+    /// <param name="duration">Duration of this audio chunk in seconds.</param>
+    /// <remarks>
+    ///   The constructor for this class (and all other Model classes) should not be used by library users. Ideally it
+    ///   would be marked <see langword="internal" />, but needs to be <see langword="public" /> for JSON deserialization.
+    ///   In future this function may have backwards-incompatible changes.
+    /// </remarks>
+    [JsonConstructor]
+    public TargetMediaChunk(
+          string? contentType,
+          int? headers,
+          string[] data,
+          string? text,
+          string? language,
+          double? duration) {
+      ContentType = contentType;
+      Headers = headers;
+      Data = data ?? System.Array.Empty<string>(); // "data" may be missing from the JSON; never expose null
+      Text = text;
+      Language = language;
+      Duration = duration;
+    }
+
+    /// <summary>The content type of the audio data. Present in the first message of a sequence.</summary>
+    [JsonPropertyName("content_type")]
+    public string? ContentType { get; }
+
+    /// <summary>
+    ///   Number of packets at the start of <see cref="Data" /> that contain initialization/header data.
+    ///   Null or absent when all packets are audio data.
+    /// </summary>
+    [JsonPropertyName("headers")]
+    public int? Headers { get; }
+
+    /// <summary>Array of base64-encoded indivisible audio data packets. Never null; empty if none were received.</summary>
+    [JsonPropertyName("data")]
+    public string[] Data { get; }
+
+    /// <summary>Text corresponding to this audio chunk, for subtitle synchronization.</summary>
+    [JsonPropertyName("text")]
+    public string? Text { get; }
+
+    /// <summary>The target language of this audio chunk.</summary>
+    [JsonPropertyName("language")]
+    public string? Language { get; }
+
+    /// <summary>Duration of this audio chunk in seconds.</summary>
+    [JsonPropertyName("duration")]
+    public double? Duration { get; }
+  }
+}
diff --git a/DeepL/Model/TranscriptSegment.cs b/DeepL/Model/TranscriptSegment.cs
@@ -0,0 +1,29 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>A single text segment within a Voice API transcript update.</summary>
+  public sealed class TranscriptSegment {
+    /// <summary>Initializes a new instance of <see cref="TranscriptSegment" />.</summary>
+    /// <param name="text">The text content of this segment; null is stored as an empty string.</param>
+    /// <remarks>
+    ///   The constructor for this class (and all other Model classes) should not be used by library users. Ideally it
+    ///   would be marked <see langword="internal" />, but needs to be <see langword="public" /> for JSON deserialization.
+    ///   In future this function may have backwards-incompatible changes.
+    /// </remarks>
+    [JsonConstructor]
+    public TranscriptSegment(string text) {
+      Text = text ?? string.Empty; // "text" may be missing from the JSON; keep Text and ToString() non-null
+    }
+
+    /// <summary>The text content of this segment. Never null; empty if no text was received.</summary>
+    [JsonPropertyName("text")]
+    public string Text { get; }
+
+    /// <summary>Returns the text content of this segment (never null).</summary>
+    public override string ToString() => Text;
+  }
+}
diff --git a/DeepL/Model/TranscriptUpdate.cs b/DeepL/Model/TranscriptUpdate.cs
@@ -0,0 +1,41 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>
+  ///   Represents a transcript update from the Voice API, containing concluded (finalized) and tentative
+  ///   (in-progress) text segments. Used for both source and target transcript updates.
+  /// </summary>
+  public sealed class TranscriptUpdate {
+    /// <summary>Initializes a new instance of <see cref="TranscriptUpdate" />.</summary>
+    /// <param name="concluded">Finalized text segments that will not change; null is stored as an empty array.</param>
+    /// <param name="tentative">Preliminary text segments that may be refined; null is stored as an empty array.</param>
+    /// <param name="language">The language code of this transcript update. Only present on target updates.</param>
+    /// <remarks>
+    ///   The constructor for this class (and all other Model classes) should not be used by library users. Ideally it
+    ///   would be marked <see langword="internal" />, but needs to be <see langword="public" /> for JSON deserialization.
+    ///   In future this function may have backwards-incompatible changes.
+    /// </remarks>
+    [JsonConstructor]
+    public TranscriptUpdate(TranscriptSegment[] concluded, TranscriptSegment[] tentative, string? language) {
+      Concluded = concluded ?? System.Array.Empty<TranscriptSegment>(); // JSON may omit a list; never expose null
+      Tentative = tentative ?? System.Array.Empty<TranscriptSegment>();
+      Language = language;
+    }
+
+    /// <summary>Finalized text segments that will not change once sent. Never null; empty if none were received.</summary>
+    [JsonPropertyName("concluded")]
+    public TranscriptSegment[] Concluded { get; }
+
+    /// <summary>Preliminary text segments that may be refined as more audio arrives. Never null; may be empty.</summary>
+    [JsonPropertyName("tentative")]
+    public TranscriptSegment[] Tentative { get; }
+
+    /// <summary>The language code of this transcript update. Only present on target transcript updates.</summary>
+    [JsonPropertyName("language")]
+    public string? Language { get; }
+  }
+}
diff --git a/DeepL/Model/VoiceSessionInfo.cs b/DeepL/Model/VoiceSessionInfo.cs
@@ -0,0 +1,40 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>
+  ///   Connection details for a Voice API session, as returned by the session request endpoint.
+  /// </summary>
+  public sealed class VoiceSessionInfo {
+    /// <summary>Creates a new <see cref="VoiceSessionInfo" /> holding the given connection details.</summary>
+    /// <param name="streamingUrl">WebSocket URL used to open the stream connection.</param>
+    /// <param name="token">Ephemeral token that authenticates against the streaming endpoint.</param>
+    /// <param name="sessionId">Identifier that uniquely names the session.</param>
+    /// <remarks>
+    ///   Library users should not call this constructor (nor that of any other Model class). It would ideally be
+    ///   <see langword="internal" />, but JSON deserialization requires it to be <see langword="public" />.
+    ///   Backwards-incompatible changes may be made to it in future.
+    /// </remarks>
+    [JsonConstructor]
+    public VoiceSessionInfo(string streamingUrl, string token, string? sessionId) {
+      (StreamingUrl, Token, SessionId) = (streamingUrl, token, sessionId);
+    }
+
+    /// <summary>WebSocket URL used to open the stream connection.</summary>
+    [JsonPropertyName("streaming_url")]
+    public string StreamingUrl { get; }
+
+    /// <summary>
+    ///   Ephemeral token that authenticates against the streaming endpoint. It can be used only once.
+    /// </summary>
+    [JsonPropertyName("token")]
+    public string Token { get; }
+
+    /// <summary>Identifier that uniquely names the session.</summary>
+    [JsonPropertyName("session_id")]
+    public string? SessionId { get; }
+  }
+}