Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion common/darwin/Classes/FlutterRTCDesktopCapturer.m
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
#import "LocalVideoTrack.h"

#if TARGET_OS_OSX
#import "SystemAudioMixer.h"
#import "AudioManager.h"
RTCDesktopMediaList* _screen = nil;
RTCDesktopMediaList* _window = nil;
NSArray<RTCDesktopSource*>* _captureSources;
// Store active system audio mixers by track UUID (macOS 13.0+)
NSMutableDictionary* _systemAudioMixers = nil;
#endif

@implementation FlutterWebRTCPlugin (DesktopCapturer)
Expand Down Expand Up @@ -78,7 +82,7 @@ - (void)getDisplayMedia:(NSDictionary*)constraints result:(FlutterResult)result
#if TARGET_OS_OSX
/* example for constraints:
{
'audio': false,
'audio': false, // or true for system audio capture
'video": {
'deviceId': {'exact': sourceId},
'mandatory': {
Expand All @@ -90,6 +94,15 @@ - (void)getDisplayMedia:(NSDictionary*)constraints result:(FlutterResult)result
NSString* sourceId = nil;
BOOL useDefaultScreen = NO;
NSInteger fps = 30;

// Check if audio (system audio) is requested - enabled by default on macOS
BOOL includeSystemAudio = YES; // Default to YES for macOS screen sharing
id audioConstraints = constraints[@"audio"];
if ([audioConstraints isKindOfClass:[NSNumber class]] && [audioConstraints boolValue] == NO) {
// Only disable if explicitly set to false
includeSystemAudio = NO;
}

id videoConstraints = constraints[@"video"];
if ([videoConstraints isKindOfClass:[NSNumber class]] && [videoConstraints boolValue] == YES) {
useDefaultScreen = YES;
Expand Down Expand Up @@ -135,10 +148,56 @@ - (void)getDisplayMedia:(NSDictionary*)constraints result:(FlutterResult)result
NSLog(@"start desktop capture: sourceId: %@, type: %@, fps: %lu", sourceId,
source.sourceType == RTCDesktopSourceTypeScreen ? @"screen" : @"window", fps);

// Start system audio capture if requested (macOS 13.0+)
// Use id type to avoid availability warnings - actual type checking done at runtime
__block id systemAudioMixerObj = nil;
if (includeSystemAudio) {
if (@available(macOS 13.0, *)) {
if ([SystemAudioMixer isSupported]) {
// Initialize the mixers dictionary if needed
if (_systemAudioMixers == nil) {
_systemAudioMixers = [NSMutableDictionary dictionary];
}

SystemAudioMixer *mixer = [[SystemAudioMixer alloc] init];
systemAudioMixerObj = mixer;
_systemAudioMixers[trackUUID] = mixer;

// Register the mixer with the audio processing adapter
[AudioManager.sharedInstance.capturePostProcessingAdapter addProcessing:mixer];

// Start capturing system audio
[mixer startWithCompletion:^(NSError * _Nullable error) {
if (error) {
NSLog(@"Failed to start system audio capture: %@", error);
} else {
NSLog(@"System audio capture started for track: %@", trackUUID);
}
}];
} else {
NSLog(@"System audio mixing not supported on this device");
}
} else {
NSLog(@"System audio capture requires macOS 13.0 or later");
}
}

self.videoCapturerStopHandlers[trackUUID] = ^(CompletionHandler handler) {
NSLog(@"stop desktop capture: sourceId: %@, type: %@, trackID %@", sourceId,
source.sourceType == RTCDesktopSourceTypeScreen ? @"screen" : @"window", trackUUID);
[desktopCapturer stopCapture];

// Stop and cleanup system audio mixer if it was active
if (systemAudioMixerObj) {
if (@available(macOS 13.0, *)) {
SystemAudioMixer *mixer = (SystemAudioMixer *)systemAudioMixerObj;
[AudioManager.sharedInstance.capturePostProcessingAdapter removeProcessing:mixer];
[mixer stop];
[_systemAudioMixers removeObjectForKey:trackUUID];
NSLog(@"System audio capture stopped for track: %@", trackUUID);
}
}

handler();
};
#endif
Expand Down
25 changes: 25 additions & 0 deletions common/darwin/Classes/SystemAudioCapturer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#import <Foundation/Foundation.h>
#import <ScreenCaptureKit/ScreenCaptureKit.h>
#import <AVFoundation/AVFoundation.h>

NS_ASSUME_NONNULL_BEGIN

API_AVAILABLE(macos(13.0))
@protocol SystemAudioCapturerDelegate <NSObject>
/// Delivers one captured system-audio sample buffer.
/// Invoked on the capturer's private serial queue, not the main thread.
/// NOTE(review): the buffer is presumably only valid for the duration of the
/// callback (ScreenCaptureKit owns it) — retain/copy it if it must outlive
/// this call; confirm against the SCStreamOutput contract.
- (void)systemAudioCapturer:(id)capturer didCaptureAudioBuffer:(CMSampleBufferRef)sampleBuffer;
@end

API_AVAILABLE(macos(13.0))
/// Captures system (loopback) audio using ScreenCaptureKit. An audio-only
/// SCStream is created over the main display; captured audio buffers are
/// forwarded to the delegate. Requires macOS 13.0+ and the Screen Recording
/// permission.
@interface SystemAudioCapturer : NSObject <SCStreamDelegate, SCStreamOutput>

/// Receiver of captured audio sample buffers. Held weakly; callbacks are
/// delivered on the capturer's private serial queue.
@property (nonatomic, weak, nullable) id<SystemAudioCapturerDelegate> delegate;
/// YES while a capture stream is running (set after the stream successfully
/// starts, cleared on stop or stream error).
@property (nonatomic, readonly) BOOL isCapturing;

/// Returns YES when the running OS supports ScreenCaptureKit system-audio
/// capture (macOS 13.0 or later).
+ (BOOL)isSupported;

/// Asynchronously starts capture. `completion` is called once with nil on
/// success or the underlying error on failure. Calling while already
/// capturing completes immediately with nil.
- (void)startCaptureWithCompletion:(void (^)(NSError * _Nullable error))completion;
/// Stops the active capture stream, if any. Safe to call when not capturing.
- (void)stopCapture;

@end

NS_ASSUME_NONNULL_END
161 changes: 161 additions & 0 deletions common/darwin/Classes/SystemAudioCapturer.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#import "SystemAudioCapturer.h"

API_AVAILABLE(macos(13.0))
@interface SystemAudioCapturer ()

/// Active ScreenCaptureKit stream; non-nil from a successful start until stop.
@property (nonatomic, strong, nullable) SCStream *stream;
/// Display filter the stream was built from; retained for the stream's lifetime.
@property (nonatomic, strong, nullable) SCContentFilter *contentFilter;
/// Serial queue on which SCStream delivers audio sample buffers.
@property (nonatomic, strong) dispatch_queue_t captureQueue;
/// Readwrite redeclaration of the header's readonly property.
/// NOTE(review): mutated from the caller's thread and from ScreenCaptureKit
/// callback queues without synchronization — confirm callers tolerate this.
@property (nonatomic, assign) BOOL isCapturing;

@end

API_AVAILABLE(macos(13.0))
@implementation SystemAudioCapturer

/// Runtime availability gate so callers compiled against an older deployment
/// target can check support before instantiating this class.
+ (BOOL)isSupported {
    if (@available(macOS 13.0, *)) {
        return YES;
    }
    return NO;
}

- (instancetype)init {
    self = [super init];
    if (self) {
        _captureQueue = dispatch_queue_create("io.getstream.webrtc.systemaudio", DISPATCH_QUEUE_SERIAL);
        _isCapturing = NO;
    }
    return self;
}

/// Asynchronously builds an audio-only SCStream over the main display and
/// starts it. `completion` is invoked exactly once with nil on success or the
/// underlying error on failure; starting while already capturing succeeds
/// immediately.
- (void)startCaptureWithCompletion:(void (^)(NSError * _Nullable))completion {
    if (self.isCapturing) {
        if (completion) {
            completion(nil);
        }
        return;
    }

    // Enumerating shareable content is also what triggers the Screen
    // Recording permission prompt when it has not been granted yet.
    [SCShareableContent getShareableContentWithCompletionHandler:^(SCShareableContent * _Nullable shareableContent, NSError * _Nullable error) {
        if (error) {
            NSLog(@"SystemAudioCapturer: Failed to get shareable content: %@", error);
            if (completion) {
                completion(error);
            }
            return;
        }

        if (shareableContent.displays.count == 0) {
            NSError *noDisplayError = [NSError errorWithDomain:@"SystemAudioCapturer"
                                                          code:-1
                                                      userInfo:@{NSLocalizedDescriptionKey: @"No displays available"}];
            NSLog(@"SystemAudioCapturer: No displays available");
            if (completion) {
                completion(noDisplayError);
            }
            return;
        }

        // ScreenCaptureKit requires a display-backed filter even for
        // audio-only capture; the tiny video frames configured below are
        // discarded in the output callback.
        SCDisplay *mainDisplay = shareableContent.displays.firstObject;
        self.contentFilter = [[SCContentFilter alloc] initWithDisplay:mainDisplay
                                                     excludingWindows:@[]];

        SCStreamConfiguration *config = [[SCStreamConfiguration alloc] init];
        config.capturesAudio = YES;
        config.excludesCurrentProcessAudio = YES; // Don't capture our own audio (feedback loop).
        config.sampleRate = 48000;
        config.channelCount = 2;

        // Minimize video capture overhead since we only need audio.
        config.width = 2;
        config.height = 2;
        config.minimumFrameInterval = CMTimeMake(1, 1); // 1 FPS minimum
        config.showsCursor = NO;

        self.stream = [[SCStream alloc] initWithFilter:self.contentFilter
                                         configuration:config
                                              delegate:self];

        NSError *addOutputError = nil;
        BOOL audioAdded = [self.stream addStreamOutput:self
                                                  type:SCStreamOutputTypeAudio
                                    sampleHandlerQueue:self.captureQueue
                                                 error:&addOutputError];
        if (!audioAdded || addOutputError) {
            NSLog(@"SystemAudioCapturer: Failed to add audio output: %@", addOutputError);
            // Fix: discard the half-configured stream so a later retry
            // starts from a clean state (previously it was left dangling,
            // unlike the other two failure paths which clean up).
            self.stream = nil;
            self.contentFilter = nil;
            if (completion) {
                completion(addOutputError);
            }
            return;
        }

        [self.stream startCaptureWithCompletionHandler:^(NSError * _Nullable startError) {
            if (startError) {
                NSLog(@"SystemAudioCapturer: Failed to start capture: %@", startError);
                self.stream = nil;
                self.contentFilter = nil;
            } else {
                self.isCapturing = YES;
                NSLog(@"SystemAudioCapturer: Started capturing system audio");
            }
            if (completion) {
                completion(startError);
            }
        }];
    }];
}

/// Stops the active stream, if any. Safe to call repeatedly.
/// NOTE(review): a stop racing a still-pending start is a no-op here and the
/// start will complete anyway — confirm callers serialize start/stop.
- (void)stopCapture {
    if (!self.isCapturing || !self.stream) {
        return;
    }

    self.isCapturing = NO;

    // The completion block retains `self` just long enough for the async stop
    // to finish; the properties are cleared synchronously below so the object
    // immediately reads as idle.
    [self.stream stopCaptureWithCompletionHandler:^(NSError * _Nullable error) {
        if (error) {
            NSLog(@"SystemAudioCapturer: Error stopping capture: %@", error);
        } else {
            NSLog(@"SystemAudioCapturer: Stopped capturing system audio");
        }
    }];

    self.stream = nil;
    self.contentFilter = nil;
}

#pragma mark - SCStreamDelegate

/// ScreenCaptureKit terminated the stream (e.g. permission revoked); drop our
/// references so the capturer can be restarted.
- (void)stream:(SCStream *)stream didStopWithError:(NSError *)error {
    NSLog(@"SystemAudioCapturer: Stream stopped with error: %@", error);
    self.isCapturing = NO;
    self.stream = nil;
    self.contentFilter = nil;
}

#pragma mark - SCStreamOutput

/// Forwards audio buffers to the delegate (on `captureQueue`); the throwaway
/// video frames requested by the configuration are ignored here.
- (void)stream:(SCStream *)stream didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer ofType:(SCStreamOutputType)type {
    if (type == SCStreamOutputTypeAudio && self.delegate) {
        [self.delegate systemAudioCapturer:self didCaptureAudioBuffer:sampleBuffer];
    }
}

- (void)dealloc {
    // Fix: the original called [self stopCapture] here, which uses property
    // accessors and enqueues a block that strongly captures `self` — both
    // unsafe during -dealloc under ARC (object resurrection). Stop via the
    // ivar instead, capturing only the stream in the handler.
    SCStream *stream = _stream;
    if (stream) {
        [stream stopCaptureWithCompletionHandler:^(NSError * _Nullable error) {
            if (error) {
                NSLog(@"SystemAudioCapturer: Error stopping capture: %@", error);
            }
        }];
    }
}

@end
46 changes: 46 additions & 0 deletions common/darwin/Classes/SystemAudioMixer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#import <Foundation/Foundation.h>
#import <WebRTC/WebRTC.h>
#import "AudioProcessingAdapter.h"
#import "SystemAudioCapturer.h"

NS_ASSUME_NONNULL_BEGIN

/**
 * SystemAudioMixer captures system audio and mixes it with the microphone audio stream.
 * It conforms to ExternalAudioProcessingDelegate to intercept microphone audio
 * and mix in system audio samples captured via ScreenCaptureKit.
 * Requires macOS 13.0 or later for system audio capture via ScreenCaptureKit.
 */
API_AVAILABLE(macos(13.0))
@interface SystemAudioMixer : NSObject <ExternalAudioProcessingDelegate, SystemAudioCapturerDelegate>

/**
 * YES while system audio is being captured for mixing.
 */
@property (nonatomic, readonly) BOOL isCapturing;

/**
 * Returns YES when system audio capture is available on the running OS
 * (macOS 13.0 or later).
 */
+ (BOOL)isSupported;

/**
 * Start capturing and mixing system audio.
 * System audio will be mixed into the microphone audio stream.
 */
- (void)startWithCompletion:(void (^_Nullable)(NSError * _Nullable error))completion;

/**
 * Stop capturing system audio.
 */
- (void)stop;

/**
 * Set the mix volume for system audio (0.0 - 1.0).
 * Default is 1.0.
 */
@property (nonatomic, assign) float systemAudioVolume;

/**
 * Set the mix volume for microphone audio (0.0 - 1.0).
 * Default is 1.0.
 */
@property (nonatomic, assign) float microphoneVolume;

@end

NS_ASSUME_NONNULL_END
Loading
Loading