wysaid · wysaid · Mar 28, 2026 · Mar 29, 2026
diff --git a/include/ccap_def.h b/include/ccap_def.h
@@ -35,7 +35,7 @@ namespace ccap {
 enum PixelFormatConstants : uint32_t {
     /// `kPixelFormatRGBBit` indicates that the pixel format is RGB or RGBA.
     kPixelFormatRGBBit = 1 << 3,
-    /// `kPixelFormatRGBBit` indicates that the pixel format is BGR or BGRA.
+    /// `kPixelFormatBGRBit` indicates that the pixel format is BGR or BGRA.
     kPixelFormatBGRBit = 1 << 4,
 
     /// Color Bit Mask
@@ -82,7 +82,6 @@ enum class PixelFormat : uint32_t {
      *    In software design, you can implement a toggle option to allow users to choose whether
      *    the received Frame is FullRange or VideoRange based on what they observe.
      * @note This format is also known by other names, such as YUV420P or IYUV.
-     * @refitem #NV12
      */
     I420 = 1 << 2 | kPixelFormatYUVColorBit,
 
@@ -191,10 +190,14 @@ enum class PropertyName {
 
     /**
      * @brief The output pixel format of ccap. Can be different from PixelFormatInternal.
-     * @note If PixelFormatInternal is RGB(A), PixelFormatOutput cannot be set to a YUV format.
+     * @note If PixelFormatInternal is RGB(A), PixelFormatOutput cannot be set to a YUV format (RGB->YUV conversion is not supported).
+     *       If PixelFormatInternal is YUV and PixelFormatOutput is a different YUV subtype, conversion requires libyuv;
+     *       without it the frame will keep the camera format and no conversion is performed.
      *       If PixelFormatInternal is YUV and PixelFormatOutput is RGB(A), BT.601 will be used for conversion.
-     *       For other cases, there are no issues.
-     *       If PixelFormatInternal and PixelFormatOutput are the same format, data conversion will be skipped and the original data will be used directly.
+     *       If PixelFormatOutput is set to PixelFormat::Unknown (or not set), the camera's native format is used as-is
+     *       and no conversion is performed.
+     *       If PixelFormatInternal and PixelFormatOutput are the same format AND the camera natively supports
+     *       PixelFormatInternal, data conversion will be skipped and the original data will be used directly.
      *       In general, setting both PixelFormatInternal and PixelFormatOutput to YUV formats can achieve better performance.
      */
     PixelFormatOutput = 0x30002,

diff --git a/src/ccap_convert_frame.cpp b/src/ccap_convert_frame.cpp
@@ -10,6 +10,7 @@
 
 #include "ccap_convert.h"
 #include "ccap_imp.h"
+#include "ccap_utils.h"
 
 #include <cassert>
 #include <cstring>
@@ -229,8 +230,23 @@ inline bool inplaceConvertFrameImp(VideoFrame* frame, PixelFormat toFormat, bool
             return inplaceConvertFrameYUV2YUV(frame, toFormat, verticalFlip);
 #endif
 
+        if (isInputYUV && isOutputYUV) {
+            static bool sLoggedYuv2YuvUnsupported = false;
+            if (!sLoggedYuv2YuvUnsupported) {
+                CCAP_LOG_W("ccap: YUV to different YUV subtype conversion is not supported without libyuv, skipping conversion\n");
+                sLoggedYuv2YuvUnsupported = true;
+            }
+            return false;
+        }
+
         if (isInputYUV) // yuv -> BGR
             return inplaceConvertFrameYUV2RGBColor(frame, toFormat, verticalFlip);
+
+        static bool sLoggedRgbToYuvUnsupported = false;
+        if (!sLoggedRgbToYuvUnsupported) {
+            CCAP_LOG_W("ccap: RGB to YUV conversion is not supported, skipping conversion\n");
+            sLoggedRgbToYuvUnsupported = true;
+        }
         return false; // no rgb -> yuv
     }
 

diff --git a/src/ccap_file_reader_apple.mm b/src/ccap_file_reader_apple.mm
@@ -382,10 +382,11 @@ - (void)processFrame:(CMSampleBufferRef)sampleBuffer {
 
     // Check if conversion or flip is needed
     auto& prop = _provider->getFrameProperty();
-    bool isOutputYUV = (newFrame->pixelFormat & kPixelFormatYUVColorBit) != 0;
+    PixelFormat effectiveOutputFormat = (prop.outputPixelFormat == PixelFormat::Unknown) ? newFrame->pixelFormat : prop.outputPixelFormat;
+    bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0;
     FrameOrientation targetOrientation = isOutputYUV ? FrameOrientation::TopToBottom : _provider->frameOrientation();
     bool shouldFlip = !isOutputYUV && (inputOrientation != targetOrientation);
-    bool shouldConvert = newFrame->pixelFormat != prop.outputPixelFormat;
+    bool shouldConvert = newFrame->pixelFormat != effectiveOutputFormat;
 
     newFrame->orientation = targetOrientation;
 
@@ -397,8 +398,11 @@ - (void)processFrame:(CMSampleBufferRef)sampleBuffer {
             newFrame->allocator = f ? f() : std::make_shared<DefaultAllocator>();
         }
 
-        zeroCopy = !inplaceConvertFrame(newFrame.get(), prop.outputPixelFormat, shouldFlip);
-        CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
+        zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip);
+        if (!zeroCopy) {
+            CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
+            newFrame->nativeHandle = nullptr;
+        }
     }
 
     if (zeroCopy) {

diff --git a/src/ccap_file_reader_windows.cpp b/src/ccap_file_reader_windows.cpp
@@ -448,10 +448,11 @@ void FileReaderWindows::readLoop() {
 
                 // Check if conversion or flip is needed
                 auto& prop = m_provider->getFrameProperty();
-                bool isOutputYUV = (prop.outputPixelFormat & kPixelFormatYUVColorBit) != 0;
+                PixelFormat effectiveOutputFormat = (prop.outputPixelFormat == PixelFormat::Unknown) ? newFrame->pixelFormat : prop.outputPixelFormat;
+                bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0;
                 FrameOrientation targetOrientation = isOutputYUV ? FrameOrientation::TopToBottom : m_provider->frameOrientation();
                 bool shouldFlip = !isOutputYUV && (inputOrientation != targetOrientation);
-                bool shouldConvert = newFrame->pixelFormat != prop.outputPixelFormat;
+                bool shouldConvert = newFrame->pixelFormat != effectiveOutputFormat;
 
                 newFrame->orientation = targetOrientation;
 
@@ -462,7 +463,7 @@ void FileReaderWindows::readLoop() {
                         auto&& f = m_provider->getAllocatorFactory();
                         newFrame->allocator = f ? f() : std::make_shared<DefaultAllocator>();
                     }
-                    inplaceConvertFrame(newFrame.get(), prop.outputPixelFormat, shouldFlip);
+                    zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip);
                 }
 
                 newFrame->frameIndex = m_currentFrameIndex;

diff --git a/src/ccap_imp_apple.mm b/src/ccap_imp_apple.mm
@@ -873,6 +873,9 @@ - (void)captureOutput:(AVCaptureOutput*)output
     CMTime timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
     auto internalFormat = _provider->getFrameProperty().cameraPixelFormat;
     auto outputFormat = _provider->getFrameProperty().outputPixelFormat;
+    if (outputFormat == PixelFormat::Unknown) {
+        outputFormat = internalFormat;
+    }
 
     newFrame->timestamp = (uint64_t)(CMTimeGetSeconds(timestamp) * 1e9);
     newFrame->width = (uint32_t)CVPixelBufferGetWidth(imageBuffer);
@@ -905,6 +908,8 @@ - (void)captureOutput:(AVCaptureOutput*)output
     }
 
     /// iOS/macOS does not support i420, and we do not intend to support nv12 to i420 conversion here.
+    /// When both internal and output formats are YUV, zeroCopy is used regardless of subtype differences
+    /// (e.g., NV12 vs I420). The frame will carry the actual camera format, not the requested output format.
     bool zeroCopy = ((internalFormat & kPixelFormatYUVColorBit) && (outputFormat & kPixelFormatYUVColorBit)) ||
         (internalFormat == outputFormat && _provider->frameOrientation() == kDefaultFrameOrientation);
 
@@ -924,7 +929,10 @@ - (void)captureOutput:(AVCaptureOutput*)output
 
         zeroCopy = !inplaceConvertFrame(newFrame.get(), outputFormat, (int)(newFrame->orientation != kDefaultFrameOrientation));
 
-        CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
+        if (!zeroCopy) {
+            CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
+            newFrame->nativeHandle = nullptr;
+        }
 
         if (verboseLogEnabled()) {
 #ifdef DEBUG

diff --git a/src/ccap_imp_linux.cpp b/src/ccap_imp_linux.cpp
@@ -549,16 +549,16 @@ bool ProviderV4L2::readFrame() {
 
     // Check input/output format types and orientations
     bool isInputYUV = (frame->pixelFormat & kPixelFormatYUVColorBit) != 0;
-    bool isOutputYUV = (m_frameProp.outputPixelFormat & kPixelFormatYUVColorBit) != 0;
+    PixelFormat effectiveOutputFormat = (m_frameProp.outputPixelFormat == PixelFormat::Unknown) ? frame->pixelFormat : m_frameProp.outputPixelFormat;
+    bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0;
     auto inputOrientation = FrameOrientation::TopToBottom; // V4L2 always provides TopToBottom
 
     // Set output orientation based on format type
     frame->orientation = isOutputYUV ? FrameOrientation::TopToBottom : m_frameOrientation;
 
     // Check if we need conversion or flipping
     bool shouldFlip = frame->orientation != inputOrientation && !isOutputYUV;
-    bool shouldConvert = (m_frameProp.outputPixelFormat != PixelFormat::Unknown &&
-                          m_frameProp.outputPixelFormat != frame->pixelFormat);
+    bool shouldConvert = (effectiveOutputFormat != frame->pixelFormat);
     bool zeroCopy = !shouldConvert && !shouldFlip;
 
     uint8_t* bufferData = static_cast<uint8_t*>(m_buffers[buf.index].start);
@@ -614,7 +614,7 @@ bool ProviderV4L2::readFrame() {
 
             std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now();
 
-            zeroCopy = !inplaceConvertFrame(frame.get(), m_frameProp.outputPixelFormat, shouldFlip);
+            zeroCopy = !inplaceConvertFrame(frame.get(), effectiveOutputFormat, shouldFlip);
 
             double durInMs = (std::chrono::steady_clock::now() - startTime).count() / 1.e6;
             static double s_allCostTime = 0;
@@ -630,10 +630,10 @@ bool ProviderV4L2::readFrame() {
 
             CCAP_LOG_V(
                 "ccap: inplaceConvertFrame requested pixel format: %s, actual pixel format: %s, flip: %s, cost time %s: (cur %g ms, avg %g ms)\n",
-                pixelFormatToString(m_frameProp.outputPixelFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(),
+                pixelFormatToString(effectiveOutputFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(),
                 shouldFlip ? "YES" : "NO", mode, durInMs, s_allCostTime / s_frames);
         } else {
-            zeroCopy = !inplaceConvertFrame(frame.get(), m_frameProp.outputPixelFormat, shouldFlip);
+            zeroCopy = !inplaceConvertFrame(frame.get(), effectiveOutputFormat, shouldFlip);
         }
     }
 

diff --git a/src/ccap_imp_linux.h b/src/ccap_imp_linux.h
@@ -101,12 +101,12 @@ class ProviderV4L2 : public ProviderImp {
     bool m_isStreaming = false;
 
     // V4L2 device capabilities
-    struct v4l2_capability m_caps{};
+    struct v4l2_capability m_caps {};
     std::vector<V4L2Format> m_supportedFormats;
     std::vector<DeviceInfo::Resolution> m_supportedResolutions;
 
     // Current format
-    struct v4l2_format m_currentFormat{};
+    struct v4l2_format m_currentFormat {};
 
     // Buffer management
     std::vector<V4L2Buffer> m_buffers;

diff --git a/src/ccap_imp_windows.cpp b/src/ccap_imp_windows.cpp
@@ -843,16 +843,17 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia
 
     uint32_t bufferLen = mediaSample->GetActualDataLength();
     bool isInputYUV = (m_frameProp.cameraPixelFormat & kPixelFormatYUVColorBit);
-    bool isOutputYUV = (m_frameProp.outputPixelFormat & kPixelFormatYUVColorBit);
+    PixelFormat effectiveOutputFormat = (m_frameProp.outputPixelFormat == PixelFormat::Unknown) ? m_frameProp.cameraPixelFormat : m_frameProp.outputPixelFormat;
+    bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit);
 
     newFrame->pixelFormat = m_frameProp.cameraPixelFormat;
     newFrame->width = m_frameProp.width;
     newFrame->height = m_frameProp.height;
     newFrame->orientation = isOutputYUV ? FrameOrientation::TopToBottom : m_frameOrientation;
-    newFrame->nativeHandle = mediaSample;
+    newFrame->nativeHandle = nullptr;
 
     bool shouldFlip = newFrame->orientation != m_inputOrientation && !isOutputYUV;
-    bool shouldConvert = m_frameProp.cameraPixelFormat != m_frameProp.outputPixelFormat;
+    bool shouldConvert = m_frameProp.cameraPixelFormat != effectiveOutputFormat;
     bool zeroCopy = !shouldConvert && !shouldFlip;
 
     if (isInputYUV) {
@@ -920,7 +921,7 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia
 
             std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now();
 
-            zeroCopy = !inplaceConvertFrame(newFrame.get(), m_frameProp.outputPixelFormat, shouldFlip);
+            zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip);
 
             double durInMs = (std::chrono::steady_clock::now() - startTime).count() / 1.e6;
             static double s_allCostTime = 0;
@@ -936,10 +937,10 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia
 
             CCAP_LOG_V(
                 "ccap: inplaceConvertFrame requested pixel format: %s, actual pixel format: %s, flip: %s, cost time %s: (cur %g ms, avg %g ms)\n",
-                pixelFormatToString(m_frameProp.outputPixelFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(),
+                pixelFormatToString(effectiveOutputFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(),
                 shouldFlip ? "YES" : "NO", mode, durInMs, s_allCostTime / s_frames);
         } else {
-            zeroCopy = !inplaceConvertFrame(newFrame.get(), m_frameProp.outputPixelFormat, shouldFlip);
+            zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip);
         }
 
         newFrame->sizeInBytes = newFrame->stride[0] * newFrame->height + (newFrame->stride[1] + newFrame->stride[2]) * newFrame->height / 2;
@@ -949,6 +950,7 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia
         // Conversion may fail. If conversion fails, fall back to zero-copy mode.
         // In this case, the returned format is the original camera input format.
         newFrame->sizeInBytes = bufferLen;
+        newFrame->nativeHandle = mediaSample;
 
         mediaSample->AddRef(); // Ensure data lifecycle
         auto manager = std::make_shared<FakeFrame>([newFrame, mediaSample]() mutable {
@@ -1001,7 +1003,7 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::BufferCB(double SampleTime, BYTE*
     return S_OK;
 }
 
-HRESULT STDMETHODCALLTYPE ProviderDirectShow::QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR * __RPC_FAR * ppvObject) {
+HRESULT STDMETHODCALLTYPE ProviderDirectShow::QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) {
     static constexpr const IID IID_ISampleGrabberCB = { 0x0579154A, 0x2B53, 0x4994, { 0xB0, 0xD0, 0xE7, 0x73, 0x14, 0x8E, 0xFF, 0x85 } };
 
     if (riid == IID_IUnknown) {
@@ -1166,7 +1168,7 @@ void ProviderDirectShow::close() {
 bool ProviderDirectShow::start() {
     if (!m_isOpened) return false;
 
     // File mode
 #ifdef CCAP_ENABLE_FILE_PLAYBACK
     if (m_isFileMode && m_fileReader) {
         return m_fileReader->start();

diff --git a/src/ccap_imp_windows.h b/src/ccap_imp_windows.h
@@ -93,7 +93,7 @@ class ProviderDirectShow : public ProviderImp, public ISampleGrabberCB {
     inline FrameOrientation frameOrientation() const { return m_frameOrientation; }
 
 private:
-    HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR * __RPC_FAR * ppvObject) override;
+    HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) override;
     ULONG STDMETHODCALLTYPE AddRef(void) override;
     ULONG STDMETHODCALLTYPE Release(void) override;
 

diff --git a/src/ccap_imp_windows_msmf.cpp b/src/ccap_imp_windows_msmf.cpp
@@ -747,7 +747,8 @@ void ProviderMSMF::readLoop() {
         newFrame->height = m_activeHeight;
         newFrame->nativeHandle = nullptr;
 
-        bool isOutputYUV = (m_frameProp.outputPixelFormat & kPixelFormatYUVColorBit) != 0;
+        PixelFormat effectiveOutputFormat = (m_frameProp.outputPixelFormat == PixelFormat::Unknown) ? m_activePixelFormat : m_frameProp.outputPixelFormat;
+        bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0;
         FrameOrientation targetOrientation = isOutputYUV ? FrameOrientation::TopToBottom : m_frameOrientation;
         newFrame->orientation = targetOrientation;
 
@@ -801,15 +802,15 @@ void ProviderMSMF::readLoop() {
         }
 
         bool shouldFlip = !isOutputYUV && targetOrientation != m_inputOrientation;
-        bool shouldConvert = newFrame->pixelFormat != m_frameProp.outputPixelFormat;
+        bool shouldConvert = newFrame->pixelFormat != effectiveOutputFormat;
         bool zeroCopy = !shouldConvert && !shouldFlip;
 
         if (!zeroCopy) {
             if (!newFrame->allocator) {
                 newFrame->allocator = m_allocatorFactory ? m_allocatorFactory() : std::make_shared<DefaultAllocator>();
             }
 
-            zeroCopy = !inplaceConvertFrame(newFrame.get(), m_frameProp.outputPixelFormat, shouldFlip);
+            zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip);
             newFrame->sizeInBytes = newFrame->stride[0] * newFrame->height +
                 (newFrame->stride[1] + newFrame->stride[2]) * newFrame->height / 2;
         }