Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/computer-vision/app/object_detection/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
useObjectDetection,
RF_DETR_NANO,
SSDLITE_320_MOBILENET_V3_LARGE,
YOLO26N,
ObjectDetectionModelSources,
} from 'react-native-executorch';
import { View, StyleSheet, Image } from 'react-native';
Expand All @@ -18,6 +19,7 @@ import ScreenWrapper from '../../ScreenWrapper';
const MODELS: ModelOption<ObjectDetectionModelSources>[] = [
{ label: 'RF-DeTR Nano', value: RF_DETR_NANO },
{ label: 'SSDLite MobileNet', value: SSDLITE_320_MOBILENET_V3_LARGE },
{ label: 'YOLO26N', value: YOLO26N },
];

export default function ObjectDetectionScreen() {
Expand Down
7 changes: 6 additions & 1 deletion apps/computer-vision/app/vision_camera/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type ModelId =
| 'classification'
| 'objectDetectionSsdlite'
| 'objectDetectionRfdetr'
| 'objectDetectionYolo26n'
| 'segmentationDeeplabResnet50'
| 'segmentationDeeplabResnet101'
| 'segmentationDeeplabMobilenet'
Expand Down Expand Up @@ -95,6 +96,7 @@ const TASKS: Task[] = [
variants: [
{ id: 'objectDetectionSsdlite', label: 'SSDLite MobileNet' },
{ id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' },
{ id: 'objectDetectionYolo26n', label: 'YOLO26N' },
],
},
{
Expand Down Expand Up @@ -241,7 +243,10 @@ export default function VisionCameraScreen() {
<ObjectDetectionTask
{...taskProps}
activeModel={
activeModel as 'objectDetectionSsdlite' | 'objectDetectionRfdetr'
activeModel as
| 'objectDetectionSsdlite'
| 'objectDetectionRfdetr'
| 'objectDetectionYolo26n'
}
/>
)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@ import {
Detection,
RF_DETR_NANO,
SSDLITE_320_MOBILENET_V3_LARGE,
YOLO26N,
useObjectDetection,
CocoLabel,
CocoLabelYolo,
} from 'react-native-executorch';
import { labelColor, labelColorBg } from '../utils/colors';
import { TaskProps } from './types';

type ObjModelId = 'objectDetectionSsdlite' | 'objectDetectionRfdetr';
type ObjModelId =
| 'objectDetectionSsdlite'
| 'objectDetectionRfdetr'
| 'objectDetectionYolo26n';

type Props = TaskProps & { activeModel: ObjModelId };

Expand All @@ -34,10 +40,21 @@ export default function ObjectDetectionTask({
model: RF_DETR_NANO,
preventLoad: activeModel !== 'objectDetectionRfdetr',
});
const yolo26n = useObjectDetection({
model: YOLO26N,
preventLoad: activeModel !== 'objectDetectionYolo26n',
});

const active =
activeModel === 'objectDetectionSsdlite'
? ssdlite
: activeModel === 'objectDetectionRfdetr'
? rfdetr
: yolo26n;

const active = activeModel === 'objectDetectionSsdlite' ? ssdlite : rfdetr;
type CommonDetection = Omit<Detection, 'label'> & { label: string };

const [detections, setDetections] = useState<Detection[]>([]);
const [detections, setDetections] = useState<CommonDetection[]>([]);
const [imageSize, setImageSize] = useState({ width: 1, height: 1 });
const lastFrameTimeRef = useRef(Date.now());

Expand All @@ -56,8 +73,19 @@ export default function ObjectDetectionTask({
const detRof = active.runOnFrame;

const updateDetections = useCallback(
(p: { results: Detection[]; imageWidth: number; imageHeight: number }) => {
setDetections(p.results);
(p: {
results:
| Detection<typeof CocoLabel>[]
| Detection<typeof CocoLabelYolo>[];
imageWidth: number;
imageHeight: number;
}) => {
setDetections(
p.results.map((det) => ({
...det,
label: String(det.label),
}))
);
setImageSize({ width: p.imageWidth, height: p.imageHeight });
const now = Date.now();
const diff = now - lastFrameTimeRef.current;
Expand All @@ -82,7 +110,9 @@ export default function ObjectDetectionTask({
try {
if (!detRof) return;
const isFrontCamera = cameraPositionSync.getDirty() === 'front';
const result = detRof(frame, isFrontCamera, 0.5);
const result = detRof(frame, isFrontCamera, {
detectionThreshold: 0.5,
});
// Sensor frames are landscape-native, so width/height are swapped
// relative to portrait screen orientation.
const screenW = frame.height;
Expand Down
10 changes: 10 additions & 0 deletions docs/docs/02-benchmarks/inference-time.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,21 @@ processing. Resizing is typically fast for small images but may be noticeably
slower for very large images, which can increase total time.
:::

:::warning
Times presented in the tables are measured for the `forward` method with an input size of 512. Other input sizes may yield slower or faster inference times.
:::

| Model / Device | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
| :-------------------------------------------- | :----------------: | :------------------: |
| SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) | 20 | 18 |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) | 18 | - |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) | 8 | - |
| RF_DETR_NANO (XNNPACK FP32) | TBD | TBD |
| YOLO26N (XNNPACK FP32) | TBD | TBD |
| YOLO26S (XNNPACK FP32) | TBD | TBD |
| YOLO26M (XNNPACK FP32) | TBD | TBD |
| YOLO26L (XNNPACK FP32) | TBD | TBD |
| YOLO26X (XNNPACK FP32) | TBD | TBD |

## Style Transfer

Expand Down
10 changes: 10 additions & 0 deletions docs/docs/02-benchmarks/memory-usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,21 @@ loaded and actively running inference, relative to the baseline app memory
before model initialization.
:::

:::warning
Data presented for YOLO models is based on inference with the `forward_640` method.
:::

| Model / Device | iPhone 17 Pro [MB] | Google Pixel 10 [MB] |
| --------------------------------------------- | :----------------: | :------------------: |
| SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) | 94 | 104 |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) | 83 | - |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) | 62 | - |
| RF_DETR_NANO (XNNPACK FP32) | TBD | TBD |
| YOLO26N (XNNPACK FP32) | TBD | TBD |
| YOLO26S (XNNPACK FP32) | TBD | TBD |
| YOLO26M (XNNPACK FP32) | TBD | TBD |
| YOLO26L (XNNPACK FP32) | TBD | TBD |
| YOLO26X (XNNPACK FP32) | TBD | TBD |

## Style Transfer

Expand Down
6 changes: 6 additions & 0 deletions docs/docs/02-benchmarks/model-size.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ title: Model Size
| Model | XNNPACK FP32 [MB] | Core ML FP32 [MB] | Core ML FP16 [MB] |
| ------------------------------ | :---------------: | :---------------: | :---------------: |
| SSDLITE_320_MOBILENET_V3_LARGE | 13.9 | 15.6 | 8.46 |
| RF_DETR_NANO | 112 | - | - |
| YOLO26N | 10.3 | - | - |
| YOLO26S | 38.6 | - | - |
| YOLO26M | 82.3 | - | - |
| YOLO26L | 100 | - | - |
| YOLO26X | 224 | - | - |

## Instance Segmentation

Expand Down
38 changes: 25 additions & 13 deletions docs/docs/03-hooks/02-computer-vision/useObjectDetection.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,18 @@ You need more details? Check the following resources:
- `error` - An error object if the model failed to load or encountered a runtime error.
- `downloadProgress` - A value between 0 and 1 representing the download progress of the model binary.
- `forward` - A function to run inference on an image.
- `getAvailableInputSizes` - A function that returns available input sizes for multi-method models (YOLO). Returns `undefined` for single-method models.

## Running the model

To run the model, use the [`forward`](../../06-api-reference/interfaces/ObjectDetectionType.md#forward) method. It accepts two arguments:

- `input` (required) - The image to process. Can be a remote URL, a local file URI, a base64-encoded image (whole URI or only raw base64), or a [`PixelData`](../../06-api-reference/interfaces/PixelData.md) object (raw RGB pixel buffer).
- `detectionThreshold` (optional) - A number between 0 and 1 representing the minimum confidence score for a detection to be included in the results. Defaults to `0.7`.
- `options` (optional) - An [`ObjectDetectionOptions`](../../06-api-reference/interfaces/ObjectDetectionOptions.md) object with the following properties:
- `detectionThreshold` (optional) - A number between 0 and 1 representing the minimum confidence score. Defaults to model-specific value (typically `0.7`).
- `iouThreshold` (optional) - IoU threshold for non-maximum suppression (0-1). Defaults to model-specific value (typically `0.55`).
- `inputSize` (optional) - For multi-method models like YOLO, specify the input resolution (`384`, `512`, or `640`). Defaults to `384` for YOLO models.
- `classesOfInterest` (optional) - Array of class labels to filter detections. Only detections matching these classes will be returned.

`forward` returns a promise resolving to an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects, each containing:

Expand All @@ -78,11 +83,11 @@ To run the model, use the [`forward`](../../06-api-reference/interfaces/ObjectDe
## Example

```typescript
import { useObjectDetection, RF_DETR_NANO } from 'react-native-executorch';
import { useObjectDetection, YOLO26N } from 'react-native-executorch';

function App() {
const model = useObjectDetection({
model: RF_DETR_NANO,
model: YOLO26N,
});

const handleDetect = async () => {
Expand All @@ -91,13 +96,12 @@ function App() {
const imageUri = 'file:///Users/.../photo.jpg';

try {
const detections = await model.forward(imageUri, 0.5);
const detections = await model.forward(imageUri, {
detectionThreshold: 0.5,
inputSize: 640,
});

for (const detection of detections) {
console.log('Label:', detection.label);
console.log('Score:', detection.score);
console.log('Bounding box:', detection.bbox);
}
console.log('Detected:', detections.length, 'objects');
} catch (error) {
console.error(error);
}
Expand All @@ -113,7 +117,15 @@ See the full guide: [VisionCamera Integration](./visioncamera-integration.md).

## Supported models

| Model | Number of classes | Class list |
| ----------------------------------------------------------------------------------------------------------------------------- | ----------------- | -------------------------------------------------------- |
| [SSDLite320 MobileNetV3 Large](https://huggingface.co/software-mansion/react-native-executorch-ssdlite320-mobilenet-v3-large) | 91 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) |
| [RF-DETR Nano](https://huggingface.co/software-mansion/react-native-executorch-rf-detr-nano) | 80 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) |
| Model | Number of classes | Class list | Multi-size Support |
| ----------------------------------------------------------------------------------------------------------------------------- | ----------------- | ------------------------------------------------------------ | ------------------ |
| [SSDLite320 MobileNetV3 Large](https://huggingface.co/software-mansion/react-native-executorch-ssdlite320-mobilenet-v3-large) | 91 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) | No |
| [RF-DETR Nano](https://huggingface.co/software-mansion/react-native-executorch-rf-detr-nano) | 80 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) | No |
| [YOLO26N](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26S](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26M](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26L](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26X](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |

:::tip
YOLO models support multiple input sizes (384px, 512px, 640px). Smaller sizes are faster but less accurate, while larger sizes are more accurate but slower. Choose based on your speed/accuracy requirements.
:::
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,26 @@ For more information on loading resources, take a look at [loading models](../..
To run the model, use the [`forward`](../../06-api-reference/classes/ObjectDetectionModule.md#forward) method. It accepts two arguments:

- `input` (required) - The image to process. Can be a remote URL, a local file URI, a base64-encoded image (whole URI or only raw base64), or a [`PixelData`](../../06-api-reference/interfaces/PixelData.md) object (raw RGB pixel buffer).
- `detectionThreshold` (optional) - A number between 0 and 1. Defaults to `0.7`.
- `options` (optional) - An [`ObjectDetectionOptions`](../../06-api-reference/interfaces/ObjectDetectionOptions.md) object with:
- `detectionThreshold` (optional) - Minimum confidence score (0-1). Defaults to model-specific value.
- `iouThreshold` (optional) - IoU threshold for NMS (0-1). Defaults to model-specific value.
- `inputSize` (optional) - For YOLO models: `384`, `512`, or `640`. Defaults to `384`.
- `classesOfInterest` (optional) - Array of class labels to filter detections.

The method returns a promise resolving to an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects, each containing the bounding box, label, and confidence score.

For real-time frame processing, use [`runOnFrame`](../../03-hooks/02-computer-vision/visioncamera-integration.md) instead.

### Example with Options

```typescript
const detections = await model.forward(imageUri, {
detectionThreshold: 0.5,
inputSize: 640, // YOLO models only
classesOfInterest: ['PERSON', 'CAR'],
});
```

## Using a custom model

Use [`fromCustomModel`](../../06-api-reference/classes/ObjectDetectionModule.md#fromcustommodel) to load your own exported model binary instead of a built-in preset.
Expand Down
Loading
Loading