Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/computer-vision/app/object_detection/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
useObjectDetection,
RF_DETR_NANO,
SSDLITE_320_MOBILENET_V3_LARGE,
YOLO26N,
ObjectDetectionModelSources,
} from 'react-native-executorch';
import { View, StyleSheet, Image } from 'react-native';
Expand All @@ -18,6 +19,7 @@ import ScreenWrapper from '../../ScreenWrapper';
const MODELS: ModelOption<ObjectDetectionModelSources>[] = [
{ label: 'RF-DeTR Nano', value: RF_DETR_NANO },
{ label: 'SSDLite MobileNet', value: SSDLITE_320_MOBILENET_V3_LARGE },
{ label: 'YOLO26N', value: YOLO26N },
];

export default function ObjectDetectionScreen() {
Expand Down
7 changes: 6 additions & 1 deletion apps/computer-vision/app/vision_camera/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type ModelId =
| 'classification'
| 'objectDetectionSsdlite'
| 'objectDetectionRfdetr'
| 'objectDetectionYolo26n'
| 'segmentationDeeplabResnet50'
| 'segmentationDeeplabResnet101'
| 'segmentationDeeplabMobilenet'
Expand Down Expand Up @@ -95,6 +96,7 @@ const TASKS: Task[] = [
variants: [
{ id: 'objectDetectionSsdlite', label: 'SSDLite MobileNet' },
{ id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' },
{ id: 'objectDetectionYolo26n', label: 'YOLO26N' },
],
},
{
Expand Down Expand Up @@ -241,7 +243,10 @@ export default function VisionCameraScreen() {
<ObjectDetectionTask
{...taskProps}
activeModel={
activeModel as 'objectDetectionSsdlite' | 'objectDetectionRfdetr'
activeModel as
| 'objectDetectionSsdlite'
| 'objectDetectionRfdetr'
| 'objectDetectionYolo26n'
}
/>
)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@ import {
Detection,
RF_DETR_NANO,
SSDLITE_320_MOBILENET_V3_LARGE,
YOLO26N,
useObjectDetection,
CocoLabel,
CocoLabelYolo,
} from 'react-native-executorch';
import { labelColor, labelColorBg } from '../utils/colors';
import { TaskProps } from './types';

type ObjModelId = 'objectDetectionSsdlite' | 'objectDetectionRfdetr';
type ObjModelId =
| 'objectDetectionSsdlite'
| 'objectDetectionRfdetr'
| 'objectDetectionYolo26n';

type Props = TaskProps & { activeModel: ObjModelId };

Expand All @@ -34,10 +40,21 @@ export default function ObjectDetectionTask({
model: RF_DETR_NANO,
preventLoad: activeModel !== 'objectDetectionRfdetr',
});
const yolo26n = useObjectDetection({
model: YOLO26N,
preventLoad: activeModel !== 'objectDetectionYolo26n',
});

const active =
activeModel === 'objectDetectionSsdlite'
? ssdlite
: activeModel === 'objectDetectionRfdetr'
? rfdetr
: yolo26n;

const active = activeModel === 'objectDetectionSsdlite' ? ssdlite : rfdetr;
type CommonDetection = Omit<Detection, 'label'> & { label: string };

const [detections, setDetections] = useState<Detection[]>([]);
const [detections, setDetections] = useState<CommonDetection[]>([]);
const [imageSize, setImageSize] = useState({ width: 1, height: 1 });
const lastFrameTimeRef = useRef(Date.now());

Expand All @@ -56,8 +73,19 @@ export default function ObjectDetectionTask({
const detRof = active.runOnFrame;

const updateDetections = useCallback(
(p: { results: Detection[]; imageWidth: number; imageHeight: number }) => {
setDetections(p.results);
(p: {
results:
| Detection<typeof CocoLabel>[]
| Detection<typeof CocoLabelYolo>[];
imageWidth: number;
imageHeight: number;
}) => {
setDetections(
p.results.map((det) => ({
...det,
label: String(det.label),
}))
);
setImageSize({ width: p.imageWidth, height: p.imageHeight });
const now = Date.now();
const diff = now - lastFrameTimeRef.current;
Expand All @@ -82,7 +110,9 @@ export default function ObjectDetectionTask({
try {
if (!detRof) return;
const isFrontCamera = cameraPositionSync.getDirty() === 'front';
const result = detRof(frame, isFrontCamera, 0.5);
const result = detRof(frame, isFrontCamera, {
detectionThreshold: 0.5,
});
// Sensor frames are landscape-native, so width/height are swapped
// relative to portrait screen orientation.
const screenW = frame.height;
Expand Down
10 changes: 10 additions & 0 deletions docs/docs/02-benchmarks/inference-time.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,21 @@ processing. Resizing is typically fast for small images but may be noticeably
slower for very large images, which can increase total time.
:::

:::warning
Times presented in the tables are measured for the `forward` method with an input size of 512. Other input sizes may yield slower or faster inference times.
:::

| Model / Device | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
| :-------------------------------------------- | :----------------: | :------------------: |
| SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) | 20 | 18 |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) | 18 | - |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) | 8 | - |
| RF_DETR_NANO (XNNPACK FP32) | TBD | TBD |
| YOLO26N (XNNPACK FP32) | TBD | TBD |
| YOLO26S (XNNPACK FP32) | TBD | TBD |
| YOLO26M (XNNPACK FP32) | TBD | TBD |
| YOLO26L (XNNPACK FP32) | TBD | TBD |
| YOLO26X (XNNPACK FP32) | TBD | TBD |

## Style Transfer

Expand Down
10 changes: 10 additions & 0 deletions docs/docs/02-benchmarks/memory-usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,21 @@ loaded and actively running inference, relative to the baseline app memory
before model initialization.
:::

:::warning
Data presented for YOLO models is based on inference with the `forward_640` method.
:::

| Model / Device | iPhone 17 Pro [MB] | Google Pixel 10 [MB] |
| --------------------------------------------- | :----------------: | :------------------: |
| SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) | 94 | 104 |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) | 83 | - |
| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) | 62 | - |
| RF_DETR_NANO (XNNPACK FP32) | TBD | TBD |
| YOLO26N (XNNPACK FP32) | TBD | TBD |
| YOLO26S (XNNPACK FP32) | TBD | TBD |
| YOLO26M (XNNPACK FP32) | TBD | TBD |
| YOLO26L (XNNPACK FP32) | TBD | TBD |
| YOLO26X (XNNPACK FP32) | TBD | TBD |

## Style Transfer

Expand Down
6 changes: 6 additions & 0 deletions docs/docs/02-benchmarks/model-size.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ title: Model Size
| Model | XNNPACK FP32 [MB] | Core ML FP32 [MB] | Core ML FP16 [MB] |
| ------------------------------ | :---------------: | :---------------: | :---------------: |
| SSDLITE_320_MOBILENET_V3_LARGE | 13.9 | 15.6 | 8.46 |
| RF_DETR_NANO | 112 | - | - |
| YOLO26N | 10.3 | - | - |
| YOLO26S | 38.6 | - | - |
| YOLO26M | 82.3 | - | - |
| YOLO26L | 100 | - | - |
| YOLO26X | 224 | - | - |

## Instance Segmentation

Expand Down
38 changes: 25 additions & 13 deletions docs/docs/03-hooks/02-computer-vision/useObjectDetection.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,18 @@ You need more details? Check the following resources:
- `error` - An error object if the model failed to load or encountered a runtime error.
- `downloadProgress` - A value between 0 and 1 representing the download progress of the model binary.
- `forward` - A function to run inference on an image.
- `getAvailableInputSizes` - A function that returns available input sizes for multi-method models (YOLO). Returns `undefined` for single-method models.

## Running the model

To run the model, use the [`forward`](../../06-api-reference/interfaces/ObjectDetectionType.md#forward) method. It accepts two arguments:

- `input` (required) - The image to process. Can be a remote URL, a local file URI, a base64-encoded image (whole URI or only raw base64), or a [`PixelData`](../../06-api-reference/interfaces/PixelData.md) object (raw RGB pixel buffer).
- `detectionThreshold` (optional) - A number between 0 and 1 representing the minimum confidence score for a detection to be included in the results. Defaults to `0.7`.
- `options` (optional) - An [`ObjectDetectionOptions`](../../06-api-reference/interfaces/ObjectDetectionOptions.md) object with the following properties:
- `detectionThreshold` (optional) - A number between 0 and 1 representing the minimum confidence score. Defaults to model-specific value (typically `0.7`).
- `iouThreshold` (optional) - IoU threshold for non-maximum suppression (0-1). Defaults to model-specific value (typically `0.55`).
- `inputSize` (optional) - For multi-method models like YOLO, specify the input resolution (`384`, `512`, or `640`). Defaults to `384` for YOLO models.
- `classesOfInterest` (optional) - Array of class labels to filter detections. Only detections matching these classes will be returned.

`forward` returns a promise resolving to an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects, each containing:

Expand All @@ -78,11 +83,11 @@ To run the model, use the [`forward`](../../06-api-reference/interfaces/ObjectDe
## Example

```typescript
import { useObjectDetection, RF_DETR_NANO } from 'react-native-executorch';
import { useObjectDetection, YOLO26N } from 'react-native-executorch';

function App() {
const model = useObjectDetection({
model: RF_DETR_NANO,
model: YOLO26N,
});

const handleDetect = async () => {
Expand All @@ -91,13 +96,12 @@ function App() {
const imageUri = 'file:///Users/.../photo.jpg';

try {
const detections = await model.forward(imageUri, 0.5);
const detections = await model.forward(imageUri, {
detectionThreshold: 0.5,
inputSize: 640,
});

for (const detection of detections) {
console.log('Label:', detection.label);
console.log('Score:', detection.score);
console.log('Bounding box:', detection.bbox);
}
console.log('Detected:', detections.length, 'objects');
} catch (error) {
console.error(error);
}
Expand All @@ -113,7 +117,15 @@ See the full guide: [VisionCamera Integration](./visioncamera-integration.md).

## Supported models

| Model | Number of classes | Class list |
| ----------------------------------------------------------------------------------------------------------------------------- | ----------------- | -------------------------------------------------------- |
| [SSDLite320 MobileNetV3 Large](https://huggingface.co/software-mansion/react-native-executorch-ssdlite320-mobilenet-v3-large) | 91 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) |
| [RF-DETR Nano](https://huggingface.co/software-mansion/react-native-executorch-rf-detr-nano) | 80 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) |
| Model | Number of classes | Class list | Multi-size Support |
| ----------------------------------------------------------------------------------------------------------------------------- | ----------------- | ------------------------------------------------------------ | ------------------ |
| [SSDLite320 MobileNetV3 Large](https://huggingface.co/software-mansion/react-native-executorch-ssdlite320-mobilenet-v3-large) | 91 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) | No |
| [RF-DETR Nano](https://huggingface.co/software-mansion/react-native-executorch-rf-detr-nano) | 80 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) | No |
| [YOLO26N](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26S](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26M](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26L](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |
| [YOLO26X](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabelYolo.md) | Yes (384/512/640) |

:::tip
YOLO models support multiple input sizes (384px, 512px, 640px). Smaller sizes are faster but less accurate, while larger sizes are more accurate but slower. Choose based on your speed/accuracy requirements.
:::
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,26 @@ For more information on loading resources, take a look at [loading models](../..
To run the model, use the [`forward`](../../06-api-reference/classes/ObjectDetectionModule.md#forward) method. It accepts two arguments:

- `input` (required) - The image to process. Can be a remote URL, a local file URI, a base64-encoded image (whole URI or only raw base64), or a [`PixelData`](../../06-api-reference/interfaces/PixelData.md) object (raw RGB pixel buffer).
- `detectionThreshold` (optional) - A number between 0 and 1. Defaults to `0.7`.
- `options` (optional) - An [`ObjectDetectionOptions`](../../06-api-reference/interfaces/ObjectDetectionOptions.md) object with:
- `detectionThreshold` (optional) - Minimum confidence score (0-1). Defaults to model-specific value.
- `iouThreshold` (optional) - IoU threshold for NMS (0-1). Defaults to model-specific value.
- `inputSize` (optional) - For YOLO models: `384`, `512`, or `640`. Defaults to `384`.
- `classesOfInterest` (optional) - Array of class labels to filter detections.

The method returns a promise resolving to an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects, each containing the bounding box, label, and confidence score.

For real-time frame processing, use [`runOnFrame`](../../03-hooks/02-computer-vision/visioncamera-integration.md) instead.

### Example with Options

```typescript
const detections = await model.forward(imageUri, {
detectionThreshold: 0.5,
inputSize: 640, // YOLO models only
classesOfInterest: ['PERSON', 'CAR'],
});
```

## Using a custom model

Use [`fromCustomModel`](../../06-api-reference/classes/ObjectDetectionModule.md#fromcustommodel) to load your own exported model binary instead of a built-in preset.
Expand Down
Loading
Loading