software-mansion · benITo47 · Mar 20, 2026
diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.cpp b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.cpp
@@ -0,0 +1,57 @@
+#include "CVProcessing.h"
+#include <algorithm>
+#include <cmath>
+#include <rnexecutorch/Error.h>
+#include <rnexecutorch/ErrorCodes.h>
+#include <rnexecutorch/Log.h>
+
+namespace rnexecutorch::cv_processing {
+
+// Intersection-over-union of two boxes.
+// Returns 0 when the union area is not positive.
+float computeIoU(const BBox &a, const BBox &b) {
+  // Overlap extent per axis, clamped at zero so it never goes negative.
+  const float overlapWidth =
+      std::max(0.0f, std::min(a.x2, b.x2) - std::max(a.x1, b.x1));
+  const float overlapHeight =
+      std::max(0.0f, std::min(a.y2, b.y2) - std::max(a.y1, b.y1));
+  const float overlapArea = overlapWidth * overlapHeight;
+  const float combinedArea = a.area() + b.area() - overlapArea;
+  // Guard the division: a non-positive union yields 0 instead of a ratio.
+  return (combinedArea > 0.0f) ? (overlapArea / combinedArea) : 0.0f;
+}
+
+// Exactly 3 values -> cv::Scalar; otherwise nullopt (warns if non-empty).
+std::optional<cv::Scalar> validateNormParam(const std::vector<float> &values,
+                                            const char *paramName) {
+  if (const auto count = values.size(); count == 3) {
+    return cv::Scalar(values[0], values[1], values[2]);
+  } else if (count != 0) {
+    log(LOG_LEVEL::Warn, std::string(paramName) +
+        " must have 3 elements — ignoring provided value.");
+  }
+  return std::nullopt;
+}
+
+// Gather the requested class indices into an ordered set without duplicates.
+std::set<int32_t>
+prepareAllowedClasses(const std::vector<int32_t> &classIndices) {
+  // Range construction already yields an empty set for an empty vector, so
+  // no separate emptiness check is needed.
+  std::set<int32_t> uniqueIndices(classIndices.cbegin(), classIndices.cend());
+  return uniqueIndices;
+}
+
+// Throws InvalidConfig unless both are in [0, 1]; !(in-range) also rejects NaN.
+void validateThresholds(double confidenceThreshold, double iouThreshold) {
+  if (!(confidenceThreshold >= 0.0 && confidenceThreshold <= 1.0)) {
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
+                            "Confidence threshold must be in range [0, 1].");
+  }
+  if (!(iouThreshold >= 0.0 && iouThreshold <= 1.0)) { // NaN lands here too
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
+                            "IoU threshold must be in range [0, 1].");
+  }
+}
+
+} // namespace rnexecutorch::cv_processing
diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.h b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.h
@@ -0,0 +1,107 @@
+#pragma once
+
+#include "CVTypes.h"
+#include <algorithm>
+#include <opencv2/opencv.hpp>
+#include <optional>
+#include <set>
+#include <vector>
+
+namespace rnexecutorch::cv_processing {
+
+/**
+ * @brief Compute Intersection over Union (IoU) between two bounding boxes
+ * @param a First bounding box
+ * @param b Second bounding box
+ * @return Intersection area divided by union area, or 0.0 when the computed
+ *         union area is not positive
+ * @note Moved from utils/computer_vision/Processing.h for consolidation.
+ */
+float computeIoU(const BBox &a, const BBox &b);
+
+/**
+ * @brief Greedy Non-Maximum Suppression for detection/segmentation results
+ * @tparam T Item type exposing bbox and score (satisfies HasBBoxAndScore)
+ * @param items Candidates to filter; taken by value and sorted internally
+ * @param iouThreshold A candidate is dropped when its IoU with an already
+ *        kept item is strictly greater than this value
+ * @return Kept items, in the order they were visited (descending score)
+ *
+ * When T also has a classIndex member, only items of the same class can
+ * suppress each other; otherwise suppression is class-agnostic.
+ * Moved from utils/computer_vision/Processing.h for consolidation.
+ */
+template <HasBBoxAndScore T>
+std::vector<T> nonMaxSuppression(std::vector<T> items, double iouThreshold) {
+  if (items.empty()) {
+    return {};
+  }
+
+  // Visit candidates from highest to lowest score.
+  std::ranges::sort(items, [](const T &lhs, const T &rhs) {
+    return lhs.score > rhs.score;
+  });
+
+  const size_t count = items.size();
+  std::vector<bool> dropped(count, false);
+  std::vector<T> kept;
+
+  for (size_t current = 0; current < count; ++current) {
+    if (dropped[current]) {
+      continue;
+    }
+    // No earlier (higher-ranked) item suppressed this one, so it survives.
+    kept.push_back(items[current]);
+
+    for (size_t other = current + 1; other < count; ++other) {
+      if (dropped[other]) {
+        continue;
+      }
+      // Types that carry a classIndex only compete within the same class.
+      if constexpr (requires(T t) { t.classIndex; }) {
+        if (items[current].classIndex != items[other].classIndex) {
+          continue;
+        }
+      }
+
+      const float overlap = computeIoU(items[current].bbox, items[other].bbox);
+      dropped[other] = overlap > iouThreshold;
+    }
+  }
+
+  return kept;
+}
+
+/**
+ * @brief Validate and convert normalization parameter vector to cv::Scalar
+ * @param values Normalization values; exactly 3 elements are converted, an
+ *        empty vector is skipped silently, any other size logs a warning
+ * @param paramName Non-null parameter name that prefixes the warning text
+ * @return cv::Scalar built from the 3 elements, std::nullopt otherwise
+ * @note Replaces duplicate validation logic across ObjectDetection,
+ *       BaseInstanceSegmentation, and BaseSemanticSegmentation.
+ */
+std::optional<cv::Scalar> validateNormParam(const std::vector<float> &values,
+                                            const char *paramName);
+
+/**
+ * @brief Convert class indices vector to a set for efficient filtering
+ * @param classIndices Vector of class indices to allow; duplicates collapse
+ * @return Ordered set of the given indices; empty input gives an empty set
+ *         (documented convention: empty set = allow all classes)
+ * @note Used by detection and segmentation models to filter results by class.
+ */
+std::set<int32_t>
+prepareAllowedClasses(const std::vector<int32_t> &classIndices);
+
+/**
+ * @brief Validate confidence and IoU thresholds are in valid range [0, 1]
+ * @param confidenceThreshold Detection confidence threshold
+ * @param iouThreshold Non-maximum suppression IoU threshold
+ * @throws RnExecutorchError with code InvalidConfig if either threshold is
+ *         out of range; the confidence threshold is checked first
+ * @note Used by detection and segmentation models to validate user input.
+ */
+void validateThresholds(double confidenceThreshold, double iouThreshold);
+
+} // namespace rnexecutorch::cv_processing
diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/CVTypes.h b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVTypes.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <concepts>
+#include <opencv2/opencv.hpp>
+
+namespace rnexecutorch::cv_processing {
+
+/**
+ * @brief Box described by its (x1, y1) and (x2, y2) corner coordinates
+ * @note Moved from utils/computer_vision/Types.h for consolidation.
+ */
+struct BBox {
+  float x1, y1, x2, y2;
+
+  /// Signed extents (x2 - x1 and y2 - y1) and their product.
+  float width() const { return x2 - x1; }
+  float height() const { return y2 - y1; }
+  float area() const { return width() * height(); }
+
+  /// True when x2 > x1 and y2 > y1 with x1 and y1 both non-negative.
+  bool isValid() const {
+    return x1 >= 0.0f && y1 >= 0.0f && x1 < x2 && y1 < y2;
+  }
+
+  /// Copy with x coordinates multiplied by widthRatio, y by heightRatio.
+  BBox scale(float widthRatio, float heightRatio) const {
+    return BBox{widthRatio * x1, heightRatio * y1, widthRatio * x2,
+                heightRatio * y2};
+  }
+};
+
+/**
+ * @brief Satisfied by any T whose `bbox` member access converts to BBox and
+ *        whose `score` member access converts to float
+ * @note Used for NMS and other detection/segmentation operations.
+ */
+template <typename T>
+concept HasBBoxAndScore = requires(T t) {
+  { t.bbox } -> std::convertible_to<BBox>;
+  { t.score } -> std::convertible_to<float>;
+};
+
+/**
+ * @brief Per-axis ratios of original image size to model input size
+ * @note Replaces duplicate scale ratio calculation code across multiple models.
+ */
+struct ScaleRatios {
+  float widthRatio;
+  float heightRatio;
+
+  /**
+   * @brief Divide each original dimension by the matching model dimension
+   * @param original Original image dimensions
+   * @param model Model input dimensions (a zero dimension is not guarded)
+   * @return {original.width / model.width, original.height / model.height}
+   */
+  static ScaleRatios compute(cv::Size original, cv::Size model) {
+    const float horizontal = static_cast<float>(original.width) / model.width;
+    const float vertical = static_cast<float>(original.height) / model.height;
+    return ScaleRatios{horizontal, vertical};
+  }
+};
+
+} // namespace rnexecutorch::cv_processing
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
@@ -15,6 +15,7 @@
 #include <rnexecutorch/host_objects/JSTensorViewOut.h>
 #include <rnexecutorch/jsi/OwningArrayBuffer.h>
 
+#include <rnexecutorch/data_processing/CVTypes.h>
 #include <rnexecutorch/metaprogramming/TypeConcepts.h>
 #include <rnexecutorch/models/instance_segmentation/Types.h>
 #include <rnexecutorch/models/object_detection/Constants.h>
@@ -25,7 +26,6 @@
 #include <rnexecutorch/models/speech_to_text/common/types/TranscriptionResult.h>
 #include <rnexecutorch/models/style_transfer/Types.h>
 #include <rnexecutorch/models/voice_activity_detection/Types.h>
-#include <rnexecutorch/utils/computer_vision/Types.h>
 
 using namespace rnexecutorch::models::speech_to_text;
 
@@ -433,7 +433,7 @@ getJsiValue(const std::unordered_map<std::string_view, float> &map,
   return mapObj;
 }
 
-inline jsi::Value getJsiValue(const utils::computer_vision::BBox &bbox,
+inline jsi::Value getJsiValue(const cv_processing::BBox &bbox,
                               jsi::Runtime &runtime) {
   jsi::Object obj(runtime);
   obj.setProperty(runtime, "x1", bbox.x1);

diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp
@@ -1,6 +1,7 @@
 #include "VisionModel.h"
 #include <rnexecutorch/Error.h>
 #include <rnexecutorch/ErrorCodes.h>
+#include <rnexecutorch/data_processing/CVProcessing.h>
 #include <rnexecutorch/utils/FrameProcessor.h>
 #include <rnexecutorch/utils/FrameTransform.h>
 
@@ -18,6 +19,18 @@ void VisionModel::unload() noexcept {
 }
 
 cv::Size VisionModel::modelInputSize() const {
+  // For multi-method models, query the currently loaded method's input shape
+  if (!currentlyLoadedMethod_.empty()) {
+    auto inputShapes = getAllInputShapes(currentlyLoadedMethod_);
+    if (!inputShapes.empty() && !inputShapes[0].empty() &&
+        inputShapes[0].size() >= 2) {
+      const auto &shape = inputShapes[0];
+      return {static_cast<int>(shape[shape.size() - 2]),
+              static_cast<int>(shape[shape.size() - 1])};
+    }
+  }
+
+  // Default: use cached modelInputShape_ from single-method models
   if (modelInputShape_.size() < 2) {
     return {0, 0};
   }
@@ -51,4 +64,42 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
   return ::rnexecutorch::utils::pixelsToMat(tensorView);
 }
 
+// Makes methodName the loaded method, unloading the previous one first.
+// Throws RnExecutorchError on an empty name, a null module_ or a failed load.
+void VisionModel::ensureMethodLoaded(const std::string &methodName) {
+  if (methodName.empty()) {
+    throw RnExecutorchError(
+        RnExecutorchErrorCode::InvalidConfig,
+        "Method name cannot be empty. Use 'forward' for single-method models "
+        "or 'forward_{inputSize}' for multi-method models.");
+  }
+  if (currentlyLoadedMethod_ == methodName) {
+    return; // already the active method
+  }
+  if (!module_) {
+    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
+                            "Model not loaded. Cannot load method '" +
+                                methodName + "'.");
+  }
+  if (!currentlyLoadedMethod_.empty()) {
+    module_->unload_method(currentlyLoadedMethod_);
+    // Forget the old name right away: if the load below fails, the tracker
+    // must not keep naming a method that is no longer loaded.
+    currentlyLoadedMethod_.clear();
+  }
+  auto loadResult = module_->load_method(methodName);
+  if (loadResult != executorch::runtime::Error::Ok) {
+    throw RnExecutorchError(
+        loadResult, "Failed to load method '" + methodName +
+                        "'. Ensure the method exists in the exported model.");
+  }
+  currentlyLoadedMethod_ = methodName;
+}
+
+void VisionModel::initializeNormalization(const std::vector<float> &normMean,
+                                          const std::vector<float> &normStd) {
+  normMean_ = cv_processing::validateNormParam(normMean, "normMean"); // nullopt unless 3 values
+  normStd_ = cv_processing::validateNormParam(normStd, "normStd"); // string is the log label
+}
+
 } // namespace rnexecutorch::models
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h
@@ -3,6 +3,7 @@
 #include <jsi/jsi.h>
 #include <mutex>
 #include <opencv2/opencv.hpp>
+#include <optional>
 #include <rnexecutorch/metaprogramming/ConstructorHelpers.h>
 #include <rnexecutorch/models/BaseModel.h>
 #include <rnexecutorch/utils/FrameTransform.h>
@@ -78,6 +79,42 @@ class VisionModel : public BaseModel {
    */
   mutable std::mutex inference_mutex_;
 
+  /// Name of the method most recently loaded via ensureMethodLoaded()
+  /// (e.g. "forward" or "forward_384"); empty before the first successful load.
+  std::string currentlyLoadedMethod_;
+
+  /// Optional per-channel mean for input normalisation.
+  std::optional<cv::Scalar> normMean_;
+
+  /// Optional per-channel standard deviation for input normalisation.
+  std::optional<cv::Scalar> normStd_;
+
+  /**
+   * @brief Ensures the specified method is loaded, unloading any previous
+   * method if necessary.
+   *
+   * For single-method models, pass "forward" (the default).
+   * For multi-method models, pass the specific method name (e.g.,
+   * "forward_384").
+   *
+   * @param methodName Name of the method to load. Defaults to "forward".
+   * @throws RnExecutorchError if the method cannot be loaded.
+   */
+  void ensureMethodLoaded(const std::string &methodName = "forward");
+
+  /**
+   * @brief Initializes normalization parameters from vectors.
+   *
+   * Uses cv_processing::validateNormParam() for validation.
+   *
+   * @param normMean Per-channel mean values (must be exactly 3 elements, or
+   * empty to skip).
+   * @param normStd Per-channel std dev values (must be exactly 3 elements, or
+   * empty to skip).
+   */
+  void initializeNormalization(const std::vector<float> &normMean,
+                               const std::vector<float> &normStd);
+
   /**
    * @brief Resize an RGB image to the model's expected input size
    *