Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "CVProcessing.h"
#include <algorithm>
#include <cmath>
#include <rnexecutorch/Error.h>
#include <rnexecutorch/ErrorCodes.h>
#include <rnexecutorch/Log.h>

namespace rnexecutorch::cv_processing {

// Intersection-over-Union of two boxes.
// The overlap rectangle is formed from the max of the (x1, y1) corners and the
// min of the (x2, y2) corners; a negative extent on either axis is clamped to
// zero, which makes the intersection area zero for disjoint boxes.
// Returns 0 when the union area is not strictly positive.
float computeIoU(const BBox &a, const BBox &b) {
  const float overlapX1 = std::max(a.x1, b.x1);
  const float overlapY1 = std::max(a.y1, b.y1);
  const float overlapX2 = std::min(a.x2, b.x2);
  const float overlapY2 = std::min(a.y2, b.y2);

  const float overlapWidth = std::max(0.0f, overlapX2 - overlapX1);
  const float overlapHeight = std::max(0.0f, overlapY2 - overlapY1);
  const float intersectionArea = overlapWidth * overlapHeight;

  const float unionArea = a.area() + b.area() - intersectionArea;

  // Guard the division: degenerate boxes yield a non-positive union.
  if (unionArea > 0.0f) {
    return intersectionArea / unionArea;
  }
  return 0.0f;
}

// Converts a normalization vector into a cv::Scalar.
// - exactly 3 elements: returns the Scalar built from them, in order
// - empty: returns nullopt silently
// - any other size: logs a warning naming `paramName`, returns nullopt
std::optional<cv::Scalar> validateNormParam(const std::vector<float> &values,
                                            const char *paramName) {
  if (values.size() != 3) {
    // An empty vector is not reported; only a wrongly sized one is.
    if (!values.empty()) {
      log(LOG_LEVEL::Warn,
          std::string(paramName) +
              " must have 3 elements — ignoring provided value.");
    }
    return std::nullopt;
  }

  return cv::Scalar(values[0], values[1], values[2]);
}

// Builds the set of allowed class indices from `classIndices`.
// Duplicates collapse; an empty input produces an empty set.
std::set<int32_t>
prepareAllowedClasses(const std::vector<int32_t> &classIndices) {
  // Range construction over an empty vector already yields an empty set, so
  // no separate emptiness check is required.
  return std::set<int32_t>(classIndices.begin(), classIndices.end());
}

// Checks that both thresholds lie in the closed interval [0, 1].
// Throws RnExecutorchError(InvalidConfig) for the first threshold that does
// not; the confidence threshold is checked before the IoU threshold.
// NaN is rejected as well: the check is written as "not inside the range"
// rather than "below 0 or above 1", because every ordered comparison with NaN
// is false and the latter form would let NaN pass.
void validateThresholds(double confidenceThreshold, double iouThreshold) {
  const auto isInUnitInterval = [](double value) {
    return value >= 0.0 && value <= 1.0;
  };

  if (!isInUnitInterval(confidenceThreshold)) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
                            "Confidence threshold must be in range [0, 1].");
  }

  if (!isInUnitInterval(iouThreshold)) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
                            "IoU threshold must be in range [0, 1].");
  }
}

} // namespace rnexecutorch::cv_processing
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#pragma once

#include "CVTypes.h"
#include <algorithm>
#include <opencv2/opencv.hpp>
#include <optional>
#include <set>
#include <vector>

namespace rnexecutorch::cv_processing {

/**
* @brief Compute Intersection over Union (IoU) between two bounding boxes
* @param a First bounding box
* @param b Second bounding box
* @return IoU value between 0.0 and 1.0
*
* Moved from utils/computer_vision/Processing.h for consolidation.
*/
float computeIoU(const BBox &a, const BBox &b);

/**
* @brief Non-Maximum Suppression for detection/segmentation results
* @tparam T Type that has bbox and score fields (satisfies HasBBoxAndScore)
* @param items Vector of items to filter
* @param iouThreshold IoU threshold for suppression (typically 0.5)
* @return Filtered vector with overlapping detections removed
*
* Moved from utils/computer_vision/Processing.h for consolidation.
* Handles both class-aware and class-agnostic NMS automatically.
*/
template <HasBBoxAndScore T>
std::vector<T> nonMaxSuppression(std::vector<T> items, double iouThreshold) {
if (items.empty()) {
return {};
}

// Sort by score in descending order
std::ranges::sort(items,
[](const T &a, const T &b) { return a.score > b.score; });

std::vector<T> result;
std::vector<bool> suppressed(items.size(), false);

for (size_t i = 0; i < items.size(); ++i) {
if (suppressed[i]) {
continue;
}

result.push_back(items[i]);

// Suppress overlapping boxes
for (size_t j = i + 1; j < items.size(); ++j) {
if (suppressed[j]) {
continue;
}

// If type has classIndex, only suppress boxes of same class
if constexpr (requires(T t) { t.classIndex; }) {
if (items[i].classIndex != items[j].classIndex) {
continue;
}
}

float iou = computeIoU(items[i].bbox, items[j].bbox);
if (iou > iouThreshold) {
suppressed[j] = true;
}
}
}

return result;
}

/**
* @brief Validate and convert normalization parameter vector to cv::Scalar
* @param values Vector of normalization values (should have 3 elements for RGB)
* @param paramName Parameter name for logging (e.g., "normMean", "normStd")
* @return Optional cv::Scalar if valid (3 elements), nullopt otherwise
*
* Replaces duplicate validation logic across ObjectDetection,
* BaseInstanceSegmentation, and BaseSemanticSegmentation.
*/
std::optional<cv::Scalar> validateNormParam(const std::vector<float> &values,
const char *paramName);

/**
* @brief Convert class indices vector to a set for efficient filtering
* @param classIndices Vector of class indices to allow
* @return Set of allowed class indices (empty set = allow all classes)
*
* Used by detection and segmentation models to filter results by class.
*/
std::set<int32_t>
prepareAllowedClasses(const std::vector<int32_t> &classIndices);

/**
* @brief Validate confidence and IoU thresholds are in valid range [0, 1]
* @param confidenceThreshold Detection confidence threshold
* @param iouThreshold Non-maximum suppression IoU threshold
* @throws RnExecutorchError if either threshold is out of range
*
* Used by detection and segmentation models to validate user input.
*/
void validateThresholds(double confidenceThreshold, double iouThreshold);

} // namespace rnexecutorch::cv_processing
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#pragma once

#include <concepts>
#include <opencv2/opencv.hpp>

namespace rnexecutorch::cv_processing {

/**
 * @brief Axis-aligned bounding box stored as two corners (x1, y1) and (x2, y2)
 *
 * Plain aggregate: members are initialised positionally as {x1, y1, x2, y2}.
 * Moved from utils/computer_vision/Types.h for consolidation.
 */
struct BBox {
  float x1;
  float y1;
  float x2;
  float y2;

  /// Horizontal extent, x2 - x1 (negative if the corners are swapped).
  float width() const { return x2 - x1; }

  /// Vertical extent, y2 - y1 (negative if the corners are swapped).
  float height() const { return y2 - y1; }

  /// Product of the two extents.
  float area() const { return (x2 - x1) * (y2 - y1); }

  /// True when both extents are strictly positive and the (x1, y1) corner
  /// has no negative coordinate.
  bool isValid() const {
    const bool hasPositiveExtent = (x2 > x1) && (y2 > y1);
    const bool cornerIsNonNegative = (x1 >= 0.0f) && (y1 >= 0.0f);
    return hasPositiveExtent && cornerIsNonNegative;
  }

  /// Returns a copy with x coordinates multiplied by `widthRatio` and
  /// y coordinates multiplied by `heightRatio`.
  BBox scale(float widthRatio, float heightRatio) const {
    return BBox{x1 * widthRatio, y1 * heightRatio, x2 * widthRatio,
                y2 * heightRatio};
  }
};

/**
 * @brief Constrains T to types exposing a box and a confidence value
 *
 * Satisfied when `candidate.bbox` is convertible to BBox and
 * `candidate.score` is convertible to float. Used for NMS and other
 * detection/segmentation operations.
 */
template <typename T>
concept HasBBoxAndScore = requires(T candidate) {
  { candidate.bbox } -> std::convertible_to<BBox>;
  { candidate.score } -> std::convertible_to<float>;
};

/**
 * @brief Per-axis ratio between original image size and model input size
 *
 * Replaces duplicate scale ratio calculation code across multiple models.
 */
struct ScaleRatios {
  float widthRatio;
  float heightRatio;

  /**
   * @brief Divide the original dimensions by the model input dimensions
   * @param original Original image dimensions
   * @param model Model input dimensions
   * @return ScaleRatios with original.width / model.width and
   *         original.height / model.height, computed in float
   */
  static ScaleRatios compute(cv::Size original, cv::Size model) {
    // Convert to float before dividing so the result is not truncated by
    // integer division.
    const float ratioW =
        static_cast<float>(original.width) / static_cast<float>(model.width);
    const float ratioH =
        static_cast<float>(original.height) / static_cast<float>(model.height);
    return {ratioW, ratioH};
  }
};

} // namespace rnexecutorch::cv_processing
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <rnexecutorch/host_objects/JSTensorViewOut.h>
#include <rnexecutorch/jsi/OwningArrayBuffer.h>

#include <rnexecutorch/data_processing/CVTypes.h>
#include <rnexecutorch/metaprogramming/TypeConcepts.h>
#include <rnexecutorch/models/instance_segmentation/Types.h>
#include <rnexecutorch/models/object_detection/Constants.h>
Expand All @@ -25,7 +26,6 @@
#include <rnexecutorch/models/speech_to_text/common/types/TranscriptionResult.h>
#include <rnexecutorch/models/style_transfer/Types.h>
#include <rnexecutorch/models/voice_activity_detection/Types.h>
#include <rnexecutorch/utils/computer_vision/Types.h>

using namespace rnexecutorch::models::speech_to_text;

Expand Down Expand Up @@ -433,7 +433,7 @@ getJsiValue(const std::unordered_map<std::string_view, float> &map,
return mapObj;
}

inline jsi::Value getJsiValue(const utils::computer_vision::BBox &bbox,
inline jsi::Value getJsiValue(const cv_processing::BBox &bbox,
jsi::Runtime &runtime) {
jsi::Object obj(runtime);
obj.setProperty(runtime, "x1", bbox.x1);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "VisionModel.h"
#include <rnexecutorch/Error.h>
#include <rnexecutorch/ErrorCodes.h>
#include <rnexecutorch/data_processing/CVProcessing.h>
#include <rnexecutorch/utils/FrameProcessor.h>
#include <rnexecutorch/utils/FrameTransform.h>

Expand All @@ -18,6 +19,18 @@ void VisionModel::unload() noexcept {
}

cv::Size VisionModel::modelInputSize() const {
// For multi-method models, query the currently loaded method's input shape
if (!currentlyLoadedMethod_.empty()) {
auto inputShapes = getAllInputShapes(currentlyLoadedMethod_);
if (!inputShapes.empty() && !inputShapes[0].empty() &&
inputShapes[0].size() >= 2) {
const auto &shape = inputShapes[0];
return {static_cast<int>(shape[shape.size() - 2]),
static_cast<int>(shape[shape.size() - 1])};
}
}

// Default: use cached modelInputShape_ from single-method models
if (modelInputShape_.size() < 2) {
return {0, 0};
}
Expand Down Expand Up @@ -51,4 +64,42 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
return ::rnexecutorch::utils::pixelsToMat(tensorView);
}

// Makes `methodName` the method tracked in currentlyLoadedMethod_.
//
// - Throws InvalidConfig when `methodName` is empty.
// - Returns immediately when `methodName` is already the tracked method.
// - Throws ModuleNotLoaded when there is no module.
// - Otherwise unloads the previously tracked method (if any), loads
//   `methodName`, and throws with the loader's error code if loading fails.
//
// After a failed load no method is tracked: the previous one has already been
// unloaded, so its name is cleared rather than left behind as stale state.
void VisionModel::ensureMethodLoaded(const std::string &methodName) {
  if (methodName.empty()) {
    throw RnExecutorchError(
        RnExecutorchErrorCode::InvalidConfig,
        "Method name cannot be empty. Use 'forward' for single-method models "
        "or 'forward_{inputSize}' for multi-method models.");
  }

  if (currentlyLoadedMethod_ == methodName) {
    return;
  }

  if (!module_) {
    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
                            "Model not loaded. Cannot load method '" +
                                methodName + "'.");
  }

  if (!currentlyLoadedMethod_.empty()) {
    module_->unload_method(currentlyLoadedMethod_);
    // Forget the old name right away. If the load below throws, a stale name
    // would make a later call for the old method return early above, and
    // modelInputSize() would query a method that is no longer loaded.
    currentlyLoadedMethod_.clear();
  }

  const auto loadResult = module_->load_method(methodName);
  if (loadResult != executorch::runtime::Error::Ok) {
    throw RnExecutorchError(
        loadResult, "Failed to load method '" + methodName +
                        "'. Ensure the method exists in the exported model.");
  }

  // Record the name only once the method is actually loaded.
  currentlyLoadedMethod_ = methodName;
}

void VisionModel::initializeNormalization(const std::vector<float> &normMean,
const std::vector<float> &normStd) {
normMean_ = cv_processing::validateNormParam(normMean, "normMean");
normStd_ = cv_processing::validateNormParam(normStd, "normStd");
}

} // namespace rnexecutorch::models
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <jsi/jsi.h>
#include <mutex>
#include <opencv2/opencv.hpp>
#include <optional>
#include <rnexecutorch/metaprogramming/ConstructorHelpers.h>
#include <rnexecutorch/models/BaseModel.h>
#include <rnexecutorch/utils/FrameTransform.h>
Expand Down Expand Up @@ -78,6 +79,42 @@ class VisionModel : public BaseModel {
*/
mutable std::mutex inference_mutex_;

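/// Name of the method tracked as currently loaded; written by
/// ensureMethodLoaded(). Note that ensureMethodLoaded() defaults to
/// "forward", so single-method models that call it store "forward" here
/// rather than leaving this empty.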
std::string currentlyLoadedMethod_;

/// Optional per-channel mean for input normalisation.
std::optional<cv::Scalar> normMean_;

/// Optional per-channel standard deviation for input normalisation.
std::optional<cv::Scalar> normStd_;

/**
* @brief Ensures the specified method is loaded, unloading any previous
* method if necessary.
*
* For single-method models, pass "forward" (the default).
* For multi-method models, pass the specific method name (e.g.,
* "forward_384").
*
* @param methodName Name of the method to load. Defaults to "forward".
* @throws RnExecutorchError if the method cannot be loaded.
*/
void ensureMethodLoaded(const std::string &methodName = "forward");

/**
* @brief Initializes normalization parameters from vectors.
*
* Uses cv_processing::validateNormParam() for validation.
*
* @param normMean Per-channel mean values (must be exactly 3 elements, or
* empty to skip).
* @param normStd Per-channel std dev values (must be exactly 3 elements, or
* empty to skip).
*/
void initializeNormalization(const std::vector<float> &normMean,
const std::vector<float> &normStd);

/**
* @brief Resize an RGB image to the model's expected input size
*
Expand Down
Loading
Loading