Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.openvino
Original file line number Diff line number Diff line change
Expand Up @@ -296,4 +296,4 @@ ENV GLOG_logtostderr=1
ENV LD_LIBRARY_PATH=/usr/local/lib:/opt/intel/openvino/runtime/lib/intel64/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
WORKDIR /mediapipe

## End of demos image #########################################################
## End of demos image #########################################################
3 changes: 3 additions & 0 deletions build_desktop_examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ for app in ${apps}; do
target="${app}:extract_yt8m_features"
echo "Skipping target ${target}"
continue
elif [[ "${target_name}" == "bytetrack" ]]; then
echo "Skipping target ${target_name} ."
continue
else
target="${app}:${target_name}_cpu"
fi
Expand Down
26 changes: 26 additions & 0 deletions mediapipe/calculators/openvino/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,32 @@ cc_library(
alwayslink = 1,
)

# Calculator library built from openvino_yolox_tensors_to_detections_calculator.cc;
# depends on its generated options proto, the Detection/LocationData protos
# and the OpenVINO runtime.
cc_library(
name = "openvino_yolox_tensors_to_detections_calculator",
srcs = ["openvino_yolox_tensors_to_detections_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":openvino_yolox_tensors_to_detections_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//third_party:openvino",
],
# Force-link the object file even when no symbol is referenced directly;
# presumably required so the calculator's static registration
# (REGISTER_CALCULATOR) is not dropped by the linker.
alwayslink = 1,
)

# Options proto for the OpenVINO YOLOX tensors-to-detections calculator.
# The calculator library depends on the generated
# ":openvino_yolox_tensors_to_detections_calculator_cc_proto" target.
mediapipe_proto_library(
name = "openvino_yolox_tensors_to_detections_calculator_proto",
srcs = ["openvino_yolox_tensors_to_detections_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)

# To run this with native GPU on Linux, use:
# bazel test //mediapipe/calculators/tflite:tflite_inference_calculator_test --copt=-DTFLITE_GPU_EXTRA_GLES_DEPS --copt=-DMESA_EGL_NO_X11_HEADERS --copt=-DEGL_NO_X11 --config=grte_v5 --test_strategy=local
cc_test(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#include "mediapipe/calculators/openvino/openvino_yolox_tensors_to_detections_calculator.pb.h"
#include <vector>
#include <algorithm>
#include <numeric>
#include <openvino/openvino.hpp>
#include <cmath>
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"

namespace mediapipe {

// Converts the raw YOLOX output OV tensor into MediaPipe Detections.
//
// Expected tensor (first element of TENSORS): element type f32,
// shape [1, 85, 3549].
//   Layout: [batch, num_attrs, num_boxes] (attribute-first).
//   Attributes: [cx, cy, w, h, obj_score, class_0, ..., class_79].
//   Scores (objectness and per-class) are consumed as-is, i.e. the model is
//   expected to have applied the sigmoid already.
//   Box attributes are NOT decoded by the model: cx/cy are offsets from the
//   grid cell origin and w/h are log-scale relative to the stride. They are
//   decoded here using the stride 8/16/32 grids of a 416x416 input.
//
// Every box whose objectness >= obj_thresh and whose
// objectness * best_class_score >= conf_thresh is emitted as a
// RELATIVE_BOUNDING_BOX normalized by input_size and clamped to [0, 1].
// No non-max suppression is performed by this calculator.
//
// Input:
//   TENSORS: Vector of ov::Tensor
// Output:
//   DETECTIONS: Vector of Detection protos
//
// Options (OpenVINOYoloXTensorsToDetectionsCalculatorOptions):
//   conf_thresh, obj_thresh, input_size.

class OpenVINOYoloXTensorsToDetectionsCalculator : public CalculatorBase {
 public:
  // Declares the stream types. Both tags are mandatory because Process()
  // accesses them unconditionally.
  static absl::Status GetContract(CalculatorContract* cc) {
    RET_CHECK(cc->Inputs().HasTag("TENSORS"))
        << "Input stream with tag TENSORS is required.";
    RET_CHECK(cc->Outputs().HasTag("DETECTIONS"))
        << "Output stream with tag DETECTIONS is required.";
    cc->Inputs().Tag("TENSORS").Set<std::vector<ov::Tensor>>();
    cc->Outputs().Tag("DETECTIONS").Set<std::vector<Detection>>();
    return absl::OkStatus();
  }

  // Reads the thresholds and the normalization size from the options.
  // Unset fields fall back to the defaults declared in the options proto.
  absl::Status Open(CalculatorContext* cc) override {
    const auto& options =
        cc->Options<mediapipe::OpenVINOYoloXTensorsToDetectionsCalculatorOptions>();
    min_thresh_ = options.conf_thresh();
    obj_thresh_ = options.obj_thresh();
    input_size_ = options.input_size();
    // input_size_ is used as a divisor when normalizing coordinates.
    RET_CHECK_GT(input_size_, 0.0f) << "input_size must be positive.";
    cc->SetOffset(TimestampDiff(0));
    return absl::OkStatus();
  }

  // Decodes one output tensor into a vector of detections and emits it at the
  // input timestamp. An empty vector is emitted when nothing passes the
  // thresholds; nothing is emitted when the TENSORS packet is empty.
  absl::Status Process(CalculatorContext* cc) override {
    if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
      return absl::OkStatus();
    }

    const auto& tensors =
        cc->Inputs().Tag("TENSORS").Get<std::vector<ov::Tensor>>();
    RET_CHECK(!tensors.empty());
    const ov::Tensor& raw = tensors[0];
    RET_CHECK(raw.get_element_type() == ov::element::f32);

    // Attribute-first layout: [1, 85, 3549].
    const auto& shape = raw.get_shape();
    RET_CHECK_EQ(shape.size(), 3u);
    RET_CHECK_EQ(shape[0], 1u);
    RET_CHECK_EQ(shape[1], static_cast<size_t>(kNumAttrs));
    RET_CHECK_EQ(shape[2], static_cast<size_t>(kNumBoxes));

    const float* data = raw.data<float>();
    RET_CHECK(data != nullptr);

    // Accessor for the [attr, box] layout.
    auto at = [data](int attr, int box) -> float {
      return data[attr * kNumBoxes + box];
    };

    // Grid strides for a 416x416 input:
    //   stride  8 -> 52x52 = 2704 boxes
    //   stride 16 -> 26x26 =  676 boxes
    //   stride 32 -> 13x13 =  169 boxes
    //   total               = 3549
    // NOTE(review): the grid is fixed to 416x416 while input_size is
    // configurable; other input sizes would need a matching grid.
    struct GridInfo {
      int stride;
      int cols;
      int rows;
    };
    static constexpr GridInfo kGrids[] = {
        {8, 52, 52},
        {16, 26, 26},
        {32, 13, 13},
    };
    static_assert(52 * 52 + 26 * 26 + 13 * 13 == kNumBoxes,
                  "Grid cells must cover every box of the output tensor.");

    auto output_detections = absl::make_unique<std::vector<Detection>>();

    // Boxes are stored grid by grid, row-major inside each grid, so a single
    // running index walks the tensor in step with (grid, gy, gx).
    int box_idx = 0;
    for (const GridInfo& g : kGrids) {
      for (int gy = 0; gy < g.rows; ++gy) {
        for (int gx = 0; gx < g.cols; ++gx, ++box_idx) {
          // Scores are used as probabilities directly (no sigmoid here).
          const float obj = at(4, box_idx);
          if (obj < obj_thresh_) continue;

          int best_cls = 0;
          float best_cls_score = 0.0f;
          for (int c = 0; c < kNumClasses; ++c) {
            const float s = at(5 + c, box_idx);
            if (s > best_cls_score) {
              best_cls_score = s;
              best_cls = c;
            }
          }

          const float score = obj * best_cls_score;
          if (score < min_thresh_) continue;

          // Verbose-only: this runs once per candidate box on every frame.
          VLOG(1) << "CLASS: " << best_cls
                  << ", CLASS_SCORE: " << best_cls_score
                  << ", OBJECTNESS SCORE: " << obj
                  << ", FINAL SCORE: " << score;

          // Grid decode: cx, cy are offsets from the grid cell origin;
          // w, h are log-scale relative to the stride.
          const float cx = (at(0, box_idx) + gx) * g.stride;
          const float cy = (at(1, box_idx) + gy) * g.stride;
          const float w = std::exp(at(2, box_idx)) * g.stride;
          const float h = std::exp(at(3, box_idx)) * g.stride;

          // Normalize to [0, 1] and clamp to the image.
          const float x1 = std::max(0.0f, (cx - w * 0.5f) / input_size_);
          const float y1 = std::max(0.0f, (cy - h * 0.5f) / input_size_);
          const float x2 = std::min(1.0f, (cx + w * 0.5f) / input_size_);
          const float y2 = std::min(1.0f, (cy + h * 0.5f) / input_size_);

          // Drop boxes that are empty after clamping.
          if (x2 <= x1 || y2 <= y1) continue;

          // Build the detection in place to avoid copying the proto.
          output_detections->emplace_back();
          Detection& det = output_detections->back();
          auto* loc = det.mutable_location_data();
          loc->set_format(LocationData::RELATIVE_BOUNDING_BOX);
          auto* bbox = loc->mutable_relative_bounding_box();
          bbox->set_xmin(x1);
          bbox->set_ymin(y1);
          bbox->set_width(x2 - x1);
          bbox->set_height(y2 - y1);
          det.add_score(score);
          det.add_label_id(best_cls);
        }
      }
    }

    cc->Outputs().Tag("DETECTIONS")
        .Add(output_detections.release(), cc->InputTimestamp());
    return absl::OkStatus();
  }

 private:
  // Fixed output geometry of the supported model.
  static constexpr int kNumBoxes = 3549;
  static constexpr int kNumAttrs = 85;
  static constexpr int kNumClasses = 80;

  // Set in Open() from the calculator options; the initializers mirror the
  // proto defaults so the members are never read uninitialized.
  float input_size_ = 416.0f;  // Divisor used to normalize coordinates.
  float obj_thresh_ = 0.1f;    // Minimum objectness score.
  float min_thresh_ = 0.1f;    // Minimum objectness * class score.
};

REGISTER_CALCULATOR(OpenVINOYoloXTensorsToDetectionsCalculator);

} // namespace mediapipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
syntax = 'proto2';

package mediapipe;

import "mediapipe/framework/calculator.proto";

// Options for OpenVINOYoloXTensorsToDetectionsCalculator.
message OpenVINOYoloXTensorsToDetectionsCalculatorOptions {
extend .mediapipe.CalculatorOptions {
optional OpenVINOYoloXTensorsToDetectionsCalculatorOptions ext = 211376657;
}

// Minimum final score (objectness * best class score) for a box to be
// emitted as a detection.
optional float conf_thresh = 1 [default = 0.10];
// Minimum objectness score; boxes below it are skipped before the class
// scores are examined.
optional float obj_thresh = 2 [default = 0.10];
// Value the decoded box coordinates are divided by to normalize them to
// [0, 1].
optional float input_size = 3 [default = 416.0];

}
1 change: 1 addition & 0 deletions mediapipe/calculators/ovms/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ cc_library(
"//mediapipe/calculators/openvino:openvino_tensors_to_detections_calculator_cc_proto",
"//mediapipe/calculators/openvino:openvino_converter_calculator_cc_proto",
"//mediapipe/calculators/openvino:openvino_converter_calculator",
"//mediapipe/calculators/openvino:openvino_yolox_tensors_to_detections_calculator",
"//mediapipe/calculators/openvino:openvino_tensors_to_classification_calculator",
"//mediapipe/calculators/openvino:openvino_tensors_to_detections_calculator",
":modelapiovmsadapter",
Expand Down
6 changes: 6 additions & 0 deletions mediapipe/calculators/ovms/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
"name":"ssdlite_object_detection",
"base_path":"/mediapipe/mediapipe/models/ovms/ssdlite_object_detection"
}
},
{
"config":{
"name":"yoloxn_float32",
"base_path":"/mediapipe/mediapipe/models/ovms/yoloxn_float32"
}
}
]
}
25 changes: 25 additions & 0 deletions mediapipe/calculators/tflite/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,31 @@ mediapipe_proto_library(
],
)

# Calculator library built from yolox_tensors_to_detections_calculator.cc;
# depends on its generated options proto, the Detection/LocationData protos
# and the TensorFlow Lite framework.
cc_library(
name = "yolox_tensors_to_detections_calculator",
srcs = ["yolox_tensors_to_detections_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":yolox_tensors_to_detections_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@org_tensorflow//tensorflow/lite:framework",
],
# Force-link the object file even when no symbol is referenced directly;
# presumably required so the calculator's static registration is kept.
alwayslink = 1,
)

# Options proto for the TFLite YOLOX tensors-to-detections calculator.
# No visibility attribute is set here, so the package default applies.
mediapipe_proto_library(
name = "yolox_tensors_to_detections_calculator_proto",
srcs = ["yolox_tensors_to_detections_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)

mediapipe_proto_library(
name = "tflite_tensors_to_landmarks_calculator_proto",
srcs = ["tflite_tensors_to_landmarks_calculator.proto"],
Expand Down
Loading