Skip to content

Commit c6be7c8

Browse files
committed
Added head-pose estimation
Signed-off-by: Mpho Mphego <mpho112@gmail.com>
1 parent df0d551 commit c6be7c8

File tree

2 files changed

+115
-14
lines changed

2 files changed

+115
-14
lines changed

main.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def main(args):
142142

143143
for frame in video_feed.next_frame():
144144

145-
predict_end_time, face_bboxes = face_detection.predict(frame, draw=True)
145+
predict_end_time, face_bboxes = face_detection.predict(frame, show_bbox=True)
146146
text = f"Face Detection Inference time: {predict_end_time:.3f} s"
147147
face_detection.add_text(text, frame, (15, video_feed.source_height - 80))
148148

@@ -165,13 +165,22 @@ def main(args):
165165
if face_height < 20 or face_width < 20:
166166
continue
167167

168-
predict_end_time, eyes_coords = facial_landmarks.predict(face, draw=True)
168+
predict_end_time, eyes_coords = facial_landmarks.predict(
169+
face, show_bbox=True
170+
)
169171
text = f"Facial Landmarks Est. Inference time: {predict_end_time:.3f} s"
170172
facial_landmarks.add_text(
171173
text, frame, (15, video_feed.source_height - 60)
172174
)
173175

174-
176+
predict_end_time, head_pose_angles = head_pose_estimation.predict(
177+
face, show_bbox=True
178+
)
179+
text = f"Head Pose Est. Inference time: {predict_end_time:.3f} s"
180+
head_pose_estimation.add_text(
181+
text, frame, (15, video_feed.source_height - 40)
182+
)
183+
# print (f"head pose: {head_pose_angles}")
175184

176185
if args.debug:
177186
video_feed.show(video_feed.resize(frame))

src/model.py

Lines changed: 103 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import argparse
22
import os
3+
import math
34
import sys
45
import time
56
import subprocess
@@ -23,6 +24,10 @@
2324
]
2425

2526

27+
class InvalidModel(Exception):
    """Raised when a loaded model's outputs do not match the expected layout."""
29+
30+
2631
class Base(ABC):
2732
"""Model Base Class"""
2833

@@ -53,12 +58,19 @@ def __init__(
5358
self.input_name = next(iter(self.model.inputs))
5459
self.input_shape = self.model.inputs[self.input_name].shape
5560
self.output_name = next(iter(self.model.outputs))
61+
self._output_shape = None
5662
self.output_shape = self.model.outputs[self.output_name].shape
5763
self._init_image_w = source_width
5864
self._init_image_h = source_height
5965
self.exec_network = None
6066
self.load_model()
6167

68+
# @property
69+
# def output_shape(self):
70+
# if not self._output_shape:
71+
# self._output_shape =
72+
# return self._output_shape
73+
6274
def _get_model(self):
6375
"""Helper function for reading the network."""
6476
try:
@@ -91,7 +103,7 @@ def load_model(self):
91103
f"Model: {self.model_structure} took {self._model_load_time:.3f} ms to load."
92104
)
93105

94-
def predict(self, image, request_id=0, draw=False):
106+
def predict(self, image, request_id=0, show_bbox=False):
95107
if not isinstance(image, np.ndarray):
96108
raise IOError("Image not parsed correctly.")
97109

@@ -100,15 +112,17 @@ def predict(self, image, request_id=0, draw=False):
100112
request_id=request_id, inputs={self.input_name: p_image}
101113
)
102114
status = self.exec_network.requests[request_id].wait(-1)
103-
bbox = None
104115
if status == 0:
105116
predict_start_time = time.time()
106-
pred_result = self.exec_network.requests[request_id].outputs[
107-
self.output_name
108-
]
117+
pred_result = []
118+
for output_name, data_ptr in self.model.outputs.items():
119+
pred_result.append(
120+
self.exec_network.requests[request_id].outputs[output_name]
121+
)
109122
predict_end_time = float(time.time() - predict_start_time) * 1000
110-
if draw:
111-
bbox, _ = self.preprocess_output(pred_result, image, show_bbox=draw)
123+
bbox, _ = self.preprocess_output(
124+
pred_result, image, show_bbox=show_bbox
125+
)
112126
return (predict_end_time, bbox)
113127

114128
@abstractmethod
@@ -162,6 +176,8 @@ def preprocess_output(self, inference_results, image, show_bbox=False):
162176
"""Draw bounding boxes onto the Face Detection frame."""
163177
if not (self._init_image_w and self._init_image_h):
164178
raise RuntimeError("Initial image width and height cannot be None.")
179+
if len(inference_results) == 1:
180+
inference_results = inference_results[0]
165181

166182
coords = []
167183
for box in inference_results[0][0]: # Output shape is 1x1xNx7
@@ -282,11 +298,86 @@ def __init__(
282298
model_name, source_width, source_height, device, threshold, extensions,
283299
)
284300

285-
def preprocess_output(self, inference_results, image):
286-
pass
301+
def preprocess_output(self, inference_results, image, show_bbox=False):
    """Convert raw head-pose inference results into named angles (degrees).

    Example
    -------
    Model: head-pose-estimation-adas-0001

    Output layer names in Inference Engine format:

    name: "angle_y_fc", shape: [1, 1] - Estimated yaw (in degrees).
    name: "angle_p_fc", shape: [1, 1] - Estimated pitch (in degrees).
    name: "angle_r_fc", shape: [1, 1] - Estimated roll (in degrees).

    Parameters
    ----------
    inference_results : list
        One array per model output layer.
    image : np.ndarray
        Cropped face; annotated in place when ``show_bbox`` is True.
    show_bbox : bool, optional
        When True, overlay the pose axes on ``image`` via ``draw_output``.
        Defaults to False for consistency with the other detectors'
        ``preprocess_output`` signatures.

    Returns
    -------
    tuple(dict, np.ndarray)
        ``{"yaw": ..., "pitch": ..., "roll": ...}`` and the image.

    Raises
    ------
    InvalidModel
        If the model does not expose exactly three output layers.
    """
    if len(inference_results) != 3:
        msg = (
            f"The model:{self.model_structure} does not contain expected output "
            "shape as per the docs."
        )
        self.logger.error(msg)
        raise InvalidModel(msg)

    # NOTE(review): results arrive in whatever order the runtime iterates
    # the model's outputs; confirm that order really is yaw, pitch, roll
    # for the target model before trusting these labels.
    output_layer_names = ["yaw", "pitch", "roll"]
    flattened_predictions = np.vstack(inference_results).ravel()
    head_pose_angles = dict(zip(output_layer_names, flattened_predictions))
    if show_bbox:
        # cv2 drawing mutates `image` in place; no need to capture the return.
        self.draw_output(head_pose_angles, image)

    return head_pose_angles, image
287331

332+
@staticmethod
def draw_output(coords, image):
    """Overlay the three head-pose axes on *image* (drawn in place).

    Ref: https://github.com/natanielruiz/deep-head-pose/blob/master/code/utils.py#L86+L117
    """
    yaw, pitch, roll = coords.values()

    # Degrees -> radians; yaw is negated to match the drawing convention.
    pitch = pitch * np.pi / 180
    yaw = -(yaw * np.pi / 180)
    roll = roll * np.pi / 180

    height, width = image.shape[:2]
    center_x = width / 2
    center_y = height / 2
    axis_length = 1000

    # Hoist the trig terms so each is computed exactly once.
    cos_y, sin_y = math.cos(yaw), math.sin(yaw)
    cos_p, sin_p = math.cos(pitch), math.sin(pitch)
    cos_r, sin_r = math.cos(roll), math.sin(roll)

    # X-Axis pointing to right, drawn in red.
    x1 = axis_length * (cos_y * cos_r) + center_x
    y1 = axis_length * (cos_p * sin_r + cos_r * sin_p * sin_y) + center_y

    # Y-Axis (pointing down), drawn in green.
    x2 = axis_length * (-cos_y * sin_r) + center_x
    y2 = axis_length * (cos_p * cos_r - sin_p * sin_y * sin_r) + center_y

    # Z-Axis (out of the screen), drawn in blue.
    x3 = axis_length * (sin_y) + center_x
    y3 = axis_length * (-cos_y * sin_p) + center_y

    origin = (int(center_x), int(center_y))
    cv2.line(image, origin, (int(x1), int(y1)), (0, 0, 255), 3)
    cv2.line(image, origin, (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(image, origin, (int(x3), int(y3)), (255, 0, 0), 2)

    return image
290381

291382

292383
class Gaze_Estimation(Base):
@@ -305,8 +396,9 @@ def __init__(
305396
model_name, source_width, source_height, device, threshold, extensions,
306397
)
307398

308-
def preprocess_output(self, inference_results, image):
399+
def preprocess_output(self, inference_results, image, show_bbox):
    """Gaze post-processing — not implemented yet; returns None."""
310401

402+
@staticmethod
311403
def draw_output(coords, image):
312404
pass

0 commit comments

Comments
 (0)