Skip to content

Commit 03ba54e

Browse files
committed
added facial landmarks estimation and code refactor
Signed-off-by: Mpho Mphego <mpho112@gmail.com>
1 parent a2728fb commit 03ba54e

File tree

3 files changed

+106
-30
lines changed

3 files changed

+106
-30
lines changed

main.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
bash -c "source /opt/intel/openvino/bin/setupvars.sh && \
1212
python main.py \
1313
--face-model models/face-detection-adas-binary-0001 \
14+
--facial-landmarks-model models/landmarks-regression-retail-0009 \
1415
--head-pose-model models/head-pose-estimation-adas-0001 \
15-
--facial-landmarks-model models/face-detection-adas-binary-0001 \
1616
--gaze-model models/gaze-estimation-adas-0002 \
1717
--input resources/demo.mp4";
1818
"""
@@ -117,13 +117,19 @@ def main(args):
117117
mouse_controller = MouseController(
118118
precision=args.mouse_precision, speed=args.mouse_speed
119119
)
120+
video_feed = InputFeeder(input_file=args.input)
121+
120122
face_detection = Face_Detection(
121-
args.face_model, device=args.device, threshold=args.prob_threshold
123+
model_name=args.face_model,
124+
source_width=video_feed.source_width,
125+
source_height=video_feed.source_height,
126+
device=args.device,
127+
threshold=args.prob_threshold,
122128
)
129+
facial_landmarks = Facial_Landmarks(args.facial_landmarks_model, device=args.device)
123130
head_pose_estimation = Head_Pose_Estimation(
124131
args.head_pose_model, device=args.device
125132
)
126-
facial_landmarks = Facial_Landmarks(args.facial_landmarks_model, device=args.device)
127133
gaze_estimation = Gaze_Estimation(args.gaze_model, device=args.device)
128134

129135
model_load_time = (
@@ -134,19 +140,38 @@ def main(args):
134140
) / 1000
135141
logger.info(f"Total time taken to load all the models: {model_load_time:.2f} secs.")
136142

137-
video_feed = InputFeeder(input_file=args.input)
138-
139-
# Add source width and height for face detection.
140-
face_detection._init_image_w = video_feed.source_width
141-
face_detection._init_image_h = video_feed.source_height
142-
143143
for frame in video_feed.next_frame():
144-
predict_end_time, pred_result = face_detection.predict(frame,draw=True)
145-
text = f"Inference time: {predict_end_time:.2f}ms"
146-
face_detection.add_text(text, frame, (15, face_detection._init_image_h - 50))
144+
predict_end_time, _, face_bboxes = face_detection.predict(frame, draw=True)
145+
text = f"Face Detection Inference time: {predict_end_time:.3f} s"
146+
face_detection.add_text(text, frame, (15, video_feed.source_height - 80))
147+
148+
if face_bboxes:
149+
for face_bbox in face_bboxes:
150+
# Useful resource: https://www.pyimagesearch.com/2018/09/24/opencv-face-recognition/
151+
152+
# Each entry of face_bboxes returned by the face detection inference
153+
# is a bounding box given as `xmin, ymin, xmax, ymax`.
154+
# Therefore the face can be cropped by:
155+
# frame[face_bbox[1]:face_bbox[3], face_bbox[0]:face_bbox[2]]
156+
157+
# extract the face ROI
158+
(x, y, w, h) = face_bbox
159+
face = frame[y:h, x:w]
160+
(face_height, face_width) = face.shape[:2]
161+
# video_feed.show(frame[y:h, x:w], "face")
162+
163+
# ensure the face width and height are sufficiently large
164+
if face_height < 20 or face_width < 20:
165+
continue
166+
167+
predict_end_time, _, landmarks_bboxes = facial_landmarks.predict(face)
168+
text = f"Facial Landmarks Est. Inference time: {predict_end_time:.3f} s"
169+
facial_landmarks.add_text(
170+
text, frame, (15, video_feed.source_height - 60)
171+
)
147172

148173
if args.debug:
149-
video_feed.show(frame)
174+
video_feed.show(video_feed.resize(frame))
150175

151176
video_feed.close()
152177

src/input_feeder.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,13 @@ def progress_bar(self):
7878
self._progress_bar = tqdm(total=int(self.video_len - self.fps + 1))
7979
return self._progress_bar
8080

81-
def resize(self,frame):
82-
return cv2.resize(frame, (self.source_width - 200, self.source_height - 200))
81+
def resize(self, frame, height=None, width=None):
82+
if (height and width) is None:
83+
width, height = (self.source_width - 200, self.source_height - 200)
84+
return cv2.resize(frame, (width, height))
8385

8486
def show(self, frame, frame_name="video"):
85-
cv2.imshow(frame_name, self.resize(frame))
87+
cv2.imshow(frame_name, frame)
8688

8789
def write_video(self, output_path=".", filename="output_video.mp4"):
8890
out_video = cv2.VideoWriter(

src/model.py

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,15 @@
2525
class Base(abc.ABC):
2626
"""Model Base Class"""
2727

28-
def __init__(self, model_name, device="CPU", threshold=0.60, extensions=None):
28+
def __init__(
29+
self,
30+
model_name,
31+
source_width=None,
32+
source_height=None,
33+
device="CPU",
34+
threshold=0.60,
35+
extensions=None,
36+
):
2937
self.model_weights = f"{model_name}.bin"
3038
self.model_structure = f"{model_name}.xml"
3139
assert (
@@ -45,8 +53,8 @@ def __init__(self, model_name, device="CPU", threshold=0.60, extensions=None):
4553
self.input_shape = self.model.inputs[self.input_name].shape
4654
self.output_name = next(iter(self.model.outputs))
4755
self.output_shape = self.model.outputs[self.output_name].shape
48-
self._init_image_w = None
49-
self._init_image_h = None
56+
self._init_image_w = source_width
57+
self._init_image_h = source_height
5058
self.exec_network = None
5159
self.load_model()
5260

@@ -91,15 +99,16 @@ def predict(self, image, request_id=0, draw=False):
9199
request_id=request_id, inputs={self.input_name: p_image}
92100
)
93101
status = self.exec_network.requests[request_id].wait(-1)
102+
bbox = None
94103
if status == 0:
95104
predict_start_time = time.time()
96105
pred_result = self.exec_network.requests[request_id].outputs[
97106
self.output_name
98107
]
99-
predict_end_time = (time.time() - predict_start_time) * 1000
108+
predict_end_time = float(time.time() - predict_start_time) * 1000
100109
if draw:
101-
self.preprocess_output(pred_result, image, show_bbox=draw)
102-
return (predict_end_time, pred_result)
110+
bbox, _ = self.preprocess_output(pred_result, image, show_bbox=draw)
111+
return (predict_end_time, pred_result, bbox)
103112

104113
@abc.abstractmethod
105114
def preprocess_output(self, inference_results, image, show_bbox=False):
@@ -128,8 +137,18 @@ def preprocess_input(self, image):
128137
class Face_Detection(Base):
129138
"""Class for the Face Detection Model."""
130139

131-
def __init__(self, model_name, device="CPU", threshold=0.60, extensions=None):
132-
super().__init__(model_name, device="CPU", threshold=0.60, extensions=None)
140+
def __init__(
141+
self,
142+
model_name,
143+
source_width=None,
144+
source_height=None,
145+
device="CPU",
146+
threshold=0.60,
147+
extensions=None,
148+
):
149+
super().__init__(
150+
model_name, source_width, source_height, device, threshold, extensions,
151+
)
133152

134153
def preprocess_output(self, inference_results, image, show_bbox=False):
135154
"""Draw bounding boxes onto the frame."""
@@ -199,8 +218,18 @@ def draw_output(
199218
class Head_Pose_Estimation(Base):
200219
"""Class for the Head Pose Estimation Model."""
201220

202-
def __init__(self, model_name, device="CPU", threshold=0.60, extensions=None):
203-
super().__init__(model_name, device="CPU", threshold=0.60, extensions=None)
221+
def __init__(
222+
self,
223+
model_name,
224+
source_width=None,
225+
source_height=None,
226+
device="CPU",
227+
threshold=0.60,
228+
extensions=None,
229+
):
230+
super().__init__(
231+
model_name, source_width, source_height, device, threshold, extensions,
232+
)
204233

205234
def preprocess_output(self, inference_results, image):
206235
pass
@@ -212,8 +241,18 @@ def draw_output(coords, image):
212241
class Facial_Landmarks(Base):
213242
"""Class for the Facial Landmarks Detection Model."""
214243

215-
def __init__(self, model_name, device="CPU", threshold=0.60, extensions=None):
216-
super().__init__(model_name, device="CPU", threshold=0.60, extensions=None)
244+
def __init__(
245+
self,
246+
model_name,
247+
source_width=None,
248+
source_height=None,
249+
device="CPU",
250+
threshold=0.60,
251+
extensions=None,
252+
):
253+
super().__init__(
254+
model_name, source_width, source_height, device, threshold, extensions,
255+
)
217256

218257
def preprocess_output(self, inference_results, image):
219258
pass
@@ -225,8 +264,18 @@ def draw_output(coords, image):
225264
class Gaze_Estimation(Base):
226265
"""Class for the Gaze Estimation Detection Model."""
227266

228-
def __init__(self, model_name, device="CPU", threshold=0.60, extensions=None):
229-
super().__init__(model_name, device="CPU", threshold=0.60, extensions=None)
267+
def __init__(
268+
self,
269+
model_name,
270+
source_width=None,
271+
source_height=None,
272+
device="CPU",
273+
threshold=0.60,
274+
extensions=None,
275+
):
276+
super().__init__(
277+
model_name, source_width, source_height, device, threshold, extensions,
278+
)
230279

231280
def preprocess_output(self, inference_results, image):
232281
pass

0 commit comments

Comments
 (0)