[Day 28] 來做一個人臉互動的程式吧！

13th鐵人賽 computer vision

山姆大叔

2021-10-11 13:35:42

3899 瀏覽

分享至

在我完成人臉關鍵點與人臉對齊的學習後，覺得眼睛有點累想要休息 -- 這時一個應用就出來了！

我們每天會接觸到"螢幕"的機會有很多：

電視
電腦
手機
平板

而真正讓眼睛休息的時間，通常都是準備要就寢了。

適時的還是要讓眼睛做一些放鬆的運動，像是：
眨眼放鬆
眼球轉動
凝視遠方
眼部肌肉按摩
...等等等

假設今天有一個App，就像內建的鬧鐘一樣，

會定期跳出提醒你：嘿！你好像用手機有點久了喔？來作一下眼部放鬆的運動吧！

只需要花個幾分鐘，

換來長時間用眼的舒緩，

我們就來作一個人臉互動的應用 -- 眼部放鬆App

(如果想要玩玩的可以到這裡看一下安裝與執行步驟，我們接下來將一步一步完成這個App)

本文開始

在專案下分別建立目錄：

- applications
   - easy-eye-app
      - utils

在當前Python環境安裝：
- imutils
- opencv-contrib-python
- dlib
接下來讓我們先想一下流程：
- 開啟攝影機
- 偵測人臉，提示使用者作人臉對齊
- 辨識人臉關鍵點，判斷眼睛位置
- 根據眼睛的眨眼與眼球位置做出對應的判斷
根據前一步的流程，我們需要幾個方法來幫助我們開發：
- 偵測人臉
- 辨識人臉關鍵點
- 偵測人臉面對方向

我們就依序開發各個方法吧！

建立公用方法

打開utils目錄，新增一個face_detector.py (內容與之前Dlib MMOD的類似，只是改成class方式)：

# 匯入必要套件
import ntpath
import os
from bz2 import decompress
from urllib.request import urlretrieve

import cv2
import dlib


class FaceDetector:
    """Detect faces with dlib's CNN (MMOD) face detection model.

    The model file lives next to this module. If it is missing, the
    bz2-compressed model is downloaded and decompressed into that same
    directory the first time the class is instantiated.
    """

    def __init__(self):
        """Ensure the model file exists locally, then load the detector."""
        model_name = "mmod_human_face_detector.dat"
        # Resolve the model location relative to this module (not the current
        # working directory) so the download target and the load path agree.
        model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), model_name)
        if not os.path.exists(model_path):
            archive_path = model_path + ".bz2"
            urlretrieve("https://github.com/davisking/dlib-models/raw/master/mmod_human_face_detector.dat.bz2",
                        archive_path)
            try:
                # Decompress fully before creating the model file, so a corrupt
                # download never leaves a truncated model behind.
                with open(archive_path, "rb") as archive:
                    data = decompress(archive.read())
            finally:
                os.remove(archive_path)
            with open(model_path, "wb") as model_file:
                model_file.write(data)

        # Initialize the model
        self._detector = dlib.cnn_face_detection_model_v1(model_path)

    def detect(self, img):
        """Return the bounding rectangles of the faces found in a BGR image.

        :param img: image in OpenCV BGR channel order
        :return: list with the ``rect`` attribute of every detection
        """
        # The image is converted from BGR to RGB before being passed to dlib.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Second argument is passed through to dlib (upsampling count per the dlib API docs).
        results = self._detector(rgb, 1)
        return [r.rect for r in results]

新增另一個檔案landmark_detector.py (內容同樣與之前Dlib人臉關鍵點辨識的類似，只是改成class方式)；由於偵測人臉是使用Dlib MMOD方法，辨識人臉關鍵點時直接沿用偵測人臉得到的Bounding Box即可：

import ntpath
import os
from bz2 import decompress
from urllib.request import urlretrieve

import cv2
import dlib
from imutils import face_utils


class LandmarkDetector:
    """Locate facial landmarks with a dlib shape predictor.

    The model file lives next to this module. If it is missing, the
    bz2-compressed model is downloaded and decompressed into that same
    directory the first time the class is instantiated.
    """

    def __init__(self, predictor_type):
        """Ensure the requested model exists locally, then load the predictor.

        :param predictor_type: number of landmarks of the model, 5 or 68
        :raises ValueError: if ``predictor_type`` is neither 5 nor 68
        """
        if predictor_type == 5:
            model_url = "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2"
            model_name = "shape_predictor_5_face_landmarks.dat"
        elif predictor_type == 68:
            model_url = "https://github.com/davisking/dlib-models/raw/master/shape_predictor_68_face_landmarks_GTX.dat.bz2"
            model_name = "shape_predictor_68_face_landmarks_GTX.dat"
        else:
            raise ValueError(f"un-support predictor type: {predictor_type}, must be 5 or 68!")

        # Resolve the model location relative to this module (not the current
        # working directory) so the download target and the load path agree.
        model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), model_name)
        if not os.path.exists(model_path):
            archive_path = model_path + ".bz2"
            urlretrieve(model_url, archive_path)
            try:
                # Decompress fully before creating the model file, so a corrupt
                # download never leaves a truncated model behind.
                with open(archive_path, "rb") as archive:
                    data = decompress(archive.read())
            finally:
                os.remove(archive_path)
            with open(model_path, "wb") as model_file:
                model_file.write(data)

        # Initialize the landmark prediction model
        self._predictor = dlib.shape_predictor(model_path)

    def detect(self, img, rects):
        """Return the landmark coordinates for every face rectangle.

        :param img: image in OpenCV BGR channel order
        :param rects: iterable of face rectangles accepted by the dlib predictor
        :return: list with one ``face_utils.shape_to_np`` result per rectangle,
            in the same order as ``rects``
        """
        # The predictor is run on a grayscale copy of the image.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return [face_utils.shape_to_np(self._predictor(gray, rect)) for rect in rects]

最後是偵測人臉方向用來做人臉對齊。
還記得之前做人臉對齊提到需要用到人臉關鍵點，但只能解決2D平面的問題嗎？
這裡我們一樣會用到人臉關鍵點，但會使用3D人臉模型估計臉部面對方向
新增一個檔案head_pose_estimator.py：

import numpy as np
import cv2

# 3D reference face model: one (x, y, z) row per facial landmark.
# head_pose_estimate() pairs these rows, in this order, with the detected
# 2D landmarks when calling cv2.solvePnP.
model_points = np.array(
    [
        [0.0, 0.0, 0.0],           # nose tip
        [0.0, -330.0, -65.0],      # chin
        [-225.0, 170.0, -135.0],   # left eye
        [225.0, 170.0, -135.0],    # right eye
        [-150.0, -150.0, -125.0],  # left side of the mouth
        [150.0, -150.0, -125.0],   # right side of the mouth
    ]
)


class HeadPoseEstimator:
    """Estimate which way a face is pointing from its 2D facial landmarks.

    The landmarks are matched against the module-level 3D ``model_points``
    with ``cv2.solvePnP``; the resulting pose is then turned into two line
    segments (a "vertical" and a "horizontal" one) in image coordinates.
    """

    def __init__(self, frame_width, frame_height):
        """Store the frame size used to approximate the camera parameters.

        :param frame_width: width of the video frame in pixels
        :param frame_height: height of the video frame in pixels
        """
        self.frame_width = frame_width
        self.frame_height = frame_height

    @staticmethod
    def _box_corners_3d(val):
        """Build the 3D corner points of a rear square and a front square.

        :param val: sequence ``[rear_size, rear_depth, front_size, front_depth]``
        :return: float64 array of shape (10, 3); rows 0-4 trace the rear square
            and rows 5-9 the front square, each repeating its first corner
        """
        rear_size, rear_depth, front_size, front_depth = val[:4]
        corners = []
        for size, depth in ((rear_size, rear_depth), (front_size, front_depth)):
            corners.extend([
                (-size, -size, depth),
                (-size, size, depth),
                (size, size, depth),
                (size, -size, depth),
                (-size, -size, depth),  # first corner again, closing the square
            ])
        # np.float was removed in NumPy 1.24; use the explicit float64 dtype.
        return np.array(corners, dtype=np.float64).reshape(-1, 3)

    @staticmethod
    def _get_2d_points(rotation_vector, translation_vector, camera_matrix, dist_coeffs, val):
        """Project the rear/front square corners onto the image plane.

        :param val: sequence ``[rear_size, rear_depth, front_size, front_depth]``
        :return: int32 array of shape (10, 2) with the projected corners
        """
        point_3d = HeadPoseEstimator._box_corners_3d(val)

        # Project the 3D coordinates onto the 2D image plane
        (point_2d, _) = cv2.projectPoints(point_3d, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
        point_2d = np.int32(point_2d.reshape(-1, 2))
        return point_2d

    def _head_pose_points(self, rotation_vector, translation_vector, camera_matrix, dist_coeffs):
        """Return the two image points used for the horizontal direction.

        :return: ``(p1, p2)`` where ``p1`` is the projected rear-square corner
            at index 2 and ``p2`` is the midpoint of the projected front-square
            corners at indices 5 and 8
        """
        rear_size = 1
        rear_depth = 0
        front_size = self.frame_width
        front_depth = front_size * 2
        val = [rear_size, rear_depth, front_size, front_depth]
        point_2d = self._get_2d_points(rotation_vector, translation_vector, camera_matrix, dist_coeffs, val)
        p1 = point_2d[2]
        p2 = (point_2d[5] + point_2d[8]) // 2
        return p1, p2

    def head_pose_estimate(self, shape):
        """Estimate the head pose for one face.

        :param shape: landmark coordinates indexable at 8, 33, 36, 45, 48 and
            54 (presumably the output of a 68-point landmark predictor)
        :return: ``(vertical_p1, vertical_p2, horizontal_p1, horizontal_p2)``;
            the vertical pair are tuples of ints, the horizontal pair are the
            arrays returned by ``_head_pose_points``
        """
        # 2D image landmarks, listed in the same order as the rows of model_points.
        image_points = np.array([
            shape[33],  # nose tip
            shape[8],  # chin
            shape[36],  # left eye
            shape[45],  # right eye
            shape[48],  # left side of the mouth
            shape[54]  # right side of the mouth
        ], dtype="double")

        # Rough camera intrinsics: focal length ~ frame width, principal point at the frame centre
        focal_length = self.frame_width
        center = (self.frame_width / 2, self.frame_height / 2)
        camera_matrix = np.array([
            [focal_length, 0, center[0]],
            [0, focal_length, center[1]],
            [0, 0, 1]], dtype="double")

        # Assume the lens has no distortion
        dist_coeffs = np.zeros((4, 1))

        # Solve for the rotation and translation vectors
        (_, rotation_vector, translation_vector) = cv2.solvePnP(
            model_points,
            image_points,
            camera_matrix,
            dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE)

        # Project a 3D point lying along the model's z axis (1000 units from the nose) onto the image plane
        (nose_end_point2D, _) = cv2.projectPoints(np.array([[0.0, 0.0, 1000.0]]), rotation_vector,
                                                  translation_vector, camera_matrix, dist_coeffs)

        # Segment from the nose landmark to the projected point (used later for the vertical angle)
        vertical_p1 = (int(image_points[0][0]), int(image_points[0][1]))
        vertical_p2 = (int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]))

        # Points used for the horizontal angle
        (horizontal_p1, horizontal_p2) = self._head_pose_points(rotation_vector, translation_vector, camera_matrix, dist_coeffs)
        return vertical_p1, vertical_p2, horizontal_p1, horizontal_p2