引言
img
面部检测识别已成为许多现代应用程序中不可或缺的组成部分,包括用于设备解锁和在社交媒体应用程序中添加实时效果的应用程序。然而,准确和有效地检测面部特征,包括鼻子,嘴巴,眼睛,甚至虹膜,可能是一个具有挑战性的过程。Google开发的开源框架Mediapipe通过提供强大的预训练机器学习模型来解决这个问题,这些模型允许开发人员高精度地跟踪和分析面部标志。
Mediapipe为计算机视觉任务提供了一套全面的预构建解决方案,包括手部跟踪、姿势估计和面部标志检测。其轻量级设计确保了实时性能,使其成为集成到移动端和基于Web的应用程序中的最佳选择。
本文将详细介绍如何利用Mediapipe检测和跟踪特定的面部特征,包括鼻子、嘴巴、眼睛和虹膜。读完本指南后,读者不仅将全面了解Mediapipe的功能,还将能够在自己的项目中实现面部特征检测,并探索如何借助Mediapipe轻松检测这些关键的面部特征。
详细步骤
步骤1:安装必要的库
pip install opencv-python mediapipe
第2步:导入库
import mediapipe as mp
import cv2
步骤3:初始化FaceMesh模型并定义面部特征标志
class FaceMeshDetector:
    """Detects facial landmarks (eyes, irises, nose, mouth) via Mediapipe FaceMesh."""

    def __init__(self, static_image_mode=False, max_num_faces=1, refine_landmarks=False,
                 min_detection_con=0.5, min_tracking_con=0.5):
        """Configure and create the Mediapipe FaceMesh model.

        Args:
            static_image_mode: True to treat each input as an unrelated image;
                False for a video stream (enables landmark tracking between frames).
            max_num_faces: Maximum number of faces to detect.
            refine_landmarks: If True, adds refined iris landmarks (indices 468-477).
            min_detection_con: Minimum confidence for the face detection stage.
            min_tracking_con: Minimum confidence for landmark tracking.
        """
        self.static_image_mode = static_image_mode
        self.max_num_faces = max_num_faces
        self.refine_landmarks = refine_landmarks
        self.min_detection_con = min_detection_con
        self.min_tracking_con = min_tracking_con
        # Pass parameters by keyword: the original positional call silently
        # breaks if the FaceMesh constructor signature ever changes order.
        self.mpFaceMesh = mp.solutions.face_mesh
        self.faceMesh = self.mpFaceMesh.FaceMesh(
            static_image_mode=self.static_image_mode,
            max_num_faces=self.max_num_faces,
            refine_landmarks=self.refine_landmarks,
            min_detection_confidence=self.min_detection_con,
            min_tracking_confidence=self.min_tracking_con)
        # Predefined Mediapipe landmark indices for each facial feature.
        # NOTE: iris indices (468-477) are only emitted when refine_landmarks=True.
        self.LEFT_EYE_LANDMARKS = [463, 398, 384, 385, 386, 387, 388, 466, 263, 249,
                                   390, 373, 374, 380, 381, 382, 362]
        self.RIGHT_EYE_LANDMARKS = [33, 246, 161, 160, 159, 158, 157, 173, 133, 155,
                                    154, 153, 145, 144, 163, 7]
        self.LEFT_IRIS_LANDMARKS = [474, 475, 477, 476]
        self.RIGHT_IRIS_LANDMARKS = [469, 470, 471, 472]
        self.NOSE_LANDMARKS = [193, 168, 417, 122, 351, 196, 419, 3, 248, 236, 456,
                               198, 420, 131, 360, 49, 279, 48, 278, 219, 439, 59,
                               289, 218, 438, 237, 457, 44, 19, 274]
        self.MOUTH_LANDMARKS = [0, 267, 269, 270, 409, 306, 375, 321, 405, 314, 17,
                                84, 181, 91, 146, 61, 185, 40, 39, 37]
代码定义了一个类,指定为FaceMeshDetector
,它采用Mediapipe开发的FaceMesh解决方案来检测面部标志。这种检测的重点是特定区域,即眼睛,虹膜,鼻子和嘴巴。从图像中提取所识别的界标,并以像素坐标的形式返回。
__init__方法
用于初始化检测器,这是通过配置Mediapipe的FaceMesh解决方案并存储眼睛、虹膜、鼻子和嘴巴的特定地标索引来实现的。
步骤4:处理图像以检测面部标志,提取我们想要的面部特征的坐标
def findMeshInFace(self, img):
    """Detect facial landmarks in a BGR image.

    Args:
        img: BGR image (numpy array), e.g. from cv2.imread or VideoCapture.

    Returns:
        Tuple (img, landmarks) where landmarks maps feature names
        ("left_eye_landmarks", ..., "all_landmarks") to lists of (x, y)
        pixel coordinates. The dict is empty when no face is detected.
    """
    landmarks = {}
    # Mediapipe expects RGB input; OpenCV delivers BGR.
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = self.faceMesh.process(imgRGB)
    if results.multi_face_landmarks:
        # Hoist loop invariants out of the per-landmark loop: the image size
        # never changes, and the index collections are lists, so `in` on them
        # is O(n) per landmark — convert to frozensets for O(1) membership.
        h, w = img.shape[:2]
        feature_sets = [
            ("left_eye_landmarks", frozenset(self.LEFT_EYE_LANDMARKS)),
            ("right_eye_landmarks", frozenset(self.RIGHT_EYE_LANDMARKS)),
            ("left_iris_landmarks", frozenset(self.LEFT_IRIS_LANDMARKS)),
            ("right_iris_landmarks", frozenset(self.RIGHT_IRIS_LANDMARKS)),
            ("nose_landmarks", frozenset(self.NOSE_LANDMARKS)),
            ("mouth_landmarks", frozenset(self.MOUTH_LANDMARKS)),
        ]
        # max_num_faces defaults to 1, so this usually iterates once.
        for faceLms in results.multi_face_landmarks:
            for key, _ in feature_sets:
                landmarks[key] = []
            landmarks["all_landmarks"] = []
            for i, lm in enumerate(faceLms.landmark):
                # Convert normalized [0, 1] coordinates to pixel values.
                x, y = int(lm.x * w), int(lm.y * h)
                landmarks["all_landmarks"].append((x, y))
                for key, indices in feature_sets:
                    if i in indices:
                        landmarks[key].append((x, y))
    # Return the (unmodified) image together with the feature landmarks.
    return img, landmarks
findMeshInFace
方法:该方法处理输入图像,检测面部特征点,并返回图像以及各面部特征的像素坐标。
步骤5:定义图像的主函数
# Initialize the FaceMeshDetector; refine_landmarks=True adds iris landmarks.
detector = FaceMeshDetector(refine_landmarks=True)
# All feature keys that findMeshInFace can return.
face_parts = ["left_eye_landmarks", "right_eye_landmarks", "nose_landmarks",
              "mouth_landmarks", "all_landmarks", "left_iris_landmarks",
              "right_iris_landmarks"]
# Which feature to draw (index 2 -> "nose_landmarks").
face_part = 2
# Replace <YourImagePath> with the actual image path.
image = cv2.imread("<YourImagePath>")
if image is None:
    # BUG FIX: cv2.imread returns None (it does not raise) on a bad path;
    # without this check the script crashes later inside cv2.cvtColor.
    raise FileNotFoundError("Could not read the image; check the file path.")
# Detect facial landmarks in the image.
image, landmarks = detector.findMeshInFace(image)
# Draw the selected feature's landmarks; dict.get handles the no-face case
# (empty dict) without needing a try/except KeyError.
for x, y in landmarks.get(face_parts[face_part], []):
    # Small filled green circle at each landmark coordinate.
    cv2.circle(image, (x, y), 3, (0, 255, 0), -1)
# Label the frame with the name of the feature being shown.
cv2.putText(image, f"{face_parts[face_part]}", (20, 70),
            cv2.FONT_HERSHEY_PLAIN, 5, (0, 255, 0), 5)
# Show the annotated image and wait for any key press before closing.
cv2.imshow("Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
下图展示了上述图像中每个面部特征的检测结果。
img
步骤6:定义视频检测的main函数
# Initialize the FaceMeshDetector; refine_landmarks=True adds iris landmarks.
detector = FaceMeshDetector(refine_landmarks=True)
# All feature keys that findMeshInFace can return.
face_parts = ["left_eye_landmarks", "right_eye_landmarks", "nose_landmarks",
              "mouth_landmarks", "all_landmarks", "left_iris_landmarks",
              "right_iris_landmarks"]
# Which feature to draw (index 2 -> "nose_landmarks").
face_part = 2
# Replace <YourVideoPath> with a video file path, or pass 0 for the webcam.
# (Also fixes the original placeholder, which was missing its closing '>'.)
cap = cv2.VideoCapture("<YourVideoPath>")
# Process the video frame by frame.
while True:
    success, image = cap.read()
    # BUG FIX: check the read result BEFORE processing the frame. The
    # original called findMeshInFace first, so at end-of-video cap.read()
    # returns (False, None) and the script crashed inside cv2.cvtColor.
    if not success:
        break
    image, landmarks = detector.findMeshInFace(image)
    # Draw the selected feature's landmarks; dict.get handles the no-face
    # case (empty dict) without a try/except KeyError.
    for x, y in landmarks.get(face_parts[face_part], []):
        cv2.circle(image, (x, y), 3, (0, 255, 0), -1)
    # Label the frame with the name of the feature being shown.
    cv2.putText(image, f"{face_parts[face_part]}", (20, 70),
                cv2.FONT_HERSHEY_PLAIN, 5, (0, 255, 0), 5)
    cv2.imshow("Image", image)
    # Poll for 1 ms; quit when the user presses 'q'.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Release the capture device and close the display window.
cap.release()
cv2.destroyAllWindows()
视频检测完整代码:
import mediapipe as mp
import cv2
class FaceMeshDetector:
    """Detects facial landmarks (eyes, irises, nose, mouth) via Mediapipe FaceMesh."""

    def __init__(self, static_image_mode=False, max_num_faces=1, refine_landmarks=False,
                 min_detection_con=0.5, min_tracking_con=0.5):
        """Configure and create the Mediapipe FaceMesh model.

        Args:
            static_image_mode: True to treat each input as an unrelated image;
                False for a video stream (enables landmark tracking between frames).
            max_num_faces: Maximum number of faces to detect.
            refine_landmarks: If True, adds refined iris landmarks (indices 468-477).
            min_detection_con: Minimum confidence for the face detection stage.
            min_tracking_con: Minimum confidence for landmark tracking.
        """
        self.static_image_mode = static_image_mode
        self.max_num_faces = max_num_faces
        self.refine_landmarks = refine_landmarks
        self.min_detection_con = min_detection_con
        self.min_tracking_con = min_tracking_con
        # Pass parameters by keyword: the original positional call silently
        # breaks if the FaceMesh constructor signature ever changes order.
        self.mpFaceMesh = mp.solutions.face_mesh
        self.faceMesh = self.mpFaceMesh.FaceMesh(
            static_image_mode=self.static_image_mode,
            max_num_faces=self.max_num_faces,
            refine_landmarks=self.refine_landmarks,
            min_detection_confidence=self.min_detection_con,
            min_tracking_confidence=self.min_tracking_con)
        # Predefined Mediapipe landmark indices for each facial feature.
        # NOTE: iris indices (468-477) are only emitted when refine_landmarks=True.
        self.LEFT_EYE_LANDMARKS = [463, 398, 384, 385, 386, 387, 388, 466, 263, 249,
                                   390, 373, 374, 380, 381, 382, 362]
        self.RIGHT_EYE_LANDMARKS = [33, 246, 161, 160, 159, 158, 157, 173, 133, 155,
                                    154, 153, 145, 144, 163, 7]
        self.LEFT_IRIS_LANDMARKS = [474, 475, 477, 476]
        self.RIGHT_IRIS_LANDMARKS = [469, 470, 471, 472]
        self.NOSE_LANDMARKS = [193, 168, 417, 122, 351, 196, 419, 3, 248, 236, 456,
                               198, 420, 131, 360, 49, 279, 48, 278, 219, 439, 59,
                               289, 218, 438, 237, 457, 44, 19, 274]
        self.MOUTH_LANDMARKS = [0, 267, 269, 270, 409, 306, 375, 321, 405, 314, 17,
                                84, 181, 91, 146, 61, 185, 40, 39, 37]

    def findMeshInFace(self, img):
        """Detect facial landmarks in a BGR image.

        Args:
            img: BGR image (numpy array), e.g. from cv2.imread or VideoCapture.

        Returns:
            Tuple (img, landmarks) where landmarks maps feature names
            ("left_eye_landmarks", ..., "all_landmarks") to lists of (x, y)
            pixel coordinates. The dict is empty when no face is detected.
        """
        landmarks = {}
        # Mediapipe expects RGB input; OpenCV delivers BGR.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.faceMesh.process(imgRGB)
        if results.multi_face_landmarks:
            # Hoist loop invariants out of the per-landmark loop: the image
            # size never changes, and `in` on a list is O(n) per landmark —
            # convert the index lists to frozensets for O(1) membership.
            h, w = img.shape[:2]
            feature_sets = [
                ("left_eye_landmarks", frozenset(self.LEFT_EYE_LANDMARKS)),
                ("right_eye_landmarks", frozenset(self.RIGHT_EYE_LANDMARKS)),
                ("left_iris_landmarks", frozenset(self.LEFT_IRIS_LANDMARKS)),
                ("right_iris_landmarks", frozenset(self.RIGHT_IRIS_LANDMARKS)),
                ("nose_landmarks", frozenset(self.NOSE_LANDMARKS)),
                ("mouth_landmarks", frozenset(self.MOUTH_LANDMARKS)),
            ]
            # max_num_faces defaults to 1, so this usually iterates once.
            for faceLms in results.multi_face_landmarks:
                for key, _ in feature_sets:
                    landmarks[key] = []
                landmarks["all_landmarks"] = []
                for i, lm in enumerate(faceLms.landmark):
                    # Convert normalized [0, 1] coordinates to pixel values.
                    x, y = int(lm.x * w), int(lm.y * h)
                    landmarks["all_landmarks"].append((x, y))
                    for key, indices in feature_sets:
                        if i in indices:
                            landmarks[key].append((x, y))
        # Return the (unmodified) image together with the feature landmarks.
        return img, landmarks
# Initialize the FaceMeshDetector; refine_landmarks=True adds iris landmarks.
detector = FaceMeshDetector(refine_landmarks=True)
# All feature keys that findMeshInFace can return.
face_parts = ["left_eye_landmarks", "right_eye_landmarks", "nose_landmarks",
              "mouth_landmarks", "all_landmarks", "left_iris_landmarks",
              "right_iris_landmarks"]
# Which feature to draw (index 2 -> "nose_landmarks").
face_part = 2
# Replace <YourVideoPath> with a video file path, or pass 0 for the webcam.
# (Also fixes the original placeholder, which was missing its closing '>'.)
cap = cv2.VideoCapture("<YourVideoPath>")
# Process the video frame by frame.
while True:
    success, image = cap.read()
    # BUG FIX: check the read result BEFORE processing the frame. The
    # original called findMeshInFace first, so at end-of-video cap.read()
    # returns (False, None) and the script crashed inside cv2.cvtColor.
    if not success:
        break
    image, landmarks = detector.findMeshInFace(image)
    # Draw the selected feature's landmarks; dict.get handles the no-face
    # case (empty dict) without a try/except KeyError.
    for x, y in landmarks.get(face_parts[face_part], []):
        cv2.circle(image, (x, y), 3, (0, 255, 0), -1)
    # Label the frame with the name of the feature being shown.
    cv2.putText(image, f"{face_parts[face_part]}", (20, 70),
                cv2.FONT_HERSHEY_PLAIN, 5, (0, 255, 0), 5)
    cv2.imshow("Image", image)
    # Poll for 1 ms; quit when the user presses 'q'.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Release the capture device and close the display window.
cap.release()
cv2.destroyAllWindows()