MTCNN 人脸识别：从不同场景照片中获取登记照（2）优化

Hello 0 1

已于 2024-10-25 10:28:46 修改

阅读量679

点赞数 10

CC 4.0 BY-SA版权

文章标签：人脸识别、自动裁剪、并行处理、人工智能、MTCNN、人脸对齐

于 2024-10-25 10:23:40 首次发布

本文链接：https://blog.youkuaiyun.com/chinagaobo/article/details/143227533

对《MTCNN 人脸识别：从不同场景照片中获取登记照（1）》进行了优化：

优化修改说明

保持原始文件名处理：
- 删除了所有与重命名文件相关的代码，直接使用原始文件名进行处理和保存，确保支持中文文件名。
并行处理：
- 继续使用 ProcessPoolExecutor 进行多进程并行处理，提高处理效率。
- 确保每个进程独立初始化 MTCNN，以避免进程间共享问题。
错误处理：
- 继续保留了对图像读取、关键点检测和裁剪过程中的错误处理，确保单个图像处理失败不会中断整个流程。

效果
在这里插入图片描述
说明：以上原始图片来自互联网，若有侵权，请联系删除！

环境创建请参考《MTCNN 人脸识别：从不同场景照片中获取登记照（1）》。

完整代码

"""
@File    : facial_recognition.py
@Author  : Bobo
@Blog    : https://blog.youkuaiyun.com/chinagaobo
@Note    : This code is for learning and communication purposes only
"""


import os
import cv2
from PIL import Image
from mtcnn import MTCNN
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed


def read_image_pil(img_path):
    """
    Load an image with PIL and return it as an upright RGB NumPy array.

    PIL is used for file access so that paths containing non-ASCII
    characters (e.g. Chinese file names) can be opened.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The pixel data as a NumPy array built from the RGB-converted image,
        or None if the file could not be read or decoded (the error is
        printed).
    """
    try:
        # ImageOps ships with Pillow; imported here so the function does not
        # depend on an extra module-level import.
        from PIL import ImageOps

        with Image.open(img_path) as pil_img:
            # Cameras and phones often store the pixels rotated and record the
            # real orientation in the EXIF Orientation tag. Apply it so the
            # face detector sees an upright picture.
            upright = ImageOps.exif_transpose(pil_img)
            return np.array(upright.convert('RGB'))
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable file must not stop
        # the batch, so report it and signal failure with None.
        print(f"无法使用 PIL 读取文件 {img_path}，错误: {e}")
        return None


# Per-process detector cache: each worker process builds MTCNN once, on first
# use, instead of constructing a new detector for every image.
_DETECTOR = None


def _get_detector():
    """Return this process's MTCNN detector, creating it on first use."""
    global _DETECTOR
    if _DETECTOR is None:
        _DETECTOR = MTCNN()
    return _DETECTOR


def _nearest_face(faces, center_x, center_y):
    """Return the detection whose box center is closest to (center_x, center_y)."""
    def squared_distance(face):
        bx, by, bw, bh = face['box']
        return (bx + bw / 2 - center_x) ** 2 + (by + bh / 2 - center_y) ** 2

    return min(faces, key=squared_distance)


def _crop_centered(image, center_x, center_y, crop_width, crop_height):
    """Cut a window centered on a point, zero-filling parts outside the image.

    The window spans ``crop_width // 2`` pixels to each side horizontally and
    ``crop_height // 2`` vertically. Returns None when the window does not
    overlap the image at all (or has zero size).
    """
    half_w = crop_width // 2
    half_h = crop_height // 2
    x1, x2 = center_x - half_w, center_x + half_w
    y1, y2 = center_y - half_h, center_y + half_h

    img_h, img_w = image.shape[:2]
    src_x1, src_x2 = max(0, x1), min(img_w, x2)
    src_y1, src_y2 = max(0, y1), min(img_h, y2)
    if src_x1 >= src_x2 or src_y1 >= src_y2:
        return None

    # Paste the visible part onto a black canvas of the full window size so
    # the window keeps its aspect ratio even when it sticks out of the image.
    canvas = np.zeros((y2 - y1, x2 - x1) + image.shape[2:], dtype=image.dtype)
    canvas[src_y1 - y1:src_y2 - y1, src_x1 - x1:src_x2 - x1] = \
        image[src_y1:src_y2, src_x1:src_x2]
    return canvas


def process_image(img_path, output_dir, target_ratio=4 / 3, target_size=(460, 345),
                  *, side_crop=50, face_scale=1.5):
    """
    Detect, align, crop and resize the face in one image, then save the result.

    Steps: read the image, detect a face, rotate the image about the face-box
    center so the eyes are level, detect again, cut a window of
    ``face_scale`` times the face height (width = height * ``target_ratio``)
    around the face, resize it to ``target_size``, trim ``side_crop`` pixels
    from the left and right edges, and save it under the original file name.

    Args:
        img_path: Path of the input image.
        output_dir: Directory the result is written to (created if missing).
        target_ratio: Width / height ratio of the crop window.
        target_size: ``(width, height)`` passed to ``cv2.resize``.
        side_crop: Non-negative number of pixels trimmed from each side after
            resizing.
        face_scale: Crop-window height as a multiple of the detected face
            height.

    Returns:
        True if an output image was saved, False otherwise. Failures are
        printed; exceptions are not propagated, so one bad image cannot
        abort a batch.
    """
    try:
        # Validate the trim before any expensive work. After resizing, the
        # width is always target_size[0], so this is the only case where the
        # side trim would leave nothing.
        target_width = target_size[0]
        if target_width <= 2 * side_crop:
            print(f"图像宽度不足以裁剪：{img_path}")
            return False

        detector = _get_detector()

        # Read via PIL (supports non-ASCII paths).
        img_rgb = read_image_pil(img_path)
        if img_rgb is None:
            return False

        faces = detector.detect_faces(img_rgb)
        if not faces:
            print(f"未检测到人脸：{img_path}")
            return False

        # Work on the first detected face.
        face = faces[0]
        x, y, width, height = face['box']
        keypoints = face['keypoints']

        if 'left_eye' not in keypoints or 'right_eye' not in keypoints:
            print(f"未检测到关键点（眼睛）：{img_path}")
            return False

        # Tilt of the line through both eyes, in degrees.
        left_eye = keypoints['left_eye']
        right_eye = keypoints['right_eye']
        dx = right_eye[0] - left_eye[0]
        dy = right_eye[1] - left_eye[1]
        angle = np.degrees(np.arctan2(dy, dx))

        # Rotate about the face-box center so the eyes become horizontal.
        center_x = x + width // 2
        center_y = y + height // 2
        img_h, img_w = img_rgb.shape[:2]
        rotation_matrix = cv2.getRotationMatrix2D(
            (float(center_x), float(center_y)), float(angle), 1.0)
        rotated_img = cv2.warpAffine(img_rgb, rotation_matrix, (img_w, img_h),
                                     flags=cv2.INTER_LINEAR)

        # Detect again so the crop is based on the box in the rotated image.
        faces_rotated = detector.detect_faces(rotated_img)
        if not faces_rotated:
            print(f"旋转后未检测到人脸：{img_path}")
            return False

        # The rotation pivot is the chosen face's center, so that face stays
        # put. Picking the nearest detection keeps the same person when the
        # photo contains several faces.
        face_rot = _nearest_face(faces_rotated, center_x, center_y)
        x_rot, y_rot, width_rot, height_rot = face_rot['box']
        center_x_rot = x_rot + width_rot // 2
        center_y_rot = y_rot + height_rot // 2

        # Crop window sized from the face height, at the target aspect ratio.
        new_height = int(height_rot * face_scale)
        new_width = int(new_height * target_ratio)

        crop_image = _crop_centered(rotated_img, center_x_rot, center_y_rot,
                                    new_width, new_height)
        if crop_image is None:
            print(f"裁剪区域为空：{img_path}")
            return False

        adjusted_image = cv2.resize(crop_image, target_size, interpolation=cv2.INTER_AREA)

        # Trim the same margin from the left and right edges.
        cropped_final_image = adjusted_image[:, side_crop:target_width - side_crop]

        # Save under the original file name.
        output_path = os.path.join(output_dir, os.path.basename(img_path))
        if output_dir:
            # An empty output_dir means the current directory; makedirs('')
            # would raise.
            os.makedirs(output_dir, exist_ok=True)

        Image.fromarray(cropped_final_image).save(output_path)
        print(f"已保存裁剪并对齐后的图像至 '{output_path}'")
        return True

    except Exception as e:
        # Deliberate task boundary: report and signal failure instead of
        # letting one image abort the batch.
        print(f"处理文件 {img_path} 时出错: {e}")
        return False


def main():
    """
    Batch-process every supported image in ``images/`` into ``to_image/``.

    Collects the image files in the input directory, then runs
    ``process_image`` on each of them in a pool of worker processes and
    prints a line for every file that failed. Returns None. If the input
    directory is missing or contains no supported image, a message is
    printed and nothing is processed.
    """
    input_dir = "images"
    output_dir = "to_image"

    # Check the input first so a missing folder gives a clear message instead
    # of an uncaught FileNotFoundError from os.listdir.
    if not os.path.isdir(input_dir):
        print(f"输入目录不存在: {input_dir}")
        return

    os.makedirs(output_dir, exist_ok=True)

    # Crop-window aspect ratio and the (width, height) the crop is resized to.
    target_ratio = 4 / 3
    target_size = (460, 345)

    supported_formats = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff'}

    # Sorted so the submission order does not depend on the file system.
    image_files = sorted(
        os.path.join(input_dir, name)
        for name in os.listdir(input_dir)
        if os.path.splitext(name)[1].lower() in supported_formats
    )

    if not image_files:
        print("输入目录中没有支持的图像文件。")
        return

    # Never start more workers than there are images.
    num_workers = min(os.cpu_count() or 4, len(image_files))
    if os.name == "nt":
        # ProcessPoolExecutor rejects max_workers above 61 on Windows.
        num_workers = min(num_workers, 61)

    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = {
            executor.submit(process_image, img_path, output_dir, target_ratio, target_size): img_path
            for img_path in image_files
        }

        # Report results as they finish; a failed image never stops the rest.
        for future in as_completed(futures):
            img_path = futures[future]
            try:
                if not future.result():
                    print(f"处理失败: {img_path}")
            except Exception as exc:
                print(f"处理文件 {img_path} 时发生异常: {exc}")

# Run the batch only when executed as a script. Keep this guard: main() starts
# a ProcessPoolExecutor, and worker processes may import this module again
# (e.g. under the "spawn" start method), which must not re-run main().
if __name__ == "__main__":
    main()

关键步骤说明

1. 读取图像

使用 PIL 读取图像，这样可以更好地支持包含中文字符的文件名。将图像转换为 RGB 模式，并转换为 NumPy 数组以供 MTCNN 处理。

def read_image_pil(img_path):
    """
    Load an image with PIL and return it as an upright RGB NumPy array.

    PIL is used for file access so that paths containing non-ASCII
    characters (e.g. Chinese file names) can be opened.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The pixel data as a NumPy array built from the RGB-converted image,
        or None if the file could not be read or decoded (the error is
        printed).
    """
    try:
        # ImageOps ships with Pillow; imported here so the function does not
        # depend on an extra module-level import.
        from PIL import ImageOps

        with Image.open(img_path) as pil_img:
            # Cameras and phones often store the pixels rotated and record the
            # real orientation in the EXIF Orientation tag. Apply it so the
            # face detector sees an upright picture.
            upright = ImageOps.exif_transpose(pil_img)
            return np.array(upright.convert('RGB'))
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable file must not stop
        # the batch, so report it and signal failure with None.
        print(f"无法使用 PIL 读取文件 {img_path}，错误: {e}")
        return None

2. 检测人脸并对齐

使用 MTCNN 检测人脸及其关键点（左眼和右眼）。根据两眼的位置计算旋转角度，使得两眼水平对齐。旋转图像后，重新检测人脸以确保对齐后的裁剪位置准确。

# Excerpt from process_image: level the eyes, then detect the face again.
# Alignment: compute the rotation angle from the positions of the two eyes
left_eye = keypoints['left_eye']
right_eye = keypoints['right_eye']
dx = right_eye[0] - left_eye[0]
dy = right_eye[1] - left_eye[1]
angle = np.degrees(np.arctan2(dy, dx))

# Rotate the image about the center of the face box so the eyes become level
center_x = x + width // 2
center_y = y + height // 2
center = (center_x, center_y)
rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated_img = cv2.warpAffine(img_rgb, rotation_matrix, (img_rgb.shape[1], img_rgb.shape[0]), flags=cv2.INTER_LINEAR)

# Detect faces again in the rotated image
faces_rotated = detector.detect_faces(rotated_img)
if not faces_rotated:
    print(f"旋转后未检测到人脸：{img_path}")
    return False

# Use the box of the first face detected after rotation
face_rot = faces_rotated[0]
x_rot, y_rot, width_rot, height_rot = face_rot['box']

3. 裁剪和调整尺寸

根据检测到的人脸位置，计算裁剪区域，保持预定义的长宽比。裁剪图像后，调整到目标尺寸，并从左右两侧各裁剪50像素，以保持统一的框架。

# Excerpt from process_image: crop around the face, resize, trim the sides.
# Center of the face box found in the rotated image
center_x_rot = x_rot + width_rot // 2
center_y_rot = y_rot + height_rot // 2

# Crop-window size: 1.5x the face height, width derived from the target ratio
new_height = int(height_rot * 1.5)
new_width = int(new_height * target_ratio)

# Crop rectangle around the face center, clamped to the image bounds
# NOTE(review): when a bound is clamped the rectangle no longer has the target
# ratio, so the resize below stretches the content.
new_x1 = max(0, center_x_rot - new_width // 2)
new_y1 = max(0, center_y_rot - new_height // 2)
new_x2 = min(rotated_img.shape[1], center_x_rot + new_width // 2)
new_y2 = min(rotated_img.shape[0], center_y_rot + new_height // 2)

# Crop the image
crop_image = rotated_img[new_y1:new_y2, new_x1:new_x2]

# Resize the crop to the target size (width, height)
adjusted_image = cv2.resize(crop_image, target_size, interpolation=cv2.INTER_AREA)

# Trim 50 pixels from the left and right sides of the resized image
left_crop = 50
right_crop = target_size[0] - 50
# NOTE(review): the resized width is target_size[0], which is never smaller
# than right_crop, so this branch looks unreachable — confirm.
if adjusted_image.shape[1] < right_crop:
    print(f"图像宽度不足以裁剪：{img_path}")
    return False
cropped_final_image = adjusted_image[:, left_crop:right_crop]

4. 保存处理后的图像

将裁剪和对齐后的图像转换为 PIL 格式，并保存到输出目录中，保留原始文件名。

# Excerpt from process_image: write the result to the output directory.
# Convert the final cropped array to a PIL image
pil_image = Image.fromarray(cropped_final_image)

# Save under the original file name inside output_dir
filename = os.path.basename(img_path)
output_path = os.path.join(output_dir, filename)

# Make sure the output directory exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)

pil_image.save(output_path)
print(f"已保存裁剪并对齐后的图像至 '{output_path}'")

确保您的项目目录结构如下：

your_project/
├── images/          # 输入图像目录（包含需要处理的图像）
├── to_image/        # 输出图像目录（程序会自动创建）
└── facial_recognition.py       # 您的脚本文件