# Install YOLOv5 dependencies

```bash
git clone https://github.com/ultralytics/yolov5
cd yolov5
pip install -r requirements.txt
```
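Before moving on, a quick import check confirms the core dependencies are in place (a minimal sketch, assuming requirements.txt pulled in torch and opencv-python):

```python
# Sanity check: both libraries import and report their versions
import torch
import cv2

print(torch.__version__, cv2.__version__)
```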
1. Load the YOLOv5-face model for facial landmark detection

Note that `YoloDetector` comes from a separate YOLOv5-face port (the `yolov5_face` package below), not from the ultralytics repo cloned above, so install that project as well.

```python
from yolov5_face.face_detector import YoloDetector
import cv2

# Initialize the model (download the weights first, or replace with your own path)
model_path = "weights/yolov5s-face.pt"
detector = YoloDetector(model_path, device='cuda')  # or 'cpu'

img = cv2.imread("test.jpg")
bboxes = detector.predict(img)

for box in bboxes:
    x1, y1, x2, y2, conf, landmarks = box
    # OpenCV drawing functions need integer pixel coordinates
    x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
    # landmarks = [left_eye, right_eye, nose, left_mouth, right_mouth], each an (x, y) pair
    cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
    for (x, y) in landmarks:
        cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1)
```
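Depending on the port you use, `predict` may return boxes and landmarks as two parallel lists instead of packed 6-tuples. The exact return shape is an assumption here, so a small adapter (a hypothetical helper; check your fork's README) keeps the rest of the pipeline independent of it:

```python
def iter_faces(result):
    """Yield (x1, y1, x2, y2, landmarks) for either return shape (hypothetical helper)."""
    if isinstance(result, tuple) and len(result) == 2:
        # Some ports return ([boxes...], [landmark_sets...]) as two lists
        boxes, points = result
        for (x1, y1, x2, y2), lm in zip(boxes, points):
            yield int(x1), int(y1), int(x2), int(y2), lm
    else:
        # Packed 6-tuples, as assumed in the loop above
        for x1, y1, x2, y2, conf, lm in result:
            yield int(x1), int(y1), int(x2), int(y2), lm
```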
2. Load the emotion recognition model (CNN)
```python
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image

# Assume we have a trained FER (facial expression recognition) model
class EmotionCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, 3), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(64, 7)  # 7 emotion classes
        )

    def forward(self, x):
        return self.net(x)

emotion_model = EmotionCNN()
emotion_model.load_state_dict(torch.load("emotion_model.pth", map_location="cpu"))
emotion_model.eval()

# Preprocessing: 48x48 grayscale, matching the 1-channel input of the network
transform = transforms.Compose([
    transforms.Resize((48, 48)),
    transforms.Grayscale(),
    transforms.ToTensor()
])
```
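A quick shape check with a random input confirms the transform and the network agree: the model expects an [N, 1, 48, 48] batch and emits 7 logits per face:

```python
dummy = torch.randn(1, 1, 48, 48)   # one grayscale 48x48 crop
print(EmotionCNN()(dummy).shape)    # torch.Size([1, 7])
```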
```python
# Crop the face region found in step 1 (x1, y1, x2, y2 are the ints from above)
face_crop = img[y1:y2, x1:x2]
face_pil = Image.fromarray(cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB))
input_tensor = transform(face_pil).unsqueeze(0)

with torch.no_grad():
    output = emotion_model(input_tensor)
    pred = torch.argmax(output, dim=1).item()

# FER-2013 class order; English labels also sidestep cv2.putText's ASCII-only Hershey fonts
emotions = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
emotion_label = emotions[pred]
cv2.putText(img, emotion_label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
```
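Continuing from the block above, if you also want a confidence score next to the label, softmax over the logits is a common choice (a sketch; the network outputs raw logits):

```python
probs = torch.softmax(output, dim=1)[0]              # logits -> probabilities
print(f"{emotion_label}: {probs[pred].item():.2f}")
```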
3. Putting the full pipeline together
```python
def detect_face_and_emotion(image_path):
    img = cv2.imread(image_path)
    faces = detector.predict(img)
    for box in faces:
        x1, y1, x2, y2, conf, landmarks = box
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
        face_crop = img[y1:y2, x1:x2]
        face_pil = Image.fromarray(cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB))
        input_tensor = transform(face_pil).unsqueeze(0)
        with torch.no_grad():  # inference only, no gradients needed
            output = emotion_model(input_tensor)
        pred = torch.argmax(output, dim=1).item()
        emotion_label = emotions[pred]
        # Draw the box and the predicted label
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(img, emotion_label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    cv2.imshow("Result", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
```
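With everything defined, running the pipeline on the test image from step 1 is a one-liner (assuming `test.jpg` is in the working directory):

```python
detect_face_and_emotion("test.jpg")
```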