The previous two posts covered face recognition and YOLO + DeepSORT.
This one combines the two: identity recognition plus tracking while keeping a usable frame rate.
The code is modified directly from main.py in the yolo+deepsort project; back up main.py first and rename the copy main_track.py:
import numpy as np
import time
from tracker import face_track
from detector import Detector
from lib_face import *
import cv2
from rtsp import myThread
from face_recognize import person_face
import threading
from lib_face import YuNet
thread_lock = threading.Lock()
thread_exit = False
start_tracking = True
class detector_service(person_face):
def __init__(self,cam=None):
super(detector_service, self).__init__()
# self.loadface = person_face()
self.camera = cam
self.lib_face = YuNet()
X, y, self.names = self.LoadImages()
# print('x',X)
self.ontrack = True
self.start_time = 0
        self.model = cv2.face.EigenFaceRecognizer_create()  # requires opencv-contrib-python
self.model.train(X, y)
self.name_dic = {}
self.image = None
# self.face_casecade = cv2.CascadeClassifier(r'E:\workspace\person_tracking\data\haarcascade_frontalface_default.xml')
    # Haar-cascade variant; needs the face_casecade classifier from the commented-out line above
    def detect_face(self, img):
faces = self.face_casecade.detectMultiScale(img, 1.3, 5)
for (x, y, w, h) in faces:
            frame = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)  # blue
roi_gray = img[y:y + h, x:x + w]
try:
                # resize the face ROI to 92x112, the size the EigenFace model expects
                # cv2.resize(src, dsize, interpolation=...)
roi_gray = cv2.resize(roi_gray, (92, 112), interpolation=cv2.INTER_LINEAR)
params = self.model.predict(roi_gray)
print('Label:%s,confidence:%.2f' % (params[0], params[1]))
                '''
                putText: draw text on the image
                putText(image, text, top-left corner, font, font scale, color, thickness)
                '''
# cv2.putText(frame, self.names[params[0]], (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
if self.names[params[0]] not in self.name_dic.keys():
self.name_dic[self.names[params[0]]] = [x, y, x + w, y + h]
except:
continue
# return frame
def dlib_detect(self):
res = self.face_detecting(self.image) # 0.032s
if res is not None:
face, self.all_face_location = res
for i in range(self.face_num):
try:
[left, right, top, bottom] = self.all_face_location[i]
self.face_img = self.image[top:bottom, left:right]
gray_img = cv2.cvtColor(self.face_img, cv2.COLOR_BGR2GRAY)
roi_gray = cv2.resize(gray_img, (92, 112), interpolation=cv2.INTER_LINEAR)
params = self.model.predict(roi_gray)
if self.names[params[0]] not in self.name_dic.keys():
self.name_dic[self.names[params[0]]] = [left, top, right, bottom]
except:
continue
def lib_face_det(self):
self.lib_face.setInputSize([self.image.shape[1], self.image.shape[0]])
face1 = self.lib_face.infer(self.image)
for det in face1:
try:
bbox = det[0:4].astype(np.int32)
if int(bbox[2])*int(bbox[3])>2000:
print("xxxxxxxxx",bbox[0], bbox[1], bbox[2], bbox[3])
self.face_img = self.image[bbox[1]: bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
gray_img = cv2.cvtColor(self.face_img, cv2.COLOR_BGR2GRAY)
roi_gray = cv2.resize(gray_img, (92, 112), interpolation=cv2.INTER_LINEAR)
params = self.model.predict(roi_gray)
if self.names[params[0]] not in self.name_dic.keys():
self.name_dic[self.names[params[0]]] = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
except:
continue
def detector_camera_person(self):
global start_tracking
detector = Detector()
track = face_track()
        i = 0
        confirmed_name = None  # identity carried across frames; assumed empty default accepted by person_update
while self.camera.isOpened() and not self.quit_flag:
val, self.image = self.camera.read()
if val == False: continue
# ref, frame = capture.read()
i+=1
detection = time.time()
self.image = cv2.resize(self.image, (1280, 720))
bboxes = detector.detect(self.image) #0.017s
if len(bboxes) > 0:
# self.dlib_detect() # 0.032s
self.lib_face_det()
print("+++++++name+++2++++",self.name_dic)
if self.name_dic:
list_bboxs, confirmed_name, statue = track.person_update(bboxes, self.image, self.name_dic, confirmed_name=confirmed_name)
else:
print("无权限--------------------------------------")
list_bboxs, confirmed_name, statue = track.person_update(bboxes, self.image, confirmed_name=confirmed_name)
output_image_frame = track.draw_bboxes(self.image, list_bboxs, line_thickness=None)
print("start_person_update ==use time====", str(time.time() - start_person))
else:
                # no person bboxes in this frame
output_image_frame = self.image
            self.get_fps(output_image_frame)  # averages about 12-13 FPS
cv2.imshow('live', output_image_frame)
print(" ==all use time====", str(time.time() - detection))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
self.camera.release()
cv2.destroyAllWindows()
def main():
# cam = cv2.VideoCapture('./video/1.mp4')
cam = cv2.VideoCapture(0)
process = detector_service(cam)
process.detector_camera_person()
if __name__ == '__main__':
# data = r"E:\workspace\person_tracking\data\face"
# de = detector_service()
# de.detector_rtsp_person()
# de.detector_camera_person()
main()
To make testing easier, I took several of the ten face-detection methods and added them in; if you are interested, see this article: 10种轻量级人脸检测算法的比拼_nihate的博客-优快云博客_轻量级人脸检测
First, the libface (YuNet) version:
import numpy as np
import cv2
import time
import argparse
from itertools import product
from _testcapi import FLT_MIN  # smallest positive normalized float; np.finfo(np.float32).tiny would work as well
# from face_recognize import person_face
class YuNet:
def __init__(self, inputSize=[320, 320], confThreshold=0.9, nmsThreshold=0.3, topK=5000, keepTopK=750):
        modelPath = r'xxx'  # path to the YuNet face-detection ONNX model (placeholder in the original post)
self._model = cv2.dnn.readNet(modelPath)
self._inputNames = ''
self._outputNames = ['loc', 'conf', 'iou']
self._inputSize = inputSize # [w, h]
self._confThreshold = confThreshold
self._nmsThreshold = nmsThreshold
self._topK = topK
self._keepTopK = keepTopK
self._min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
self._steps = [8, 16, 32, 64]
self._variance = [0.1, 0.2]
self.start_time = 0
# Generate priors
self._priorGen()
@property
def name(self):
return self.__class__.__name__
def get_fps(self,img):
now = time.time()
time_period = now - self.start_time
self.fps = 1.0 / time_period
self.start_time = now
color = (0,255,0)
if self.fps < 15:
color = (0,0,255)
cv2.putText(img, str(self.fps.__round__(2)), (20, 50), cv2.FONT_HERSHEY_DUPLEX, 1, color)
def setBackend(self, backend):
self._model.setPreferableBackend(backend)
def setTarget(self, target):
self._model.setPreferableTarget(target)
def setInputSize(self, input_size):
self._inputSize = input_size # [w, h]
# Regenerate priors
self._priorGen()
def infer(self, image):
assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
# Preprocess
inputBlob = cv2.dnn.blobFromImage(image)
# Forward
self._model.setInput(inputBlob, self._inputNames)
outputBlob = self._model.forward(self._outputNames)
# Postprocess
results = self._postprocess(outputBlob)
return results
def _postprocess(self, outputBlob):
# Decode
dets = self._decode(outputBlob)
# NMS
keepIdx = cv2.dnn.NMSBoxes(
bboxes=dets[:, 0:4].tolist(),
scores=dets[:, -1].tolist(),
score_threshold=self._confThreshold,
nms_threshold=self._nmsThreshold,
top_k=self._topK
) # box_num x class_num
if len(keepIdx) > 0:
dets = dets[keepIdx]
# dets = np.squeeze(dets, axis=1)
return dets[:self._keepTopK]
else:
return np.empty(shape=(0, 15))
def _priorGen(self):
w, h = self._inputSize
feature_map_2th = [int(int((h + 1) / 2) / 2),
int(int((w + 1) / 2) / 2)]
feature_map_3th = [int(feature_map_2th[0] / 2),
int(feature_map_2th[1] / 2)]
feature_map_4th = [int(feature_map_3th[0] / 2),
int(feature_map_3th[1] / 2)]
feature_map_5th = [int(feature_map_4th[0] / 2),
int(feature_map_4th[1] / 2)]
feature_map_6th = [int(feature_map_5th[0] / 2),
int(feature_map_5th[1] / 2)]
feature_maps = [feature_map_3th, feature_map_4th,
feature_map_5th, feature_map_6th]
priors = []
for k, f in enumerate(feature_maps):
min_sizes = self._min_sizes[k]
for i, j in product(range(f[0]), range(f[1])): # i->h, j->w
for min_size in min_sizes:
s_kx = min_size / w
s_ky = min_size / h
cx = (j + 0.5) * self._steps[k] / w
cy = (i + 0.5) * self._steps[k] / h
priors.append([cx, cy, s_kx, s_ky])
self.priors = np.array(priors, dtype=np.float32)
def _decode(self, outputBlob):
loc, conf, iou = outputBlob
# get score
cls_scores = conf[:, 1]
iou_scores = iou[:, 0]
# clamp
_idx = np.where(iou_scores < 0.)
iou_scores[_idx] = 0.
_idx = np.where(iou_scores > 1.)
iou_scores[_idx] = 1.
scores = np.sqrt(cls_scores * iou_scores)
scores = scores[:, np.newaxis]
scale = np.array(self._inputSize)
# get bboxes
bboxes = np.hstack((
(self.priors[:, 0:2] + loc[:, 0:2] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 2:4] * np.exp(loc[:, 2:4] * self._variance)) * scale
))
# (x_c, y_c, w, h) -> (x1, y1, w, h)
bboxes[:, 0:2] -= bboxes[:, 2:4] / 2
# get landmarks
landmarks = np.hstack((
(self.priors[:, 0:2] + loc[:, 4: 6] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 6: 8] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 8:10] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 10:12] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 12:14] * self._variance[0] * self.priors[:, 2:4]) * scale
))
dets = np.hstack((bboxes, landmarks, scores))
return dets
def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
output = image.copy()
landmark_color = [
(255, 0, 0), # right eye
( 0, 0, 255), # left eye
( 0, 255, 0), # nose tip
(255, 0, 255), # right mouth corner
( 0, 255, 255) # left mouth corner
]
if fps is not None:
cv2.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
for det in results:
bbox = det[0:4].astype(np.int32)
cv2.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2)
conf = det[-1]
cv2.putText(output, '{:.4f}'.format(conf), (bbox[0], bbox[1]+12), cv2.FONT_HERSHEY_DUPLEX, 0.5, text_color)
landmarks = det[4:14].astype(np.int32).reshape((5,2))
for idx, landmark in enumerate(landmarks):
cv2.circle(output, landmark, 2, landmark_color[idx], 2)
return output
class SFace:
def __init__(self):
        modelPath = 'weights/face_detection_yunet.onnx'  # note: this should point to the SFace face-recognition model, not the YuNet detector
self._model = cv2.dnn.readNet(modelPath)
self._input_size = [112, 112]
self._dst = np.array([
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
], dtype=np.float32)
self._dst_mean = np.array([56.0262, 71.9008], dtype=np.float32)
@property
def name(self):
return self.__class__.__name__
def setBackend(self, backend_id):
self._model.setPreferableBackend(backend_id)
def setTarget(self, target_id):
self._model.setPreferableTarget(target_id)
def _preprocess(self, image, bbox):
aligned_image = self._alignCrop(image, bbox)
return cv2.dnn.blobFromImage(aligned_image)
def infer(self, image, bbox):
# Preprocess
# inputBlob = self._preprocess(image, bbox)
inputBlob = cv2.dnn.blobFromImage(self._alignCrop(image, bbox))
# Forward
self._model.setInput(inputBlob)
outputBlob = self._model.forward()
# Postprocess
results = outputBlob / cv2.norm(outputBlob)
return results
def match(self, image1, face1, image2, face2, dis_type=0):
feature1 = self.infer(image1, face1)
feature2 = self.infer(image2, face2)
if dis_type == 0: # COSINE
return np.sum(feature1 * feature2)
elif dis_type == 1: # NORM_L2
return cv2.norm(feature1, feature2)
else:
raise NotImplementedError()
def _alignCrop(self, image, face):
# Retrieve landmarks
if face.shape[-1] == (4 + 5 * 2):
landmarks = face[4:].reshape(5, 2)
else:
raise NotImplementedError()
warp_mat = self._getSimilarityTransformMatrix(landmarks)
aligned_image = cv2.warpAffine(image, warp_mat, self._input_size, flags=cv2.INTER_LINEAR)
return aligned_image
def _getSimilarityTransformMatrix(self, src):
# compute the mean of src and dst
src_mean = np.array([np.mean(src[:, 0]), np.mean(src[:, 1])], dtype=np.float32)
dst_mean = np.array([56.0262, 71.9008], dtype=np.float32)
# subtract the means from src and dst
src_demean = src.copy()
src_demean[:, 0] = src_demean[:, 0] - src_mean[0]
src_demean[:, 1] = src_demean[:, 1] - src_mean[1]
dst_demean = self._dst.copy()
dst_demean[:, 0] = dst_demean[:, 0] - dst_mean[0]
dst_demean[:, 1] = dst_demean[:, 1] - dst_mean[1]
A = np.array([[0., 0.], [0., 0.]], dtype=np.float64)
for i in range(5):
A[0][0] += dst_demean[i][0] * src_demean[i][0]
A[0][1] += dst_demean[i][0] * src_demean[i][1]
A[1][0] += dst_demean[i][1] * src_demean[i][0]
A[1][1] += dst_demean[i][1] * src_demean[i][1]
A = A / 5
d = np.array([1.0, 1.0], dtype=np.float64)
if A[0][0] * A[1][1] - A[0][1] * A[1][0] < 0:
d[1] = -1
T = np.array([
[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]
], dtype=np.float64)
s, u, vt = cv2.SVDecomp(A)
smax = s[0][0] if s[0][0] > s[1][0] else s[1][0]
tol = smax * 2 * FLT_MIN
rank = int(0)
if s[0][0] > tol:
rank += 1
if s[1][0] > tol:
rank += 1
det_u = u[0][0] * u[1][1] - u[0][1] * u[1][0]
det_vt = vt[0][0] * vt[1][1] - vt[0][1] * vt[1][0]
if rank == 1:
if det_u * det_vt > 0:
uvt = np.matmul(u, vt)
T[0][0] = uvt[0][0]
T[0][1] = uvt[0][1]
T[1][0] = uvt[1][0]
T[1][1] = uvt[1][1]
else:
temp = d[1]
d[1] = -1
D = np.array([[d[0], 0.0], [0.0, d[1]]], dtype=np.float64)
Dvt = np.matmul(D, vt)
uDvt = np.matmul(u, Dvt)
T[0][0] = uDvt[0][0]
T[0][1] = uDvt[0][1]
T[1][0] = uDvt[1][0]
T[1][1] = uDvt[1][1]
d[1] = temp
else:
D = np.array([[d[0], 0.0], [0.0, d[1]]], dtype=np.float64)
Dvt = np.matmul(D, vt)
uDvt = np.matmul(u, Dvt)
T[0][0] = uDvt[0][0]
T[0][1] = uDvt[0][1]
T[1][0] = uDvt[1][0]
T[1][1] = uDvt[1][1]
var1 = 0.0
var2 = 0.0
for i in range(5):
var1 += src_demean[i][0] * src_demean[i][0]
var2 += src_demean[i][1] * src_demean[i][1]
var1 /= 5
var2 /= 5
scale = 1.0 / (var1 + var2) * (s[0][0] * d[0] + s[1][0] * d[1])
TS = [
T[0][0] * src_mean[0] + T[0][1] * src_mean[1],
T[1][0] * src_mean[0] + T[1][1] * src_mean[1]
]
T[0][2] = dst_mean[0] - scale * TS[0]
T[1][2] = dst_mean[1] - scale * TS[1]
T[0][0] *= scale
T[0][1] *= scale
T[1][0] *= scale
T[1][1] *= scale
return np.array([
[T[0][0], T[0][1], T[0][2]],
[T[1][0], T[1][1], T[1][2]]
], dtype=np.float64)
if __name__=='__main__':
cap = cv2.VideoCapture(0)
img1path='telangpu.png'
img2path='telangpu2.png'
    dis_type = 0  # 0: cosine, 1: norm_l2
detector = YuNet()
# recognizer = SFace()
    # face detection on the camera feed
# img2 = cv2.imread(img2path)
# face2 = detector.infer(img2)
while True:
        ret, img = cap.read()
        if not ret:
            continue
        srcimg = cv2.resize(img, (640, 480))
print(srcimg)
# srcimg_1 = cv2.flip(srcimg,1)
detector.setInputSize([srcimg.shape[1], srcimg.shape[0]])
face1 = detector.infer(srcimg)
for det in face1:
bbox = det[0:4].astype(np.int32)
cv2.rectangle(srcimg, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 0), 2)
# srcimg = visualize(srcimg,face1)
winName = 'Deep learning object detection in OpenCV'
detector.get_fps(srcimg)
cv2.namedWindow(winName, 0)
cv2.imshow(winName, srcimg)
# distance = recognizer.match(srcimg_1, face1[0][:-1], img2, face2[0][:-1], dis_type)
# if dis_type == 0:
# dis_type = 'Cosine'
# threshold = 0.363
# result = 'same identity' if distance >= threshold else 'different identity'
# else:
# dis_type = 'Norm-L2'
# threshold = 1.128
# result = 'same identity' if distance <= threshold else 'different identity'
if cv2.waitKey(100) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
# print('Using {} distance, threshold {} :distance{} ,{}.'.format(dis_type, threshold, distance, result))
print(face1)
Next, the dlib face version. Just add a cv2 camera-read loop to this code to view the result; I won't write that part again, change it yourself (a minimal loop is sketched after the code):
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 27 11:43:47 2018
@author: Administrator
"""
'''
Face recognition using the OpenCV library
'''
import time
import cv2,dlib
import numpy as np
import os
import threading
from rtsp import myThread
import config
thread_lock = threading.Lock()
thread_exit=False
from lib_face import YuNet
import shutil
# from camera_catch import camera_service
class person_face():
def __init__(self, name="y", xml_path=None, root_dir=None):
self.name =name
self.root_dir = r"E:\workspace\person_tracking\data\face"
self.face_dir =os.path.join(self.root_dir,name)
self.detector = dlib.get_frontal_face_detector()
        self.fps = 0  # frame rate
self.image = None
self.face_img = None
def face_detecting(self,img):
face_location = []
all_face_location = []
faces = self.detector(img, 0)
self.face_num = len(faces)
if len(faces) != 0:
self.face_flag = True
for i, face in enumerate(faces):
face_location.append(face)
w, h = (face.right() - face.left()), (face.bottom() - face.top())
left, right, top, bottom = face.left() - w // 4, face.right() + w // 4, face.top() - h // 2, face.bottom() + h // 4
all_face_location.append([left, right, top, bottom])
return face_location, all_face_location
else:
self.face_flag = False
return None
if __name__ == '__main__':
    data = r"E:\workspace\person_tracking\data\face"
    serve = person_face()
    # face_detecting() expects a frame read from cv2; see the camera-loop sketch below
    # serve.face_detecting(frame)
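A minimal sketch of that camera loop (assuming camera index 0 and the person_face class defined above):
import cv2

serve = person_face()
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        continue
    res = serve.face_detecting(frame)
    if res is not None:
        _, all_face_location = res
        # each entry is [left, right, top, bottom] (including the padding added in face_detecting)
        for left, right, top, bottom in all_face_location:
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
    cv2.imshow('dlib face', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()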
I won't go into face detection any further: there are plenty of mature methods available.
Face recognition: as you can see in main_track.py, it uses
model = cv2.face.EigenFaceRecognizer_create()
Concretely, you first collect face images and then test recognition; both detection and recognition live in the code below:
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 27 11:43:47 2018
@author: Administrator
"""
'''
Face recognition using the OpenCV library
'''
import time
import cv2,dlib
import numpy as np
import os
import threading
from rtsp import myThread
import config
thread_lock = threading.Lock()
thread_exit=False
from lib_face import YuNet
import shutil
# from camera_catch import camera_service
class person_face():
def __init__(self, name="aaa", xml_path=None, root_dir=None):
        self.name = name  # name of the person to collect
        self.root_dir = r"E:\workspace\person_tracking\data\face"
        self.face_dir = os.path.join(self.root_dir, name)  # directory where the collected images are saved
self.lib_face = YuNet()
self.xml_path =r'E:\workspace\person_tracking\data\haarcascade_frontalface_default.xml'
self.detector = dlib.get_frontal_face_detector()
        self.face_num = 0  # number of faces in the current frame
        self.last_face_num = 0  # number of faces in the previous frame
        self.face_num_change_flag = False  # flag: face count changed this frame, used later to skip recognition and keep the frame rate up
        self.quit_flag = False  # flag to exit the program
        self.img_num = 0  # number of images already in the face data folder
        self.collect_face_data = True  # whether to collect face data; collection only runs when True
self.image = None
self.face_img = None
def lib_detect(self,img):
face_bbox = []
self.lib_face.setInputSize([img.shape[1], img.shape[0]])
face1 = self.lib_face.infer(img)
for det in face1:
bbox = det[0:4].astype(np.int32)
face_bbox.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
return face_bbox
    # collect your own face data
def generator_camera(self):
        '''
        Open the camera, read frames, detect the faces in each frame, then crop and resize them.
        The generated images satisfy:
        1. grayscale, saved as .png
        2. all images have the same size
        params:
            data: directory where the generated face data is saved
        '''
img_height = 480
img_width = 720
capture = cv2.VideoCapture(0)
if not os.path.exists(self.face_dir):
os.mkdir(self.face_dir)
while True:
ref, frame = capture.read()
            # face detector
            # face = self.detector(frame, 0)  # dlib
            face = self.lib_detect(frame)  # libface (YuNet)
print(face)
for i,[left, top, right, bottom] in enumerate(face):
w = int(right-left)
h = int(bottom-top)
area = w*h
print(area)
if area<600:
continue
                # draw a rectangle on the original frame
# w, h = (face.right() - face.left()), (face.bottom() - face.top())
# left, right, top, bottom = face.left() - w // 4, face.right() + w // 4, face.top() - h // 2, face.bottom() + h // 4
# cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
else:
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
# cv2.rectangle(frame, (face[0], face[1]), (face[2], face[3]), (0, 0, 255), 2)
                    # resize the crop
# new_frame = cv2.resize(frame[y:y + h, x:x + w], (92, 112))
# new_frame = frame[top+2:bottom-2, left+2:right-2]
new_frame = cv2.resize(frame[top+2:bottom-2, left+2:right-2], (92, 112))
                    # save the face crop
cv2.imwrite('%s/%s.png' % (self.face_dir, str(time.time())), new_frame)
cv2.imshow('Dynamic', frame)
            # press q to quit
if cv2.waitKey(100) & 0xff == ord('q'):
break
cv2.destroyAllWindows()
    # load images (ORL-style face folder layout) and prepare the training data
def LoadImages(self):
        '''
        Load the image data used for training
        params:
            data: directory containing the training data; all images must be the same size
        ret:
            images: [m, height, width], m = number of samples
            names: list of person names
            labels: labels
        '''
images = []
names = []
labels = []
label = 0
        # iterate over all sub-folders
for subdir in os.listdir(self.root_dir):
subpath = os.path.join(self.root_dir, subdir)
# print('path',subpath)
            # make sure it is a directory
if os.path.isdir(subpath):
                # each folder holds many photos of one person
names.append(subdir)
                # iterate over the image files in this folder
for filename in os.listdir(subpath):
imgpath = os.path.join(subpath, filename)
img = cv2.imread(imgpath, cv2.IMREAD_COLOR)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# cv2.imshow('1',img)
# cv2.waitKey(0)
images.append(gray_img)
labels.append(label)
label += 1
images = np.asarray(images)
# names=np.asarray(names)
labels = np.asarray(labels)
return images, labels, names
    # test the trained model
def FaceRec(self):
        # load the training data
X, y, names = self.LoadImages()
# print('x',X)
model = cv2.face.EigenFaceRecognizer_create()
model.train(X, y)
        # open the camera
cap = cv2.VideoCapture(0)
cv2.namedWindow('Dynamic')
        # create the Haar cascade classifier
face_casecade = cv2.CascadeClassifier(self.xml_path)
while (True):
            # read one frame
            # ret: whether the frame was read successfully
            # frame: the frame itself
ret, frame = cap.read()
frame = cv2.resize(frame,(1280,720))
            # proceed only if the frame was read successfully
# print('ret',ret)
if ret:
                # convert to grayscale
gray_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # detect faces with the cascade classifier
faces = face_casecade.detectMultiScale(gray_img, 1.3, 5)
# faces = self.lib_detect(frame)
                # draw a rectangle around each detected face
for (x, y, w, h) in faces:
                    frame = cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)  # blue
# frame = cv2.rectangle(frame, (x, y), (w, h), (255, 0, 0), 2) # 蓝色
roi_gray = gray_img[y:y + h, x:x + w]
try:
                        # resize the face ROI to 92x112, the size the EigenFace model expects
                        # cv2.resize(src, dsize, interpolation=...)
roi_gray = cv2.resize(roi_gray, (92, 112), interpolation=cv2.INTER_LINEAR)
params = model.predict(roi_gray)
print('Label:%s,confidence:%.2f' % (params[0], params[1]))
                        '''
                        putText: draw text on the image
                        putText(image, text, top-left corner, font, font scale, color, thickness)
                        '''
cv2.putText(frame, names[params[0]], (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
except:
continue
cv2.imshow('Dynamic', frame)
            # press q to quit
if cv2.waitKey(100) & 0xff == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
data = r"E:\workspace\person_tracking\data\face"
serve= person_face()
# LoadImages(data)
# image = camera_service()
# generator(image,"ybj")
serve.generator_camera()
# serve.FaceRec()
That wraps up the face part; the previous two posts covered it in detail, and this section only adds the extra methods.
Now back to main_track.py.
Using face detection plus face recognition, the faces are identified first (much like an access-control gate), and each identity id is stored together with its face bbox. Then, from the relationship between the person bbox and the face bbox, the identity id is written directly onto the track_id inside DeepSORT (a small helper sketch of that bbox matching follows):
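The face-to-person association itself is just a geometric check: a recognised face box should fall inside one of the YOLO person boxes. A hypothetical helper to illustrate the idea (the function name and signature are mine, not from the repo):
def assign_names_to_persons(name_dic, person_bboxes):
    """name_dic: {name: [x1, y1, x2, y2]} face boxes from face recognition.
    person_bboxes: person boxes as (x1, y1, x2, y2, ...).
    Returns one name (or None) per person box, in the same order."""
    names = []
    for pb in person_bboxes:
        px1, py1, px2, py2 = pb[:4]
        matched = None
        for name, (fx1, fy1, fx2, fy2) in name_dic.items():
            # the centre of the face box should lie inside the person box
            cx, cy = (fx1 + fx2) / 2, (fy1 + fy2) / 2
            if px1 <= cx <= px2 and py1 <= cy <= py2:
                matched = name
                break
        names.append(matched)
    return names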
Go to line 25 of deep_sort.py:
def update(self, bbox_xywh, confidences, ori_img, name_list=None):
self.height, self.width = ori_img.shape[:2]
# generate detections
features = self._get_features(bbox_xywh, ori_img, name_list=name_list)
bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
detections = [Detection(bbox_tlwh[i], conf, list(features.values())[i], list(features.keys())[i]) for i, conf in enumerate(confidences) if conf>self.min_confidence]
        # run non-maximum suppression
boxes = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
detections = [detections[i] for i in indices]
# update tracker
self.tracker.predict()
self.tracker.update(detections)
# output bbox identities
outputs = []
out_name = []
for track in self.tracker.tracks:
if not track.is_confirmed() or track.time_since_update > 1:
continue
box = track.to_tlwh()
x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
print("==========track.track_id=========:",track.track_id)
track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2], dtype=int))  # np.int is removed in recent NumPy versions
out_name.append(track_id)
if len(outputs) > 0:
outputs = np.stack(outputs, axis=0)
return outputs, out_name
The name↔bbox correspondence is turned into a name:bbox mapping and passed into feature extraction; after a feature is extracted from each bbox region, the result is kept as a name:features dict (see the sketch below).
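For reference, this is roughly how _get_features can be adapted. The original deep_sort_pytorch-style version returns a plain feature array, and the name_list layout assumed here (one recognised name or None per person box) is my assumption, not something shown in the post:
    def _get_features(self, bbox_xywh, ori_img, name_list=None):
        im_crops, keys = [], []
        for i, box in enumerate(bbox_xywh):
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im_crops.append(ori_img[y1:y2, x1:x2])
            # key each crop by its identity, falling back to the detection index
            if name_list is not None and name_list[i] is not None:
                keys.append(name_list[i])
            else:
                keys.append(i)
        features = self.extractor(im_crops) if im_crops else np.array([])
        # dict preserves insertion order (Python 3.7+), which update() relies on
        return dict(zip(keys, features))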
The name and the feature are then passed into the Detection class, and self.name = name is added to Detection (a sketch follows).
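A sketch of what the extended Detection could look like, modelled on the stock deep_sort detection.py; only the name argument is new, and the original class also has to_tlbr() and other helpers, so treat this as an illustration rather than the exact file:
import numpy as np

class Detection(object):
    """Single detection, extended with an identity name so the name travels with
    the box through the tracker (matches the call Detection(tlwh, conf, feature, name))."""

    def __init__(self, tlwh, confidence, feature, name=None):
        self.tlwh = np.asarray(tlwh, dtype=np.float32)   # (top-left x, top-left y, w, h)
        self.confidence = float(confidence)
        self.feature = np.asarray(feature, dtype=np.float32)
        self.name = name  # recognised identity, or a fallback detection index

    def to_xyah(self):
        # (centre x, centre y, aspect ratio, height), as in the original class
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret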
The tracker itself only operates on Detection objects, so in deep_sort/deep_sort/sort/tracker.py, inside update() around line 58, add self._initiate_track(detections[detection_idx]) at the commented spot so the identity is refreshed:
def update(self, detections):
"""Perform measurement update and track management.
Parameters
----------
        detections : List[deep_sort.detection.Detection]
A list of detections at the current time step.
"""
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
            # added line: re-assign the name to this track_id on every frame
self._initiate_track(detections[detection_idx])
self.tracks[track_idx].update(
self.kf, detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
And _initiate_track() becomes:
def _initiate_track(self, detection):
mean, covariance = self.kf.initiate(detection.to_xyah())
print("==========detection.name=========",detection.name)
# if len(name_dic)>=self._next_id:
# name = name_dic[self._next_id]
# else:
# name = self._next_id
# if isinstance(detection.name, int):
# detection.name = self._next_id
self.tracks.append(Track(
mean, covariance, detection.name, self.n_init, self.max_age,
detection.feature))
# self.tracks.append(Track(
# mean, covariance, self._next_id, self.n_init, self.max_age,
# detection.feature))
# self._next_id += 1
With that, identity assignment and tracking work in a first rough version. The approach is far from perfect and still has plenty of bugs, but I have not yet seen a blog post that combines face recognition with pedestrian tracking, so I am still improving it. If anyone has a better approach, please let me know; many thanks.