The previous two posts covered face recognition and YOLO + DeepSORT.
This one combines the two: identity recognition plus tracking while keeping a usable frame rate.
The code is modified directly from main.py in the yolo+deepsort project; back up main.py first and rename the copy main_track.py:
import numpy as np
import time
from tracker import face_track
from detector import Detector
from lib_face import *
import cv2
from rtsp import myThread
from face_recognize import person_face
import threading
from lib_face import YuNet
thread_lock = threading.Lock()
thread_exit = False
start_tracking = True
class detector_service(person_face):
def __init__(self,cam=None):
super(detector_service, self).__init__()
# self.loadface = person_face()
self.camera = cam
self.lib_face = YuNet()
X, y, self.names = self.LoadImages()
# print('x',X)
self.ontrack = True
self.start_time = 0
        self.model = cv2.face.EigenFaceRecognizer_create()  # requires opencv-contrib-python
self.model.train(X, y)
self.name_dic = {}
self.image = None
# self.face_casecade = cv2.CascadeClassifier(r'E:\workspace\person_tracking\data\haarcascade_frontalface_default.xml')
    # Haar-cascade variant; needs the face_casecade classifier from the commented-out line above
    def detect_face(self, img):
faces = self.face_casecade.detectMultiScale(img, 1.3, 5)
for (x, y, w, h) in faces:
            frame = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)  # blue
roi_gray = img[y:y + h, x:x + w]
try:
                # resize the face ROI to 92x112, the size the EigenFace model expects
                # cv2.resize(src, dsize, interpolation=...)
roi_gray = cv2.resize(roi_gray, (92, 112), interpolation=cv2.INTER_LINEAR)
params = self.model.predict(roi_gray)
print('Label:%s,confidence:%.2f' % (params[0], params[1]))
                '''
                putText: draw text on the image
                putText(image, text, top-left corner, font, font scale, color, thickness)
                '''
# cv2.putText(frame, self.names[params[0]], (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
if self.names[params[0]] not in self.name_dic.keys():
self.name_dic[self.names[params[0]]] = [x, y, x + w, y + h]
except:
continue
# return frame
def dlib_detect(self):
res = self.face_detecting(self.image) # 0.032s
if res is not None:
face, self.all_face_location = res
for i in range(self.face_num):
try:
[left, right, top, bottom] = self.all_face_location[i]
self.face_img = self.image[top:bottom, left:right]
gray_img = cv2.cvtColor(self.face_img, cv2.COLOR_BGR2GRAY)
roi_gray = cv2.resize(gray_img, (92, 112), interpolation=cv2.INTER_LINEAR)
params = self.model.predict(roi_gray)
if self.names[params[0]] not in self.name_dic.keys():
self.name_dic[self.names[params[0]]] = [left, top, right, bottom]
except:
continue
def lib_face_det(self):
self.lib_face.setInputSize([self.image.shape[1], self.image.shape[0]])
face1 = self.lib_face.infer(self.image)
for det in face1:
try:
bbox = det[0:4].astype(np.int32)
if int(bbox[2])*int(bbox[3])>2000:
print("xxxxxxxxx",bbox[0], bbox[1], bbox[2], bbox[3])
self.face_img = self.image[bbox[1]: bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
gray_img = cv2.cvtColor(self.face_img, cv2.COLOR_BGR2GRAY)
roi_gray = cv2.resize(gray_img, (92, 112), interpolation=cv2.INTER_LINEAR)
params = self.model.predict(roi_gray)
if self.names[params[0]] not in self.name_dic.keys():
self.name_dic[self.names[params[0]]] = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
except:
continue
def detector_camera_person(self):
global start_tracking
detector = Detector()
track = face_track()
        i = 0
        confirmed_name = None  # identity carried across frames; assumed empty default accepted by person_update
while self.camera.isOpened() and not self.quit_flag:
val, self.image = self.camera.read()
if val == False: continue
# ref, frame = capture.read()
i+=1
detection = time.time()
self.image = cv2.resize(self.image, (1280, 720))
bboxes = detector.detect(self.image) #0.017s
if len(bboxes) > 0:
# self.dlib_detect() # 0.032s
self.lib_face_det()
print("+++++++name+++2++++",self.name_dic)
if self.name_dic:
list_bboxs, confirmed_name, statue = track.person_update(bboxes, self.image, self.name_dic, confirmed_name=confirmed_name)
else:
print("无权限--------------------------------------")
list_bboxs, confirmed_name, statue = track.person_update(bboxes, self.image, confirmed_name=confirmed_name)
output_image_frame = track.draw_bboxes(self.image, list_bboxs, line_thickness=None)
print("start_person_update ==use time====", str(time.time() - start_person))
else:
                # no person bboxes in this frame
output_image_frame = self.image
            self.get_fps(output_image_frame)  # averages about 12-13 FPS
cv2.imshow('live', output_image_frame)
print(" ==all use time====", str(time.time() - detection))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
self.camera.release()
cv2.destroyAllWindows()
def main():
# cam = cv2.VideoCapture('./video/1.mp4')
cam = cv2.VideoCapture(0)
process = detector_service(cam)
process.detector_camera_person()
if __name__ == '__main__':
# data = r"E:\workspace\person_tracking\data\face"
# de = detector_service()
# de.detector_rtsp_person()
# de.detector_camera_person()
main()
To make testing easier, I took several of the ten face-detection methods and added them in; if you are interested, see this article: 10种轻量级人脸检测算法的比拼_nihate的博客-优快云博客_轻量级人脸检测
First, the libface (YuNet) version:
import numpy as np
import cv2
import time
import argparse
from itertools import product
from _testcapi import FLT_MIN  # smallest positive normalized float; np.finfo(np.float32).tiny would work as well
# from face_recognize import person_face
class YuNet:
def __init__(self, inputSize=[320, 320], confThreshold=0.9, nmsThreshold=0.3, topK=5000, keepTopK=750):
        modelPath = r'xxx'  # path to the YuNet face-detection ONNX model (placeholder in the original post)
self._model = cv2.dnn.readNet(modelPath)
self._inputNames = ''
self._outputNames = ['loc', 'conf', 'iou']
self._inputSize = inputSize # [w, h]
self._confThreshold = confThreshold
self._nmsThreshold = nmsThreshold
self._topK = topK
self._keepTopK = keepTopK
self._min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
self._steps = [8, 16, 32, 64]
self._variance = [0.1, 0.2]
self.start_time = 0
# Generate priors
self._priorGen()
@property
def name(self):
return self.__class__.__name__
def get_fps(self,img):
now = time.time()
time_period = now - self.start_time
self.fps = 1.0 / time_period
self.start_time = now
color = (0,255,0)
if self.fps < 15:
color = (0,0,255)
cv2.putText(img, str(self.fps.__round__(2)), (20, 50), cv2.FONT_HERSHEY_DUPLEX, 1, color)
def setBackend(self, backend):
self._model.setPreferableBackend(backend)
def setTarget(self, target):
self._model.setPreferableTarget(target)
def setInputSize(self, input_size):
self._inputSize = input_size # [w, h]
# Regenerate priors
self._priorGen()
def infer(self, image):
assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
# Preprocess
inputBlob = cv2.dnn.blobFromImage(image)
# Forward
self._model.setInput(inputBlob, self._inputNames)
outputBlob = self._model.forward(self._outputNames)
# Postprocess
results = self._postprocess(outputBlob)
return results
def _postprocess(self, outputBlob):
# Decode
dets = self._decode(outputBlob)
# NMS
keepIdx = cv2.dnn.NMSBoxes(
bboxes=dets[:, 0:4].tolist(),
scores=dets[:, -1].tolist(),
score_threshold=self._confThreshold,
nms_threshold=self._nmsThreshold,
top_k=self._topK
) # box_num x class_num
if len(keepIdx) > 0:
dets = dets[keepIdx]
# dets = np.squeeze(dets, axis=1)
return dets[:self._keepTopK]
else:
return np.empty(shape=(0, 15))
def _priorGen(self):
w, h = self._inputSize
feature_map_2th = [int(int((h + 1) / 2) / 2),
int(int((w + 1) / 2) / 2)]
feature_map_3th = [int(feature_map_2th[0] / 2),
int(feature_map_2th[1] / 2)]
feature_map_4th = [int(feature_map_3th[0] / 2),
int(feature_map_3th[1] / 2)]
feature_map_5th = [int(feature_map_4th[0] / 2),
int(feature_map_4th[1] / 2)]
feature_map_6th = [int(feature_map_5th[0] / 2),
int(feature_map_5th[1] / 2)]
feature_maps = [feature_map_3th, feature_map_4th,
feature_map_5th, feature_map_6th]
priors = []
for k, f in enumerate(feature_maps):
min_sizes = self._min_sizes[k]
for i, j in product(range(f[0]), range(f[1])): # i->h, j->w
for min_size in min_sizes:
s_kx = min_size / w
s_ky = min_size / h
cx = (j + 0.5) * self._steps[k] / w
cy = (i + 0.5) * self._steps[k] / h
priors.append([cx, cy, s_kx, s_ky])
self.priors = np.array(priors, dtype=np.float32)
def _decode(self, outputBlob):
loc, conf, iou = outputBlob
# get score
cls_scores = conf[:, 1]
iou_scores = iou[:, 0]
# clamp
_idx = np.where(iou_scores < 0.)
iou_scores[_idx] = 0.
_idx = np.where(iou_scores > 1.)
iou_scores[_idx] = 1.
scores = np.sqrt(cls_scores * iou_scores)
scores = scores[:, np.newaxis]
scale = np.array(self._inputSize)
# get bboxes
bboxes = np.hstack((
(self.priors[:, 0:2] + loc[:, 0:2] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 2:4] * np.exp(loc[:, 2:4] * self._variance)) * scale
))
# (x_c, y_c, w, h) -> (x1, y1, w, h)
bboxes[:, 0:2] -= bboxes[:, 2:4] / 2
# get landmarks
landmarks = np.hstack((
(self.priors[:, 0:2] + loc[:, 4: 6] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 6: 8] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 8:10] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 10:12] * self._variance[0] * self.priors[:, 2:4]) * scale,
(self.priors[:, 0:2] + loc[:, 12:14] * self._variance[0] * self.priors[:, 2:4]) * scale
))
dets = np.hstack((bboxes, landmarks, scores))
return dets
def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
output = image.copy()
landmark_color = [
(255, 0, 0), # right eye
( 0, 0, 255), # left eye
( 0, 255, 0), # nose tip
(255, 0, 255), # right mouth corner
( 0, 255, 255) # left mouth corner
]
if fps is not None:
cv2.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
for det in results:
bbox = det[0:4].astype(np.int32)
cv2.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2)
conf = det[-1]
cv2.putText(output, '{:.4f}'.format(conf), (bbox[0], bbox[1]+12), cv2.FONT_HERSHEY_DUPLEX, 0.5, text_color)
landmarks = det[4:14].astype(np.int32).reshape((5,2))
for idx, landmark in enumerate(landmarks):
cv2.circle(output, landmark, 2, landmark_color[idx], 2)
return output
class SFace:
def __init__(self):
        modelPath = 'weights/face_detection_yunet.onnx'  # note: this should point to the SFace face-recognition model, not the YuNet detector
self._model = cv2.dnn.readNet(modelPath)
self._input_size = [112, 112]
self._dst = np.array([
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
], dtype=np.float32)
self._dst_mean = np.array([56.0262, 71.9008], dtype=np.float32)
@property
def name(self):
return self.__class__.__name__
def setBackend(self, backend_id):
self._model.setPreferableBackend(backend_id)
def setTarget(self, target_id):
self._model.setPreferableTarget(target_id)
def _preprocess(self, image, bbox):
aligned_image = self._alignCrop(image, bbox)
return cv2.dnn.blobFromImage(aligned_image)
def infer(self, image, bbox):
# Preprocess
# inputBlob = self._preprocess(image, bbox)
inputBlob = cv2.dnn.blobFromImage(self._alignCrop(image, bbox))
# Forward
self._model.setInput(inputBlob)
outputBlob = self._model.forward()
# Postprocess
results = outputBlob / cv2.norm(outputBlob)
return results
def match(self, image1, face1, image2, face2, dis_type=0):
feature1 = self.infer(image1, face1)
feature2 = self.infer(image2, face2)
if dis_type == 0: # COSINE
return np.sum(feature1 * feature2)
elif dis_type == 1: # NORM_L2
return cv2.norm(feature1, feature2)
else:
raise NotImplementedError()
def _alignCrop(self, image, face):
# Retrieve landmarks
if face.shape[-1] == (4 + 5 * 2):
landmarks = face[4:].reshape(5, 2)
else:
raise NotImplementedError()
warp_mat = self._getSimilarityTransformMatrix(landmarks)
aligned_image = cv2.warpAffine(image, warp_mat, self._input_size, flags=cv2.INTER_LINEAR)
return aligned_image
def _getSimilarityTransformMatrix(self, src):
# compute the mean of src and dst
src_mean = np.array([np.mean(src[:, 0]), np.mean(src[:, 1])], dtype=np.float32)
dst_mean = np.array([56.0262, 71.9008], dtype=np.float32)
# subtract the means from src and dst
src_demean = src.copy()
src_demean[:, 0] = src_demean[:, 0] - src_mean[0]
src_demean[:, 1] = src_demean[:, 1] - src_mean[1]
dst_demean = self._dst.copy()
dst_demean[:, 0] = dst_demean[:, 0] - dst_mean[0]
dst_demean[:, 1] = dst_demean[:, 1] - dst_mean[1]
A = np.array([[0., 0.], [0., 0.]], dtype=np.float64)
for i in range(5):
A[0][0] += dst_demean[i][0] * src_demean[i][0]
A[0][1] += dst_demean[i][0] * src_demean[i][1]
A[1][0] += dst_demean[i][1] * src_demean[i][0]
A[1][1] += dst_demean[i][1] * src_demean[i][1]
A = A / 5
d = np.array([1.0, 1.0], dtype=np.float64)
if A[0][0] * A[1][1] - A[0][1] * A[1][0] < 0:
d[1] = -1
T = np.array([
[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]
], dtype=np.float64)
s, u, vt = cv2.SVDecomp(A)
smax = s[0][0] if s[0][0] > s[1][0] else s[1][0]
tol = smax * 2 * FLT_MIN
rank = int(0)
if s[0][0] > tol:
rank += 1
if s[1][0] > tol:
rank += 1
det_u = u[0][0] * u[1][1] - u[0][1] * u[1][0]
det_vt = vt[0][0] * vt[1][1] - vt[0][1] * vt[1][0]
if rank == 1:
if det_u * det_vt > 0:
uvt = np.matmul(u, vt)
T[0][0] = uvt[0][0]
T[0][1] = uvt[0][1]
T[1][0] = uvt[1][0]
T[1][1] = uvt[1][1]
else:
temp = d[1]
d[1] = -1
D = np.array([[d[0], 0.0], [0.0, d[1]]], dtype=np.float64)
Dvt = np.matmul(D, vt)
uDvt = np.matmul(u, Dvt)
T[0][0] = uDvt[0][0]
T[0][1] = uDvt[0][1]
T[1][0] = uDvt[1][0]
T[1][1] = uDvt[1][1]
d[1] = temp
else:
D = np.array([[d[0], 0.0], [0.0, d[1]]], dtype=np.float64)
Dvt = np.matmul(D, vt)
uDvt = np.matmul(u, Dvt)
T[0][0] = uDvt[0][0]
T[0][1] = uDvt[0][1]
T[1][0] = uDvt[1][0]
T[1][1] = uDvt[1][1]
var1 = 0.0
var2 = 0.0
for i in range(5):
var1 += src_demean[i][0] * src_demean[i][0]
var2 += src_demean[i][1] * src_demean[i][1]
var1 /= 5
var2 /= 5
scale = 1.0 / (var1 + var2) * (s[0][0] * d[0] + s[1][0] * d[1])
TS = [
T[0][0] * src_mean[0] + T[0][1] * src_mean[1],
T[1][0] * src_mean[0] + T[1][1] * src_mean[1]
]
T[0][2] = dst_mean[0] - scale * TS[0]
T[1][2] = dst_mean[1] - scale * TS[1]
T[0][0] *= scale
T[0][1] *= scale
T[1][0] *= scale
T[1][1] *= scale
return np.array([
[T[0][0], T[0][1], T[0][2]],
[T[1][0], T[1][1], T[1][2]]
], dtype=np.float64)
if __name__=='__main__':
cap = cv2.VideoCapture(0)
img1path='telangpu.png'
img2path='telangpu2.png'
    dis_type = 0  # 0: cosine, 1: norm_l2
detector = YuNet()
# recognizer = SFace()
    # face detection on the camera feed
# img2 = cv2.imread(img2path)
# face2 = detector.infer(img2)
while True:
        ret, img = cap.read()
        if not ret:
            continue
        srcimg = cv2.resize(img, (640, 480))
print(srcimg)
# srcimg_1 = cv2.flip(srcimg,1)
detector.setInputSize([srcimg.shape[1], srcimg.shape[0]])
face1 = detector.infer(srcimg)
for det in face1:
bbox = det[0:4].astype(np.int32)
cv2.rectangle(srcimg, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 0), 2)
# srcimg = visualize(srcimg,face1)
winName = 'Deep learning object detection in OpenCV'
detector.get_fps(srcimg)
cv2.namedWindow(winName, 0)
cv2.imshow(winName, srcimg)
# distance = recognizer.match(srcimg_1, face1[0][:-1], img2, face2[0][:-1], dis_type)
# if dis_type == 0:
# dis_type = 'Cosine'
# threshold = 0.363
# result = 'same identity' if distance >= threshold else 'different identity'
# else:
# dis_type = 'Norm-L2'
# threshold = 1.128
# result = 'same identity' if distance <= threshold else 'different identity'
if cv2.waitKey(100) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
# print('Using {} distance, threshold {} :distance{} ,{}.'.format(dis_type, threshold, distance, result))
print(face1)
Next, the dlib face version. Just add a cv2 camera-read loop to this code to view the result; I won't write that part again, change it yourself (a minimal loop is sketched after the code):
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 27 11:43:47 2018
@author: Administrator
"""
'''
Face recognition using the OpenCV library
'''
import time
import cv2,dlib
import numpy as np
import os
import threading
from rtsp import myThread
import config
thread_lock = threading.Lock()
thread_exit=False
from lib_face import YuNet
import shutil
# from camera_catch import camera_service
class person_face():
def __init__(self, name="y", xml_path=None, root_dir=None):
self.name =name
self.root_dir = r"E:\workspace\person_tracking\data\face"
self.face_dir =os.path.join(self.root_dir,name)
self.detector = dlib.get_frontal_face_detector()
        self.fps = 0  # frame rate
self.image = None
self.face_img = None
def face_detecting(self,img):
face_location = []
all_face_location = []
faces = self.detector(img, 0)
self.face_num = len(faces)
if len(faces) != 0:
self.face_flag = True
for i, face in enumerate(faces):
face_location.append(face)
w, h = (face.right() - face.left()), (face.bottom() - face.top())
left, right, top, bottom = face.left() - w // 4, face.right() + w // 4, face.top() - h // 2, face.bottom() + h // 4
all_face_location.append([left, right, top, bottom])
return face_location, all_face_location
else:
self.face_flag = False
return None
if __name__ == '__main__':
    data = r"E:\workspace\person_tracking\data\face"
    serve = person_face()
    # face_detecting() expects a frame read from cv2; see the camera-loop sketch below
    # serve.face_detecting(frame)
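A minimal sketch of that camera loop (assuming camera index 0 and the person_face class defined above):
import cv2

serve = person_face()
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        continue
    res = serve.face_detecting(frame)
    if res is not None:
        _, all_face_location = res
        # each entry is [left, right, top, bottom] (including the padding added in face_detecting)
        for left, right, top, bottom in all_face_location:
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
    cv2.imshow('dlib face', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()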
I won't go into face detection any further: there are plenty of mature methods available.
Face recognition: as you can see in main_track.py, it uses
model = cv2.face.EigenFaceRecognizer_create()
Concretely, you first collect face images and then test recognition; both detection and recognition live in the code below:
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 27 11:43:47 2018
@author: Administrator
"""
'''
Face recognition using the OpenCV library
'''
import time
import cv2,dlib
import numpy as np
import os
import threading
from rtsp import myThread
import config
thread_lock = threading.Lock()
thread_exit=False
from lib_face import YuNet
import shutil
# from camera_catch import camera_service
class person_face():
def __init__(self, name="aaa", xml_path=None, root_dir=None):
        self.name = name  # name of the person to collect
        self.root_dir = r"E:\workspace\person_tracking\data\face"
        self.face_dir = os.path.join(self.root_dir, name)  # directory where the collected images are saved
self.lib_face = YuNet()
self.xml_path =r'E:\workspace\person_tracking\data\haarcascade_frontalface_default.xml'
self.detector = dlib.get_frontal_face_detector()
        self.face_num = 0  # number of faces in the current frame
        self.last_face_num = 0  # number of faces in the previous frame
        self.face_num_change_flag = False  # flag: face count changed this frame, used later to skip recognition and keep the frame rate up
        self.quit_flag = False  # flag to exit the program
        self.img_num = 0  # number of images already in the face data folder
        self.collect_face_data = True  # whether to collect face data; collection only runs when True
self.image = None
self.face_img = None
def lib_detect(self,img):
face_bbox = []
self.lib_face.setInputSize([img.shape[1], img.shape[0]])
face1 = self.lib_face.infer(img)
for det in face1:
bbox = det[0:4].astype(np.int32)
face_bbox.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
return face_bbox
    # collect your own face data
def generator_camera(self):
        '''
        Open the camera, read frames, detect the faces in each frame, then crop and resize them.
        The generated images satisfy:
        1. grayscale, saved as .png
        2. all images have the same size
        params:
            data: directory where the generated face data is saved
        '''
img_height = 480
img_width = 720
capture = cv2.VideoCapture(0)
if not os.path.exists(self.face_dir):
os.mkdir(self.face_dir)
while True:
ref, frame = capture.read()
            # face detector
            # face = self.detector(frame, 0)  # dlib
            face = self.lib_detect(frame)  # libface (YuNet)
print(face)
for i,[left, top, right, bottom] in enumerate(face):
w = int(right-left)
h = int(bottom-top)
area = w*h
print(area)
if area<600:
continue
                # draw a rectangle on the original frame
# w, h = (face.right() - face.left()), (face.bottom() - face.top())
# left, right, top, bottom = face.left() - w // 4, face.right() + w // 4, face.top() - h // 2, face.bottom() + h // 4
# cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
else:
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
# cv2.rectangle(frame, (face[0], face[1]), (face[2], face[3]), (0, 0, 255), 2)
                    # resize the crop
# new_frame = cv2.resize(frame[y:y + h, x:x + w], (92, 112))
# new_frame = frame[top+2:bottom-2, left+2:right-2]
new_frame = cv2.resize(frame[top+2:bottom-2, left+2:right-2], (92, 112))
                    # save the face crop
cv2.imwrite('%s/%s.png' % (self.face_dir, str(time.time())), new_frame)
cv2.imshow('Dynamic', frame)
            # press q to quit
if cv2.waitKey(100) & 0xff == ord('q'):
break
cv2.destroyAllWindows()
    # load images (ORL-style face folder layout) and prepare the training data
def LoadImages(self):
        '''
        Load the image data used for training
        params:
            data: directory containing the training data; all images must be the same size
        ret:
            images: [m, height, width], m = number of samples
            names: list of person names
            labels: labels
        '''
images = []
names = []
labels = []
label = 0
        # iterate over all sub-folders
for subdir in os.listdir(self.root_dir):
subpath = os.path.join(self.root_dir, subdir)
# print('path',subpath)
            # make sure it is a directory
if os.path.isdir(subpath):
                # each folder holds many photos of one person
names.append(subdir)
                # iterate over the image files in this folder
for filename in os.listdir(subpath):
imgpath = os.path.join(subpath, filename)
img = cv2.imread(imgpath, cv2.IMREAD_COLOR)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# cv2.imshow('1',img)
# cv2.waitKey(0)
images.append(gray_img)
labels.append(label)
label += 1
images = np.asarray(images)
# names=np.asarray(names)
labels = np.asarray(labels)
return images, labels, names
    # test the trained model
def FaceRec(self):
        # load the training data
X, y, names = self.LoadImages()
# print('x',X)
model = cv2.face.EigenFaceRecognizer_create()
model.train(X, y)
        # open the camera
cap = cv2.VideoCapture(0)
cv2.namedWindow('Dynamic')
        # create the Haar cascade classifier
face_casecade = cv2.CascadeClassifier(self.xml_path)
while (True):
            # read one frame
            # ret: whether the frame was read successfully
            # frame: the frame itself
ret, frame = cap.read()
frame = cv2.resize(frame,(1280,720))
            # proceed only if the frame was read successfully
# print('ret',ret)
if ret:
                # convert to grayscale
gray_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # detect faces with the cascade classifier
faces = face_casecade.detectMultiScale(gray_img, 1.3, 5)
# faces = self.lib_detect(frame)
                # draw a rectangle around each detected face
for (x, y, w, h) in faces:
                    frame = cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)  # blue
# frame = cv2.rectangle(frame, (x, y), (w, h), (255, 0, 0), 2) # 蓝色
roi_gray = gray_img[y:y + h, x:x + w]
try:
                        # resize the face ROI to 92x112, the size the EigenFace model expects
                        # cv2.resize(src, dsize, interpolation=...)
roi_gray = cv2.resize(roi_gray, (92, 112), interpolation=cv2.INTER_LINEAR)
params = model.predict(roi_gray)
print('Label:%s,confidence:%.2f' % (params[0], params[1]))
                        '''
                        putText: draw text on the image
                        putText(image, text, top-left corner, font, font scale, color, thickness)
                        '''
cv2.putText(frame, names[params[0]], (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
except:
continue
cv2.imshow('Dynamic', frame)
            # press q to quit
if cv2.waitKey(100) & 0xff == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
data = r"E:\workspace\person_tracking\data\face"
serve= person_face()
# LoadImages(data)
# image = camera_service()
# generator(image,"ybj")
serve.generator_camera()
# serve.FaceRec()
That wraps up the face part; the previous two posts covered it in detail, and this section only adds the extra methods.
Now back to main_track.py.
Using face detection plus face recognition, the faces are identified first (much like an access-control gate), and each identity id is stored together with its face bbox. Then, from the relationship between the person bbox and the face bbox, the identity id is written directly onto the track_id inside DeepSORT (a small helper sketch of that bbox matching follows):
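The face-to-person association itself is just a geometric check: a recognised face box should fall inside one of the YOLO person boxes. A hypothetical helper to illustrate the idea (the function name and signature are mine, not from the repo):
def assign_names_to_persons(name_dic, person_bboxes):
    """name_dic: {name: [x1, y1, x2, y2]} face boxes from face recognition.
    person_bboxes: person boxes as (x1, y1, x2, y2, ...).
    Returns one name (or None) per person box, in the same order."""
    names = []
    for pb in person_bboxes:
        px1, py1, px2, py2 = pb[:4]
        matched = None
        for name, (fx1, fy1, fx2, fy2) in name_dic.items():
            # the centre of the face box should lie inside the person box
            cx, cy = (fx1 + fx2) / 2, (fy1 + fy2) / 2
            if px1 <= cx <= px2 and py1 <= cy <= py2:
                matched = name
                break
        names.append(matched)
    return names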
Go to line 25 of deep_sort.py:
def update(self, bbox_xywh, confidences, ori_img, name_list=None):
self.height, self.width = ori_img.shape[:2]
# generate detections
features = self._get_features(bbox_xywh, ori_img, name_list=name_list)
bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
detections = [Detection(bbox_tlwh[i], conf, list(features.values())[i], list(features.keys())[i]) for i, conf in enumerate(confidences) if conf>self.min_confidence]
        # run non-maximum suppression
boxes = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
detections = [detections[i] for i in indices]
# update tracker
self.tracker.predict()
self.tracker.update(detections)
# output bbox identities
outputs = []
out_name = []
for track in self.tracker.tracks:
if not track.is_confirmed() or track.time_since_update > 1:
continue
box = track.to_tlwh()
x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
print("==========track.track_id=========:",track.track_id)
track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2], dtype=int))  # np.int is removed in recent NumPy versions
out_name.append(track_id)
if len(outputs) > 0:
outputs = np.stack(outputs, axis=0)
return outputs, out_name
The name↔bbox correspondence is turned into a name:bbox mapping and passed into feature extraction; after a feature is extracted from each bbox region, the result is kept as a name:features dict (see the sketch below).
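For reference, this is roughly how _get_features can be adapted. The original deep_sort_pytorch-style version returns a plain feature array, and the name_list layout assumed here (one recognised name or None per person box) is my assumption, not something shown in the post:
    def _get_features(self, bbox_xywh, ori_img, name_list=None):
        im_crops, keys = [], []
        for i, box in enumerate(bbox_xywh):
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im_crops.append(ori_img[y1:y2, x1:x2])
            # key each crop by its identity, falling back to the detection index
            if name_list is not None and name_list[i] is not None:
                keys.append(name_list[i])
            else:
                keys.append(i)
        features = self.extractor(im_crops) if im_crops else np.array([])
        # dict preserves insertion order (Python 3.7+), which update() relies on
        return dict(zip(keys, features))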
The name and the feature are then passed into the Detection class, and self.name = name is added to Detection (a sketch follows).
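A sketch of what the extended Detection could look like, modelled on the stock deep_sort detection.py; only the name argument is new, and the original class also has to_tlbr() and other helpers, so treat this as an illustration rather than the exact file:
import numpy as np

class Detection(object):
    """Single detection, extended with an identity name so the name travels with
    the box through the tracker (matches the call Detection(tlwh, conf, feature, name))."""

    def __init__(self, tlwh, confidence, feature, name=None):
        self.tlwh = np.asarray(tlwh, dtype=np.float32)   # (top-left x, top-left y, w, h)
        self.confidence = float(confidence)
        self.feature = np.asarray(feature, dtype=np.float32)
        self.name = name  # recognised identity, or a fallback detection index

    def to_xyah(self):
        # (centre x, centre y, aspect ratio, height), as in the original class
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret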
The tracker itself only operates on Detection objects, so in deep_sort/deep_sort/sort/tracker.py, inside update() around line 58, add self._initiate_track(detections[detection_idx]) at the commented spot so the identity is refreshed:
def update(self, detections):
"""Perform measurement update and track management.
Parameters
----------
        detections : List[deep_sort.detection.Detection]
A list of detections at the current time step.
"""
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
            # added line: re-assign the name to this track_id on every frame
self._initiate_track(detections[detection_idx])
self.tracks[track_idx].update(
self.kf, detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
And _initiate_track() becomes:
def _initiate_track(self, detection):
mean, covariance = self.kf.initiate(detection.to_xyah())
print("==========detection.name=========",detection.name)
# if len(name_dic)>=self._next_id:
# name = name_dic[self._next_id]
# else:
# name = self._next_id
# if isinstance(detection.name, int):
# detection.name = self._next_id
self.tracks.append(Track(
mean, covariance, detection.name, self.n_init, self.max_age,
detection.feature))
# self.tracks.append(Track(
# mean, covariance, self._next_id, self.n_init, self.max_age,
# detection.feature))
# self._next_id += 1
With that, identity assignment and tracking work in a first rough version. The approach is far from perfect and still has plenty of bugs, but I have not yet seen a blog post that combines face recognition with pedestrian tracking, so I am still improving it. If anyone has a better approach, please let me know; many thanks.