AI Learning 81/82: The YOLO Prediction Class (Kuaishou video series)
The YOLO prediction class
The YOLO prediction class loads the trained model and exposes three prediction methods: detect_image() runs detection on a single image and draws the results, get_FPS() measures the average inference time per frame (from which frames per second can be derived), and detect_heatmap() renders the objectness heat map of the network outputs.
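A hedged usage sketch of these three methods; the image path, heat-map save path, and loop count below are hypothetical, and it assumes the weight, class, and anchor files listed in _defaults are in place:

from PIL import Image
from yolo import YOLO  # the class defined in yolo.py below

if __name__ == "__main__":
    yolo = YOLO()                                   # builds the graph and loads the weights from _defaults
    image = Image.open("img/test.jpg")              # hypothetical test image

    result = yolo.detect_image(image, crop=False, count=True)  # draw boxes, labels, and per-class counts
    result.show()

    seconds = yolo.get_FPS(image, test_interval=100)           # average seconds per inference over 100 runs
    print("FPS: {:.2f}".format(1.0 / seconds))

    yolo.detect_heatmap(image, "img/heatmap.png")   # hypothetical output path for the heat map

    yolo.close_session()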
The YOLO class (yolo.py)
import colorsys
import os
import time
import numpy as np
from keras import backend as K
from PIL import ImageDraw, ImageFont
from yolo_model import get_yolo_model
from utils import (cvtColor, get_anchors, get_classes, preprocess_input,
resize_image, show_config)
from utils_bbox import DecodeBox
class YOLO(object):
_defaults = {
"model_path": '../model_data/yolo_weights.h5', # 原来是:yolo_weights.h5 , best_epoch_weights.h5
"classes_path": '../model_data/coco_classes.txt', # 原来是:coco_classes.txt , voc_classes.txt
"anchors_path": '../model_data/yolo_anchors.txt',
"anchors_mask": [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
"input_shape": [416, 416],
"confidence": 0.5,
"nms_iou": 0.3,
"max_boxes": 100,
"letterbox_image": False,
}
@classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
def __init__(self, **kwargs):
self.__dict__.update(self._defaults)
for name, value in kwargs.items():
setattr(self, name, value)
self._defaults[name] = value
print("yolo.py __init__ name={},value={}".format(name, value))
self.class_names, self.num_classes = get_classes(self.classes_path)
self.anchors, self.num_anchors = get_anchors(self.anchors_path) # shape(9,2)
        hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]  # HSV tuples such as (0.01, 1.0, 1.0); the first element is the hue as a fraction of the colour wheel
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
        self.input_image_shape = K.placeholder(shape=(2,))  # placeholder for the original input image size
        self.sess = K.get_session()  # TensorFlow 1.x session obtained through the Keras backend (tested with TensorFlow 1.13.0)
self.boxes, self.scores, self.classes = self.generate()
show_config(**self._defaults)
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
self.yolo_model = get_yolo_model([None, None, 3], self.anchors_mask, self.num_classes)
self.yolo_model.load_weights(self.model_path)
        print('{} model, anchors, and classes loaded.'.format(model_path))
boxes, scores, classes = DecodeBox(
            self.yolo_model.output,         # model outputs: the [P5, P4, P3] feature-map tensors built by yolo_body in nets/yolo.py
            self.anchors,                   # anchor boxes: [116,90],[156,198],[373,326] [30,61],[62,45],[59,119] [10,13],[16,30],[33,23]
            self.num_classes,               # number of object classes
            self.input_image_shape,         # actual size of the input image
            self.input_shape,               # network input size [416, 416]
            anchor_mask=self.anchors_mask,  # [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
max_boxes=self.max_boxes,
confidence=self.confidence,
nms_iou=self.nms_iou,
letterbox_image=self.letterbox_image
)
return boxes, scores, classes
def detect_image(self, image, crop=False, count=False):
image = cvtColor(image)
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
print("K.learning_phase()={}".format(K.learning_phase()))
print("self.yolo_model.input={}".format(self.yolo_model.input))
print("self.input_image_shape={}".format(self.input_image_shape))
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
}
)
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
font = ImageFont.truetype(font='../model_data/simhei.ttf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
if count:
print("top_label:", out_classes)
classes_nums = np.zeros([self.num_classes])
for i in range(self.num_classes):
num = np.sum(out_classes == i)
if num > 0:
print(self.class_names[i], " : ", num)
classes_nums[i] = num
print("classes_nums:", classes_nums)
if crop:
for i, c in list(enumerate(out_boxes)):
top, left, bottom, right = out_boxes[i]
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
dir_save_path = "img_crop"
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
crop_image = image.crop([left, top, right, bottom])
crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
print("save crop_" + str(i) + ".png to " + dir_save_path)
for i, c in list(enumerate(out_classes)):
predicted_class = self.class_names[int(c)]
box = out_boxes[i]
score = out_scores[i]
top, left, bottom, right = box
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
for i in range(thickness):
draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
del draw
return image
def get_FPS(self, image, test_interval):
image = cvtColor(image)
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0})
t1 = time.time()
for _ in range(test_interval):
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0})
t2 = time.time()
tact_time = (t2 - t1) / test_interval
return tact_time
def detect_heatmap(self, image, heatmap_save_path):
import cv2
import matplotlib.pyplot as plt
def sigmoid(x):
y = 1.0 / (1.0 + np.exp(-x))
return y
image = cvtColor(image)
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
output = self.yolo_model.predict(image_data)
plt.imshow(image, alpha=1)
plt.axis('off')
mask = np.zeros((image.size[1], image.size[0]))
for sub_output in output:
b, h, w, c = np.shape(sub_output)
sub_output = np.reshape(sub_output, [b, h, w, 3, -1])[0]
score = np.max(sigmoid(sub_output[..., 4]), -1)
score = cv2.resize(score, (image.size[0], image.size[1]))
normed_score = (score * 255).astype('uint8')
mask = np.maximum(mask, normed_score)
plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")
plt.axis('off')
plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
plt.margins(0, 0)
plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches=-0.1)
print("Save to the " + heatmap_save_path)
plt.show()
def get_map_txt(self, image_id, image, class_names, map_out_path):
f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
image = cvtColor(image)
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
for i, c in enumerate(out_classes):
predicted_class = self.class_names[int(c)]
score = str(out_scores[i])
top, left, bottom, right = out_boxes[i]
if predicted_class not in class_names:
continue
f.write("%s %s %s %s %s %s\n" % (
predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
f.close()
return
def close_session(self):
self.sess.close()
Code walkthrough
generate(): the DecodeBox() call
DecodeBox() produces the predicted box coordinates, the objectness confidence of each box, and the class prediction. What it returns here are only symbolic tensors, not concrete data: generate() merely builds the computation graph that will compute the boxes, scores, and classes; nothing has been evaluated yet.
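A minimal sketch of this graph-mode behaviour, using plain TensorFlow 1.x through the Keras backend and independent of the YOLO code:

from keras import backend as K

x = K.placeholder(shape=(2,))  # symbolic input with no value attached, like input_image_shape above
y = x * 2 + 1                  # this only adds operations to the graph
print(y)                       # prints a symbolic Tensor description, not numbers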
detect_image(): the self.sess.run() call
self.sess.run() evaluates the graph built from DecodeBox(): it feeds the preprocessed image and the original image size through feed_dict and returns the concrete box coordinates, objectness confidences, and class indices as NumPy arrays.
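Continuing the same idea, a self-contained sketch of how sess.run() turns symbolic tensors into NumPy data by feeding placeholder values through feed_dict, mirroring the pattern used in detect_image():

import numpy as np
from keras import backend as K

x = K.placeholder(shape=(2,))                                # stands in for input_image_shape
y = x * 2 + 1                                                # stands in for the boxes/scores/classes graph
sess = K.get_session()
result = sess.run(y, feed_dict={x: np.array([3.0, 5.0])})    # evaluation happens only here
print(result)                                                # concrete NumPy array containing 7.0 and 11.0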