Preface
This post walks through the YOLO code, starting from YOLOv3 and moving toward YOLOv4. For more on object detection and YOLO in general, see: the object detection survey, the object detection YOLO series, Deep Learning Object Detection: Implementing the YOLO Algorithm, and Anchor-Free Object Detection: CenterNet. This post introduces YOLO at a high level; for more detail, refer to Deep Learning Object Detection: Implementing the YOLO Algorithm.
Dataset
The dataset directory is laid out as follows:
├── dataset
│   ├── Annotations
│   ├── ImageSets
│   └── JPEGImages
The dataset contains three folders: Annotations, ImageSets and JPEGImages. JPEGImages holds the images, saved in .jpg format; Annotations holds the xml annotation files, which record the bounding boxes and object classes for the corresponding images; ImageSets holds the txt files that split the dataset into training, test and validation sets. The script that performs the split is:
from absl import app, flags, logging
from absl.flags import FLAGS
import os
import numpy as np
import codecs
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split

base_path = os.path.join(os.getcwd(), 'village')

flags.DEFINE_string('jsonpath', '', 'path to label(*.json)')
flags.DEFINE_bool('isJSON', False, 'convert jsonFile or not')
flags.DEFINE_string('savepath', base_path, 'path to save directory')
flags.DEFINE_bool('Test', True, 'Create test set or not')

def main(_argv):
    # 1. label paths
    labelme_path = FLAGS.jsonpath  # path to the original labelme annotations
    saved_path = FLAGS.savepath + '/'  # save path
    isUseTest = FLAGS.Test  # whether to create a test set
    # 2. create the required directories
    if not os.path.exists(saved_path + "Annotations"):
        os.makedirs(saved_path + "Annotations")
    if not os.path.exists(saved_path + "JPEGImages/"):
        os.makedirs(saved_path + "JPEGImages/")
    if not os.path.exists(saved_path + "ImageSets/Main/"):
        os.makedirs(saved_path + "ImageSets/Main/")
    # 3. collect the files to process
    files = glob(labelme_path + "*.json")
    files = [i.replace("\\", "/").split("/")[-1].split(".json")[0] for i in files]
    if FLAGS.isJSON:
        # 4. read the labelme annotations and write them out as VOC xml
        for json_file_ in files:
            json_filename = labelme_path + json_file_ + ".json"
            json_file = json.load(open(json_filename, "r", encoding="utf-8"))
            height, width, channels = cv2.imread('labelmedataset/images/' + json_file_ + ".jpg").shape
            with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml:
                xml.write('<annotation>\n')
                xml.write('\t<folder>' + 'AI' + '</folder>\n')
                xml.write('\t<filename>' + json_file_ + ".jpg" + '</filename>\n')
                xml.write('\t<source>\n')
                xml.write('\t\t<database>AGCIMAI</database>\n')
                xml.write('\t</source>\n')
                xml.write('\t<size>\n')
                xml.write('\t\t<width>' + str(width) + '</width>\n')
                xml.write('\t\t<height>' + str(height) + '</height>\n')
                xml.write('\t\t<depth>' + str(channels) + '</depth>\n')
                xml.write('\t</size>\n')
                xml.write('\t<segmented>0</segmented>\n')
                for multi in json_file["shapes"]:
                    points = np.array(multi["points"])
                    labelName = multi["label"]
                    xmin = min(points[:, 0])
                    xmax = max(points[:, 0])
                    ymin = min(points[:, 1])
                    ymax = max(points[:, 1])
                    label = multi["label"]
                    if xmax <= xmin:
                        pass
                    elif ymax <= ymin:
                        pass
                    else:
                        xml.write('\t<object>\n')
                        xml.write('\t\t<name>' + labelName + '</name>\n')
                        xml.write('\t\t<pose>Unspecified</pose>\n')
                        xml.write('\t\t<truncated>1</truncated>\n')
                        xml.write('\t\t<difficult>0</difficult>\n')
                        xml.write('\t\t<bndbox>\n')
                        xml.write('\t\t\t<xmin>' + str(int(xmin)) + '</xmin>\n')
                        xml.write('\t\t\t<ymin>' + str(int(ymin)) + '</ymin>\n')
                        xml.write('\t\t\t<xmax>' + str(int(xmax)) + '</xmax>\n')
                        xml.write('\t\t\t<ymax>' + str(int(ymax)) + '</ymax>\n')
                        xml.write('\t\t</bndbox>\n')
                        xml.write('\t</object>\n')
                        print(json_filename, xmin, ymin, xmax, ymax, label)
                xml.write('</annotation>')
        # 5. copy the images to VOC2007/JPEGImages/
        # image_files = glob("labelmedataset/images/" + "*.jpg")
        # print("copy image files to VOC007/JPEGImages/")
        # for image in image_files:
        #     shutil.copy(image, saved_path + "JPEGImages/")
    # 6. split the files into train/val/test txt lists
    txtsavepath = saved_path + "ImageSets/Main/"
    ftrainval = open(txtsavepath + '/trainval.txt', 'w')
    ftest = open(txtsavepath + '/test.txt', 'w')
    ftrain = open(txtsavepath + '/train.txt', 'w')
    fval = open(txtsavepath + '/val.txt', 'w')
    total_files = glob(saved_path + "/Annotations/*.xml")
    total_files = [i.replace("\\", "/").split("/")[-1].split(".xml")[0] for i in total_files]
    trainval_files = []
    test_files = []
    if isUseTest:
        trainval_files, test_files = train_test_split(total_files, test_size=0.1, random_state=55)
    else:
        trainval_files = total_files
    for file in trainval_files:
        ftrainval.write(file + "\n")
    # split trainval into train and val
    train_files, val_files = train_test_split(trainval_files, test_size=0.2, random_state=55)
    # train
    for file in train_files:
        ftrain.write(file + "\n")
    # val
    for file in val_files:
        fval.write(file + "\n")
    for file in test_files:
        ftest.write(file + "\n")
    ftrainval.close()
    ftrain.close()
    fval.close()
    ftest.close()

if __name__ == '__main__':
    try:
        app.run(main)
    except SystemExit:
        pass
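Assuming the script above is saved as split_dataset.py (the original does not give a filename), a typical run would be python split_dataset.py --jsonpath ./labelmedataset/json/ --isJSON, which converts the labelme json files into VOC xml under Annotations/ and writes the trainval/train/val/test lists under ImageSets/Main/; the paths here are example values.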
YOLOv3
The YOLOv3 network architecture diagram is as follows:
YOLOv3's backbone is Darknet-53, which has no fully connected layers. Darknet-53 is composed of DBL modules and resn modules. Per the architecture diagram above, a DBL module is a convolution layer + BN layer + Leaky ReLU. After the backbone, an FPN-like structure extracts feature maps at several scales, and these feature maps are passed through the detection heads to produce the final results.
Model Construction
Given the diagram above, the model-building code is quite straightforward.
Darknet
def Darknet(name=None):
    x = inputs = Input([None, None, 3])
    x = DarknetConv(x, 32, 3)
    x = DarknetBlock(x, 64, 1)
    x = DarknetBlock(x, 128, 2)  # skip connection
    x = x_36 = DarknetBlock(x, 256, 8)  # skip connection
    x = x_61 = DarknetBlock(x, 512, 8)
    x = DarknetBlock(x, 1024, 4)
    return tf.keras.Model(inputs, (x_36, x_61, x), name=name)
The DarknetConv2D_BN_Leaky block in the bottom-left of the diagram is implemented by DarknetConv:
def DarknetConv(x, filters, size, strides=1, batch_norm=True):
    if strides == 1:
        padding = 'same'
    else:
        x = ZeroPadding2D(((1, 0), (1, 0)))(x)  # top left half-padding
        padding = 'valid'
    x = Conv2D(filters=filters, kernel_size=size,
               strides=strides, padding=padding,
               use_bias=not batch_norm, kernel_regularizer=l2(0.0005))(x)
    if batch_norm:
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.1)(x)
    return x
The Resblock_body implementation; in the diagram, resX means the block contains X residual modules:
def DarknetBlock(x, filters, blocks):
    x = DarknetConv(x, filters, 3, strides=2)
    for _ in range(blocks):
        x = DarknetResidual(x, filters)
    return x
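DarknetBlock calls DarknetResidual, which is not shown in the excerpt. A minimal sketch consistent with the code above (a 1x1 bottleneck followed by a 3x3 convolution, wrapped in a shortcut connection; the Add import is an assumption about how the surrounding file is set up):

from tensorflow.keras.layers import Add

def DarknetResidual(x, filters):
    # 1x1 bottleneck, then a 3x3 conv, added back onto the input (the shortcut)
    prev = x
    x = DarknetConv(x, filters // 2, 1)
    x = DarknetConv(x, filters, 3)
    x = Add()([prev, x])
    return x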
Neck
The neck concatenates the feature maps produced by the backbone with upsampled feature maps from other scales, yielding the final feature maps to run detection on. As the diagram shows, the neck consists of DarknetConv (given above), upsampling, concatenation, and convolutions. The code is as follows:
def YoloConv(filters, name=None):
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            x, x_skip = inputs
            # concat with skip connection
            x = DarknetConv(x, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, x_skip])
        else:
            x = inputs = Input(x_in.shape[1:])
        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        return Model(inputs, x, name=name)(x_in)
    return yolo_conv
Detector
The YOLO output head is fairly simple; it mainly outputs the object's position (top-left and bottom-right corner coordinates), a confidence score, and the classification.
def YoloOutput(filters, anchors, classes, name=None):
    def yolo_output(x_in):
        x = inputs = Input(x_in.shape[1:])
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False)
        x = Lambda(lambda x: tf.reshape(x, (-1, tf.shape(x)[1], tf.shape(x)[2],
                                            anchors, classes + 5)))(x)
        return tf.keras.Model(inputs, x, name=name)(x_in)
    return yolo_output
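None of the snippets above show how the pieces are tied together. A sketch of the usual three-scale assembly (the function name YoloV3 and the masks argument are assumptions, following the yolov3-tf2 style this code appears to be based on):

def YoloV3(size=None, channels=3, masks=None, classes=80):
    x = inputs = Input([size, size, channels])
    # backbone: feature maps at strides 32 (x), 16 (x_61) and 8 (x_36)
    x_36, x_61, x = Darknet(name='yolo_darknet')(x)

    # neck + heads, from the coarsest scale to the finest
    x = YoloConv(512, name='yolo_conv_0')(x)
    output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x)

    x = YoloConv(256, name='yolo_conv_1')((x, x_61))
    output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x)

    x = YoloConv(128, name='yolo_conv_2')((x, x_36))
    output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x)

    return Model(inputs, (output_0, output_1, output_2), name='yolov3')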
For how the bounding boxes are predicted from these raw outputs, see the earlier post: Deep Learning Object Detection: Implementing the YOLO Algorithm.
Model Input
For the model input, the main thing to cover is one preprocessing function, preprocess_true_boxes(): from the labeled boxes it determines the best-matching anchor boxes at each scale. The detailed walkthrough is in the code below:
# Detailed walk-throughs: https://blog.youkuaiyun.com/weixin_38145317/article/details/95349201
# and https://zhuanlan.zhihu.com/p/79425557
def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    # Builds the ground-truth tensors: every true box is assigned the anchor box
    # that matches it best (the "detectors_mask").
    # true_boxes: positions and classes of the ground-truth boxes,
    # shape (batch_size, max_box_number, 5), where 5 is (x_min, y_min, x_max, y_max, label)
    assert (true_boxes[..., 4] < num_classes).all()
    num_layers = len(anchors) // 3
    anchor_mask = config.ANCHOR_MASK
    # normalization
    true_boxes = np.array(true_boxes, dtype='float32')
    input_shape = np.array(input_shape, dtype='int32')
    # center coordinates
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    # widths and heights of the labeled boxes
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
    # m is the batch size, grid_shapes holds the grid shapes
    m = true_boxes.shape[0]
    # {0: 32, 1: 16, 2: 8} are the downsampling factors of the feature pyramid,
    # giving the feature-map size at each scale
    grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
    # allocate the target tensor for each feature map; e.g. for the 13x13 map
    # this is (m, 13, 13, 3, 5 + num_classes)
    y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
                       dtype='float32') for l in range(num_layers)]
    anchors = np.expand_dims(anchors, 0)
    # computing the IoU between anchor boxes and true boxes needs anchor coordinates
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    valid_mask = boxes_wh[..., 0] > 0
    # process each image: find the anchor box with the best IoU for every object
    for b in range(m):
        wh = boxes_wh[b, valid_mask[b]]
        if len(wh) == 0:
            continue
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes
        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)
        best_anchor = np.argmax(iou, axis=-1)
        for t, best_detect in enumerate(best_anchor):
            for l in range(num_layers):
                if best_detect in anchor_mask[l]:
                    # locate the grid cell that contains the box center;
                    # xind, yind are the cell coordinates
                    xind = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
                    yind = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(best_detect)
                    c = true_boxes[b, t, 4].astype('int32')
                    # fill in the center position and width/height of the true box
                    y_true[l][b, yind, xind, k, 0:4] = true_boxes[b, t, 0:4]
                    # objectness
                    y_true[l][b, yind, xind, k, 4] = 1
                    # one-hot class label
                    y_true[l][b, yind, xind, k, 5 + c] = 1
    return y_true
Code to debug this function:
# Testing preprocess_true_boxes
'''
debug preprocess_true_boxes
'''
true_boxes = [[[263, 211, 324, 339, 8], [165, 264, 253, 372, 8], [241, 194, 295, 299, 8], [150, 141, 229, 284, 14]],
              [[69, 172, 270, 330, 12], [150, 141, 229, 284, 14], [241, 194, 295, 299, 8], [285, 201, 327, 331, 14]]]
true_boxes = np.array(true_boxes)
print(true_boxes.shape)  # (2, 4, 5): a batch of 2 images with 4 boxes each
input_shape = (416, 416)
anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]]
num_classes = 20
preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes)
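If config.ANCHOR_MASK is the usual [[6, 7, 8], [3, 4, 5], [0, 1, 2]] (an assumption, since config is not shown), the call above returns three arrays whose shapes can be checked like this:

y_true = preprocess_true_boxes(true_boxes, input_shape, np.array(anchors), num_classes)
for l, y in enumerate(y_true):
    print(l, y.shape)
# expected: (2, 13, 13, 3, 25), (2, 26, 26, 3, 25), (2, 52, 52, 3, 25)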
preprocess_true_boxes, as the name suggests, preprocesses the ground-truth boxes. To understand this kind of function, it helps to first clarify what bboxes and label store and how they are filled (the names here follow another common implementation of the same idea). bboxes stores the center coordinates and width/height (x, y, w, h) of the ground-truth boxes; its shape is (3, 150, 4), where 3 is the number of grid scales, 150 is the maximum number of ground-truth boxes allowed per scale, and 4 is (x, y, w, h). label stores, for every cell of the three grid scales, the center coordinates, width/height, confidence, and class (x, y, w, h, conf, class_id), where class_id is one-hot encoded and label smoothing is applied. The shape of label is (3, train_output_sizes, train_output_sizes, anchor_per_scale, 5 + num_classes): 3 is again the number of grid scales, train_output_sizes is the size of each grid, anchor_per_scale is the number of anchor boxes predicted per cell, and 5 + num_classes needs no further explanation. label is initialized as an all-zero matrix, i.e. every cell starts with (x, y, w, h, conf, class_id) set to 0. The IoU between the three anchor boxes and each ground-truth box is computed, anchors with IoU > 0.3 are kept and their indices recorded, and the ground-truth (x, y, w, h, class_id) is then written into the grid cell the box belongs to (at the recorded anchor indices), with that cell's confidence set to 1.
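A minimal, self-contained sketch of that anchor-matching step (the anchor sizes and the 0.3 threshold are just example values, and the boxes are compared centered at the origin, as in the function above):

import numpy as np

anchors_wh = np.array([[10, 13], [16, 30], [33, 23]], dtype=np.float32)  # example anchors (w, h)
box_wh = np.array([20, 25], dtype=np.float32)                            # one ground-truth box (w, h)

# width/height IoU with both boxes centered at the origin
inter = np.minimum(anchors_wh, box_wh).prod(axis=1)
union = anchors_wh.prod(axis=1) + box_wh.prod() - inter
iou = inter / union
keep = iou > 0.3  # anchors that would be assigned to this box
print(iou, keep)  # [0.26, 0.69, 0.576], [False, True, True]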
Model Loss
def YoloLoss(anchors, classes=80, ignore_thresh=0.5):
    def yolo_loss(y_true, y_pred):
        # 1. transform all pred outputs
        # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
        pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
            y_pred, anchors, classes)
        pred_xy = pred_xywh[..., 0:2]
        pred_wh = pred_xywh[..., 2:4]

        # 2. transform all true outputs
        # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
        true_box, true_obj, true_class_idx = tf.split(
            y_true, (4, 1, 1), axis=-1)
        true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2
        true_wh = true_box[..., 2:4] - true_box[..., 0:2]

        # give higher weights to small boxes
        box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

        # 3. inverting the pred box equations
        grid_size = tf.shape(y_true)[1]
        grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
        grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
        true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
            tf.cast(grid, tf.float32)
        true_wh = tf.math.log(true_wh / anchors)
        true_wh = tf.where(tf.math.is_inf(true_wh),
                           tf.zeros_like(true_wh), true_wh)

        # 4. calculate all masks
        obj_mask = tf.squeeze(true_obj, -1)
        # ignore false positive when iou is over threshold
        best_iou = tf.map_fn(
            lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask(
                x[1], tf.cast(x[2], tf.bool))), axis=-1),
            (pred_box, true_box, obj_mask),
            tf.float32)
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

        # 5. calculate all losses
        # squared error for the box coordinates
        xy_loss = obj_mask * box_loss_scale * \
            tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
        wh_loss = obj_mask * box_loss_scale * \
            tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
        # binary cross-entropy for the objectness score
        obj_loss = binary_crossentropy(true_obj, pred_obj)
        obj_loss = obj_mask * obj_loss + \
            (1 - obj_mask) * ignore_mask * obj_loss
        # TODO: use binary_crossentropy instead
        # cross-entropy for the class probabilities
        class_loss = obj_mask * sparse_categorical_crossentropy(
            true_class_idx, pred_class)

        # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1)
        xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
        wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
        obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
        class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))

        return xy_loss + wh_loss + obj_loss + class_loss
    return yolo_loss
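A sketch of how this loss is typically attached to the three output scales at training time. The anchor values are the standard YOLOv3 ones, normalized by the 416 input size; the YoloV3 constructor refers to the hypothetical assembly sketched earlier, and the 20-class setting just matches the VOC-style example above:

yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
                        np.float32) / 416
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])

model = YoloV3(416, masks=yolo_anchor_masks, classes=20)
# one YoloLoss per output scale, each with that scale's anchors
loss = [YoloLoss(yolo_anchors[mask], classes=20) for mask in yolo_anchor_masks]
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss=loss)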
Prediction of the YOLO boxes:
def yolo_boxes(pred, anchors, classes):
    # pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes))
    grid_size = tf.shape(pred)[1:3]
    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1)
    # squash the raw predictions into the [0, 1] interval
    box_xy = tf.sigmoid(box_xy)
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    pred_box = tf.concat((box_xy, box_wh), axis=-1)  # original xywh for loss

    # !!! grid[x][y] == (y, x)
    grid = _meshgrid(grid_size[1], grid_size[0])
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)  # [gx, gy, 1, 2]

    box_xy = (box_xy + tf.cast(grid, tf.float32)) / \
        tf.cast(grid_size, tf.float32)
    # exp() undoes the log() applied when the targets were built
    box_wh = tf.exp(box_wh) * anchors

    box_x1y1 = box_xy - box_wh / 2
    box_x2y2 = box_xy + box_wh / 2
    bbox = tf.concat([box_x1y1, box_x2y2], axis=-1)

    return bbox, objectness, class_probs, pred_box
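_meshgrid is not defined in the excerpt; in the tf2 implementation this code appears to follow, it is a small tf.meshgrid replacement that also behaves with dynamic shapes inside tf.function, roughly:

def _meshgrid(n_a, n_b):
    # equivalent to tf.meshgrid(tf.range(n_a), tf.range(n_b)) for this use case
    return [
        tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)),
        tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a))
    ]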
YOLOv4
For the specifics of YOLOv4, see https://blog.youkuaiyun.com/u012655441/article/details/121141084 and the corresponding code: yolov4.
Model Evaluation
This section covers one of the metrics used to evaluate model performance, mAP: for each class a PR (precision-recall) curve is drawn and the area under it is computed. A short worked example follows, and then the full script.
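Before wading into the full script, here is the core of the computation in isolation: detections are sorted by descending confidence and marked TP or FP, the TP/FP counts are accumulated, and the precision/recall arrays follow. A toy example with three detections and two ground-truth boxes of one class:

import numpy as np

tp = np.array([1, 0, 1])  # 1 = true positive, 0 = false positive, already sorted by confidence
n_gt = 2                  # total ground-truth boxes of this class

tp_cum = np.cumsum(tp)             # [1, 1, 2]
fp_cum = np.cumsum(1 - tp)         # [0, 1, 1]
rec = tp_cum / n_gt                # [0.5, 0.5, 1.0]
prec = tp_cum / (tp_cum + fp_cum)  # [1.0, 0.5, 0.667]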
import glob
import json
import os
import shutil
import operator
import sys
import argparse
import math
import numpy as np
sys.path.append(os.getcwd())
import config as sys_config
'''
Computes the mAP.
Cloned from https://github.com/Cartucho/mAP
To evaluate mAP0.x, e.g. mAP0.75, set MINOVERLAP = 0.75.
'''
MINOVERLAP = 0.5
parser = argparse.ArgumentParser()
parser.add_argument('-na', '--no-animation', help="no animation is shown.", action="store_true")
parser.add_argument('-np', '--no-plot', help="no plot is shown.", action="store_true")
parser.add_argument('-q', '--quiet', help="minimalistic console output.", action="store_true")
parser.add_argument('-i', '--ignore', nargs='+', type=str, help="ignore a list of classes.")
parser.add_argument('--set-class-iou', nargs='+', type=str, help="set IoU for a specific class.")
args = parser.parse_args()

'''
    0,0 ------> x (width)
     |
     |  (Left,Top)
     |      *_________
     |      |         |
            |         |
     y      |_________|
  (height)            *
                (Right,Bottom)
'''

if args.ignore is None:
    args.ignore = []

specific_iou_flagged = False
if args.set_class_iou is not None:
    specific_iou_flagged = True

# make sure that the cwd() is the location of the python script (so that every path makes sense)
os.chdir(os.path.dirname(os.path.abspath(__file__)))

GT_PATH = os.path.join(sys_config.result, sys_config.gt_folder_name)
DR_PATH = os.path.join(sys_config.result, sys_config.pr_folder_name)
IMG_PATH = os.path.join(sys_config.result, sys_config.image_optional)

if os.path.exists(IMG_PATH):
    for dirpath, dirnames, files in os.walk(IMG_PATH):
        if not files:
            # no image files found
            args.no_animation = True
else:
    args.no_animation = True

# try to import OpenCV if the user didn't choose the option --no-animation
show_animation = False
if not args.no_animation:
    try:
        import cv2
        show_animation = True
    except ImportError:
        print("\"opencv-python\" not found, please install to visualize the results.")
        args.no_animation = True

# try to import Matplotlib if the user didn't choose the option --no-plot
draw_plot = False
if not args.no_plot:
    try:
        import matplotlib.pyplot as plt
        draw_plot = True
    except ImportError:
        print("\"matplotlib\" not found, please install it to get the resulting plots.")
        args.no_plot = True
def log_average_miss_rate(precision, fp_cumsum, num_images):
    """
    log-average miss rate:
        Calculated by averaging miss rates at 9 evenly spaced FPPI points
        between 10e-2 and 10e0, in log-space.

    output:
        lamr | log-average miss rate
        mr   | miss rate
        fppi | false positives per image

    references:
        [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the
            State of the Art." Pattern Analysis and Machine Intelligence, IEEE
            Transactions on 34.4 (2012): 743 - 761.
    """
    # if there were no detections of that class
    if precision.size == 0:
        lamr = 0
        mr = 1
        fppi = 0
        return lamr, mr, fppi

    fppi = fp_cumsum / float(num_images)
    mr = (1 - precision)

    fppi_tmp = np.insert(fppi, 0, -1.0)
    mr_tmp = np.insert(mr, 0, 1.0)

    # use 9 evenly spaced reference points in log-space
    ref = np.logspace(-2.0, 0.0, num=9)
    for i, ref_i in enumerate(ref):
        # np.where() will always find at least one index, since min(ref) = 0.01 and min(fppi_tmp) = -1.0
        j = np.where(fppi_tmp <= ref_i)[0][-1]
        ref[i] = mr_tmp[j]

    # log(0) is undefined, so clamp the values at 1e-10
    lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref))))

    return lamr, mr, fppi

"""
 throw error and exit
"""
def error(msg):
    print(msg)
    sys.exit(0)

"""
 check if the number is a float between 0.0 and 1.0
"""
def is_float_between_0_and_1(value):
    try:
        val = float(value)
        if val > 0.0 and val < 1.0:
            return True
        else:
            return False
    except ValueError:
        return False

"""
 Calculate the AP given the recall and precision array
    1st) We compute a version of the measured precision/recall curve with
         precision monotonically decreasing
    2nd) We compute the AP as the area under this curve by numerical integration.
"""
def voc_ap(rec, prec):
    """
    --- Official matlab code VOC2012---
    mrec=[0 ; rec ; 1];
    mpre=[0 ; prec ; 0];
    for i=numel(mpre)-1:-1:1
        mpre(i)=max(mpre(i),mpre(i+1));
    end
    i=find(mrec(2:end)~=mrec(1:end-1))+1;
    ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
    """
    rec.insert(0, 0.0)   # insert 0.0 at beginning of list
    rec.append(1.0)      # insert 1.0 at end of list
    mrec = rec[:]
    prec.insert(0, 0.0)  # insert 0.0 at beginning of list
    prec.append(0.0)     # insert 0.0 at end of list
    mpre = prec[:]
    """
     This part makes the precision monotonically decreasing
        (goes from the end to the beginning)
        matlab: for i=numel(mpre)-1:-1:1
                    mpre(i)=max(mpre(i),mpre(i+1));
    """
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])
    """
     This part creates a list of indexes where the recall changes
        matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1;
    """
    i_list = []
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            i_list.append(i)  # if it was matlab would be i + 1
    """
     The Average Precision (AP) is the area under the curve
        (numerical integration)
        matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
    """
    ap = 0.0
    for i in i_list:
        ap += ((mrec[i] - mrec[i - 1]) * mpre[i])
    return ap, mrec, mpre
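# A quick sanity check of voc_ap on the toy precision/recall arrays from the sketch
# earlier in this post (note that voc_ap mutates its list arguments, which is why the
# per-class loop further down passes copies, rec[:] and prec[:]):
#   voc_ap([0.5, 0.5, 1.0], [1.0, 0.5, 2/3])
#   returns ap = 0.5 * 1.0 + 0.5 * (2/3), i.e. about 0.833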
"""
 Convert the lines of a file to a list
"""
def file_lines_to_list(path):
    # open txt file lines to a list
    with open(path) as f:
        content = f.readlines()
    # remove whitespace characters like `\n` at the end of each line
    content = [x.strip() for x in content]
    return content

"""
 Draws text in image
"""
def draw_text_in_image(img, text, pos, color, line_width):
    font = cv2.FONT_HERSHEY_PLAIN
    fontScale = 1
    lineType = 1
    bottomLeftCornerOfText = pos
    cv2.putText(img, text,
                bottomLeftCornerOfText,
                font,
                fontScale,
                color,
                lineType)
    text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0]
    return img, (line_width + text_width)

"""
 Plot - adjust axes
"""
def adjust_axes(r, t, fig, axes):
    # get text width for re-scaling
    bb = t.get_window_extent(renderer=r)
    text_width_inches = bb.width / fig.dpi
    # get axis width in inches
    current_fig_width = fig.get_figwidth()
    new_fig_width = current_fig_width + text_width_inches
    proportion = new_fig_width / current_fig_width
    # get axis limit
    x_lim = axes.get_xlim()
    axes.set_xlim([x_lim[0], x_lim[1] * proportion])

"""
 Draw plot using Matplotlib
"""
def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
    # sort the dictionary by decreasing value, into a list of tuples
    sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
    # unpacking the list of tuples into two lists
    sorted_keys, sorted_values = zip(*sorted_dic_by_value)
    #
    if true_p_bar != "":
        """
         Special case to draw in:
            - green -> TP: True Positives (object detected and matches ground-truth)
            - red -> FP: False Positives (object detected but does not match ground-truth)
            - orange -> FN: False Negatives (object not detected but present in the ground-truth)
        """
        fp_sorted = []
        tp_sorted = []
        for key in sorted_keys:
            fp_sorted.append(dictionary[key] - true_p_bar[key])
            tp_sorted.append(true_p_bar[key])
        plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive')
        plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted)
        # add legend
        plt.legend(loc='lower right')
        """
         Write number on side of bar
        """
        fig = plt.gcf()  # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            fp_val = fp_sorted[i]
            tp_val = tp_sorted[i]
            fp_str_val = " " + str(fp_val)
            tp_str_val = fp_str_val + " " + str(tp_val)
            # trick to paint multicolor with offset:
            # first paint everything and then repaint the first number
            t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
            plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
            if i == (len(sorted_values) - 1):  # largest bar
                adjust_axes(r, t, fig, axes)
    else:
        plt.barh(range(n_classes), sorted_values, color=plot_color)
        """
         Write number on side of bar
        """
        fig = plt.gcf()  # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            str_val = " " + str(val)  # add a space before
            if val < 1.0:
                str_val = " {0:.2f}".format(val)
            t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
            # re-set axes to show number inside the figure
            if i == (len(sorted_values) - 1):  # largest bar
                adjust_axes(r, t, fig, axes)
    # set window title
    fig.canvas.set_window_title(window_title)
    # write classes in y axis
    tick_font_size = 12
    plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
    """
     Re-scale height accordingly
    """
    init_height = fig.get_figheight()
    # compute the matrix height in points and inches
    dpi = fig.dpi
    height_pt = n_classes * (tick_font_size * 1.4)  # 1.4 (some spacing)
    height_in = height_pt / dpi
    # compute the required figure height
    top_margin = 0.15     # in percentage of the figure height
    bottom_margin = 0.05  # in percentage of the figure height
    figure_height = height_in / (1 - top_margin - bottom_margin)
    # set new height
    if figure_height > init_height:
        fig.set_figheight(figure_height)

    # set plot title
    plt.title(plot_title, fontsize=14)
    # set axis titles
    # plt.xlabel('classes')
    plt.xlabel(x_label, fontsize='large')
    # adjust size of window
    fig.tight_layout()
    # save the plot
    fig.savefig(output_path)
    # show image
    if to_show:
        plt.show()
    # close the plot
    plt.close()
"""
 Create a ".temp_files/" and "results/" directory
"""
TEMP_FILES_PATH = ".temp_files"
if not os.path.exists(TEMP_FILES_PATH):  # if it doesn't exist already
    os.makedirs(TEMP_FILES_PATH)
results_files_path = "results"
if os.path.exists(results_files_path):  # if it exists already
    # reset the results directory
    shutil.rmtree(results_files_path)

os.makedirs(results_files_path)
if draw_plot:
    os.makedirs(os.path.join(results_files_path, "AP"))
    os.makedirs(os.path.join(results_files_path, "F1"))
    os.makedirs(os.path.join(results_files_path, "Recall"))
    os.makedirs(os.path.join(results_files_path, "Precision"))
if show_animation:
    os.makedirs(os.path.join(results_files_path, "images", "detections_one_by_one"))

"""
 ground-truth
     Load each of the ground-truth files into a temporary ".json" file.
     Create a list of all the class names present in the ground-truth (gt_classes).
"""
# get a list with the ground-truth files
ground_truth_files_list = glob.glob(GT_PATH + '/*.txt')
if len(ground_truth_files_list) == 0:
    error("Error: No ground-truth files found!")
ground_truth_files_list.sort()
# dictionary with counter per class
gt_counter_per_class = {}
counter_images_per_class = {}

for txt_file in ground_truth_files_list:
    #print(txt_file)
    file_id = txt_file.split(".txt", 1)[0]
    file_id = os.path.basename(os.path.normpath(file_id))
    # check if there is a corresponding detection-results file
    temp_path = os.path.join(DR_PATH, (file_id + ".txt"))
    if not os.path.exists(temp_path):
        error_msg = "Error. File not found: {}\n".format(temp_path)
        error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)"
        error(error_msg)
    lines_list = file_lines_to_list(txt_file)
    # create ground-truth dictionary
    bounding_boxes = []
    is_difficult = False
    already_seen_classes = []
    for line in lines_list:
        try:
            if "difficult" in line:
                class_name, left, top, right, bottom, _difficult = line.split()
                is_difficult = True
            else:
                class_name, left, top, right, bottom = line.split()
        except ValueError:
            # the class name contains spaces; rebuild it from the remaining tokens
            if "difficult" in line:
                line_split = line.split()
                _difficult = line_split[-1]
                bottom = line_split[-2]
                right = line_split[-3]
                top = line_split[-4]
                left = line_split[-5]
                class_name = ""
                for name in line_split[:-5]:
                    class_name += name + " "
                class_name = class_name[:-1]
                is_difficult = True
            else:
                line_split = line.split()
                bottom = line_split[-1]
                right = line_split[-2]
                top = line_split[-3]
                left = line_split[-4]
                class_name = ""
                for name in line_split[:-4]:
                    class_name += name + " "
                class_name = class_name[:-1]
        # check if class is in the ignore list, if yes skip
        if class_name in args.ignore:
            continue
        bbox = left + " " + top + " " + right + " " + bottom
        if is_difficult:
            bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False, "difficult": True})
            is_difficult = False
        else:
            bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False})
            # count that object
            if class_name in gt_counter_per_class:
                gt_counter_per_class[class_name] += 1
            else:
                # if class didn't exist yet
                gt_counter_per_class[class_name] = 1
            if class_name not in already_seen_classes:
                if class_name in counter_images_per_class:
                    counter_images_per_class[class_name] += 1
                else:
                    # if class didn't exist yet
                    counter_images_per_class[class_name] = 1
                already_seen_classes.append(class_name)

    # dump bounding_boxes into a ".json" file
    with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile:
        json.dump(bounding_boxes, outfile)

gt_classes = list(gt_counter_per_class.keys())
# sort the classes alphabetically
gt_classes = sorted(gt_classes)
n_classes = len(gt_classes)

"""
 Check format of the flag --set-class-iou (if used)
     e.g. check if class exists
"""
if specific_iou_flagged:
    n_args = len(args.set_class_iou)
    error_msg = \
        '\n --set-class-iou [class_1] [IoU_1] [class_2] [IoU_2] [...]'
    if n_args % 2 != 0:  # the number of arguments must be even
        error('Error, missing arguments. Flag usage:' + error_msg)
    # [class_1] [IoU_1] [class_2] [IoU_2]
    # specific_iou_classes = ['class_1', 'class_2']
    specific_iou_classes = args.set_class_iou[::2]  # even
    # iou_list = ['IoU_1', 'IoU_2']
    iou_list = args.set_class_iou[1::2]  # odd
    if len(specific_iou_classes) != len(iou_list):
        error('Error, missing arguments. Flag usage:' + error_msg)
    for tmp_class in specific_iou_classes:
        if tmp_class not in gt_classes:
            error('Error, unknown class \"' + tmp_class + '\". Flag usage:' + error_msg)
    for num in iou_list:
        if not is_float_between_0_and_1(num):
            error('Error, IoU must be between 0.0 and 1.0. Flag usage:' + error_msg)

"""
 detection-results
     Load each of the detection-results files into a temporary ".json" file.
"""
dr_files_list = glob.glob(DR_PATH + '/*.txt')
dr_files_list.sort()

for class_index, class_name in enumerate(gt_classes):
    bounding_boxes = []
    for txt_file in dr_files_list:
        # the first time, check if all the corresponding ground-truth files exist
        file_id = txt_file.split(".txt", 1)[0]
        file_id = os.path.basename(os.path.normpath(file_id))
        temp_path = os.path.join(GT_PATH, (file_id + ".txt"))
        if class_index == 0:
            if not os.path.exists(temp_path):
                error_msg = "Error. File not found: {}\n".format(temp_path)
                error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)"
                error(error_msg)
        lines = file_lines_to_list(txt_file)
        for line in lines:
            try:
                tmp_class_name, confidence, left, top, right, bottom = line.split()
            except ValueError:
                # the class name contains spaces; rebuild it from the remaining tokens
                line_split = line.split()
                bottom = line_split[-1]
                right = line_split[-2]
                top = line_split[-3]
                left = line_split[-4]
                confidence = line_split[-5]
                tmp_class_name = ""
                for name in line_split[:-5]:
                    tmp_class_name += name + " "
                tmp_class_name = tmp_class_name[:-1]
            if tmp_class_name == class_name:
                bbox = left + " " + top + " " + right + " " + bottom
                bounding_boxes.append({"confidence": confidence, "file_id": file_id, "bbox": bbox})
    # sort detection-results by decreasing confidence
    bounding_boxes.sort(key=lambda x: float(x['confidence']), reverse=True)
    with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile:
        json.dump(bounding_boxes, outfile)
"""
 Calculate the AP for each class
"""
sum_AP = 0.0
ap_dictionary = {}
lamr_dictionary = {}
# open file to store the results
with open(results_files_path + "/results.txt", 'w') as results_file:
    results_file.write("# AP and precision/recall per class\n")
    count_true_positives = {}
    for class_index, class_name in enumerate(gt_classes):
        count_true_positives[class_name] = 0
        """
         Load detection-results of that class
        """
        dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json"
        dr_data = json.load(open(dr_file))
        """
         Assign detection-results to ground-truth objects
        """
        nd = len(dr_data)
        tp = [0] * nd  # creates an array of zeros of size nd
        fp = [0] * nd
        score = [0] * nd
        score05_idx = 0
        for idx, detection in enumerate(dr_data):
            file_id = detection["file_id"]
            score[idx] = float(detection["confidence"])
            if score[idx] > 0.5:
                score05_idx = idx

            if show_animation:
                # find the ground-truth image
                ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*")
                if len(ground_truth_img) == 0:
                    error("Error. Image not found with id: " + file_id)
                elif len(ground_truth_img) > 1:
                    error("Error. Multiple image with id: " + file_id)
                else:
                    # load image
                    img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0])
                    # load image with draws of multiple detections
                    img_cumulative_path = results_files_path + "/images/" + ground_truth_img[0]
                    if os.path.isfile(img_cumulative_path):
                        img_cumulative = cv2.imread(img_cumulative_path)
                    else:
                        img_cumulative = img.copy()
                    # add bottom border to image
                    bottom_border = 60
                    BLACK = [0, 0, 0]
                    img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK)
            # open the ground-truth file with that file_id
            gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
            ground_truth_data = json.load(open(gt_file))
            ovmax = -1
            gt_match = -1
            # load detected object bounding-box
            bb = [float(x) for x in detection["bbox"].split()]
            for obj in ground_truth_data:
                # look for a class_name match
                if obj["class_name"] == class_name:
                    bbgt = [float(x) for x in obj["bbox"].split()]
                    bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])]
                    iw = bi[2] - bi[0] + 1
                    ih = bi[3] - bi[1] + 1
                    if iw > 0 and ih > 0:
                        # compute overlap (IoU) = area of intersection / area of union
                        ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
                                + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
                        ov = iw * ih / ua
                        if ov > ovmax:
                            ovmax = ov
                            gt_match = obj

            # assign the detection as true positive, don't care, or false positive
            if show_animation:
                status = "NO MATCH FOUND!"  # status is only used in the animation
            # set minimum overlap
            min_overlap = MINOVERLAP
            if specific_iou_flagged:
                if class_name in specific_iou_classes:
                    index = specific_iou_classes.index(class_name)
                    min_overlap = float(iou_list[index])
            if ovmax >= min_overlap:
                if "difficult" not in gt_match:
                    if not bool(gt_match["used"]):
                        # true positive
                        tp[idx] = 1
                        gt_match["used"] = True
                        count_true_positives[class_name] += 1
                        # update the ".json" file
                        with open(gt_file, 'w') as f:
                            f.write(json.dumps(ground_truth_data))
                        if show_animation:
                            status = "MATCH!"
                    else:
                        # false positive (multiple detection)
                        fp[idx] = 1
                        if show_animation:
                            status = "REPEATED MATCH!"
            else:
                # false positive
                fp[idx] = 1
                if ovmax > 0:
                    status = "INSUFFICIENT OVERLAP"

            """
             Draw image to show animation
            """
            if show_animation:
                height, width = img.shape[:2]
                # colors (OpenCV works with BGR)
                white = (255, 255, 255)
                light_blue = (255, 200, 100)
                green = (0, 255, 0)
                light_red = (30, 30, 255)
                # 1st line
                margin = 10
                v_pos = int(height - margin - (bottom_border / 2.0))
                text = "Image: " + ground_truth_img[0] + " "
                img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " "
                img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width)
                if ovmax != -1:
                    color = light_red
                    if status == "INSUFFICIENT OVERLAP":
                        text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100)
                    else:
                        text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100)
                        color = green
                    img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)
                # 2nd line
                v_pos += int(bottom_border / 2.0)
                rank_pos = str(idx + 1)  # rank position (idx starts at 0)
                text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100)
                img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                color = light_red
                if status == "MATCH!":
                    color = green
                text = "Result: " + status + " "
                img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)

                font = cv2.FONT_HERSHEY_SIMPLEX
                if ovmax > 0:  # if there is an intersection between the bounding-boxes
                    bbgt = [int(round(float(x))) for x in gt_match["bbox"].split()]
                    cv2.rectangle(img, (bbgt[0], bbgt[1]), (bbgt[2], bbgt[3]), light_blue, 2)
                    cv2.rectangle(img_cumulative, (bbgt[0], bbgt[1]), (bbgt[2], bbgt[3]), light_blue, 2)
                    cv2.putText(img_cumulative, class_name, (bbgt[0], bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA)
                bb = [int(i) for i in bb]
                cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), color, 2)
                cv2.rectangle(img_cumulative, (bb[0], bb[1]), (bb[2], bb[3]), color, 2)
                cv2.putText(img_cumulative, class_name, (bb[0], bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA)
                # show image
                cv2.imshow("Animation", img)
                cv2.waitKey(20)  # show for 20 ms
                # save image to results
                output_img_path = results_files_path + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg"
                cv2.imwrite(output_img_path, img)
                # save the image with all the objects drawn to it
                cv2.imwrite(img_cumulative_path, img_cumulative)

        # compute precision/recall
        cumsum = 0
        for idx, val in enumerate(fp):
            fp[idx] += cumsum
            cumsum += val
        cumsum = 0
        for idx, val in enumerate(tp):
            tp[idx] += cumsum
            cumsum += val
        rec = tp[:]
        for idx, val in enumerate(tp):
            rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1)
        prec = tp[:]
        for idx, val in enumerate(tp):
            prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1)

        ap, mrec, mprec = voc_ap(rec[:], prec[:])
        F1 = np.array(rec) * np.array(prec) * 2 / np.where((np.array(prec) + np.array(rec)) == 0, 1, (np.array(prec) + np.array(rec)))

        sum_AP += ap
        text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP "  # class_name + " AP = {0:.2f}%".format(ap*100)

        if len(prec) > 0:
            F1_text = "{0:.2f}".format(F1[score05_idx]) + " = " + class_name + " F1 "
            Recall_text = "{0:.2f}%".format(rec[score05_idx]*100) + " = " + class_name + " Recall "
            Precision_text = "{0:.2f}%".format(prec[score05_idx]*100) + " = " + class_name + " Precision "
        else:
            F1_text = "0.00" + " = " + class_name + " F1 "
            Recall_text = "0.00%" + " = " + class_name + " Recall "
            Precision_text = "0.00%" + " = " + class_name + " Precision "

        rounded_prec = ['%.2f' % elem for elem in prec]
        rounded_rec = ['%.2f' % elem for elem in rec]
        results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall: " + str(rounded_rec) + "\n\n")
        if not args.quiet:
            if len(prec) > 0:
                print(text + "\t||\tscore_threshold=0.5 : " + "F1=" + "{0:.2f}".format(F1[score05_idx])
                      + " ; Recall=" + "{0:.2f}%".format(rec[score05_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score05_idx]*100))
            else:
                print(text + "\t||\tscore_threshold=0.5 : F1=0.00% ; Recall=0.00% ; Precision=0.00%")
        ap_dictionary[class_name] = ap

        n_images = counter_images_per_class[class_name]
        lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images)
        lamr_dictionary[class_name] = lamr

        """
         Draw plot
        """
        if draw_plot:
            plt.plot(rec, prec, '-o')
            # add a new penultimate point (mrec[-2], 0.0), since the last line
            # segment (and respective area) does not affect the AP value
            area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
            area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
            plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r')
            # set window title
            fig = plt.gcf()  # gcf - get current figure
            fig.canvas.set_window_title('AP ' + class_name)
            # set plot title
            plt.title('class: ' + text)
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            # optional - set axes
            axes = plt.gca()  # gca - get current axes
            axes.set_xlim([0.0, 1.0])
            axes.set_ylim([0.0, 1.05])  # .05 to give some extra space
            fig.savefig(results_files_path + "/AP/" + class_name + ".png")
            plt.cla()  # clear axes for the next plot

            plt.plot(score, F1, "-", color='orangered')
            plt.title('class: ' + F1_text + "\nscore_threshold=0.5")
            plt.xlabel('Score_Threshold')
            plt.ylabel('F1')
            axes = plt.gca()
            axes.set_xlim([0.0, 1.0])
            axes.set_ylim([0.0, 1.05])
            fig.savefig(results_files_path + "/F1/" + class_name + ".png")
            plt.cla()

            plt.plot(score, rec, "-H", color='gold')
            plt.title('class: ' + Recall_text + "\nscore_threshold=0.5")
            plt.xlabel('Score_Threshold')
            plt.ylabel('Recall')
            axes = plt.gca()
            axes.set_xlim([0.0, 1.0])
            axes.set_ylim([0.0, 1.05])
            fig.savefig(results_files_path + "/Recall/" + class_name + ".png")
            plt.cla()

            plt.plot(score, prec, "-s", color='palevioletred')
            plt.title('class: ' + Precision_text + "\nscore_threshold=0.5")
            plt.xlabel('Score_Threshold')
            plt.ylabel('Precision')
            axes = plt.gca()
            axes.set_xlim([0.0, 1.0])
            axes.set_ylim([0.0, 1.05])
            fig.savefig(results_files_path + "/Precision/" + class_name + ".png")
            plt.cla()

    if show_animation:
        cv2.destroyAllWindows()

    results_file.write("\n# mAP of all classes\n")
    mAP = sum_AP / n_classes
    text = "mAP = {0:.2f}%".format(mAP*100)
    results_file.write(text + "\n")
    print(text)

# remove the temp_files directory
shutil.rmtree(TEMP_FILES_PATH)
"""
 Count total of detection-results
"""
# iterate through all the files
det_counter_per_class = {}
for txt_file in dr_files_list:
    # get lines to list
    lines_list = file_lines_to_list(txt_file)
    for line in lines_list:
        class_name = line.split()[0]
        # check if class is in the ignore list, if yes skip
        if class_name in args.ignore:
            continue
        # count that object
        if class_name in det_counter_per_class:
            det_counter_per_class[class_name] += 1
        else:
            # if class didn't exist yet
            det_counter_per_class[class_name] = 1
#print(det_counter_per_class)
dr_classes = list(det_counter_per_class.keys())

"""
 Plot the total number of occurrences of each class in the ground-truth
"""
if draw_plot:
    window_title = "ground-truth-info"
    plot_title = "ground-truth\n"
    plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)"
    x_label = "Number of objects per class"
    output_path = results_files_path + "/ground-truth-info.png"
    to_show = False
    plot_color = 'forestgreen'
    draw_plot_func(
        gt_counter_per_class,
        n_classes,
        window_title,
        plot_title,
        x_label,
        output_path,
        to_show,
        plot_color,
        '',
    )

"""
 Write number of ground-truth objects per class to results.txt
"""
with open(results_files_path + "/results.txt", 'a') as results_file:
    results_file.write("\n# Number of ground-truth objects per class\n")
    for class_name in sorted(gt_counter_per_class):
        results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n")

"""
 Finish counting true positives
"""
for class_name in dr_classes:
    # if class exists in detection-results but not in ground-truth then there are no true positives in that class
    if class_name not in gt_classes:
        count_true_positives[class_name] = 0
#print(count_true_positives)

"""
 Plot the total number of occurrences of each class in the "detection-results" folder
"""
if draw_plot:
    window_title = "detection-results-info"
    # Plot title
    plot_title = "detection-results\n"
    plot_title += "(" + str(len(dr_files_list)) + " files and "
    count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values()))
    plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)"
    # end Plot title
    x_label = "Number of objects per class"
    output_path = results_files_path + "/detection-results-info.png"
    to_show = False
    plot_color = 'forestgreen'
    true_p_bar = count_true_positives
    draw_plot_func(
        det_counter_per_class,
        len(det_counter_per_class),
        window_title,
        plot_title,
        x_label,
        output_path,
        to_show,
        plot_color,
        true_p_bar
    )

"""
 Write number of detected objects per class to results.txt
"""
with open(results_files_path + "/results.txt", 'a') as results_file:
    results_file.write("\n# Number of detected objects per class\n")
    for class_name in sorted(dr_classes):
        n_det = det_counter_per_class[class_name]
        text = class_name + ": " + str(n_det)
        text += " (tp:" + str(count_true_positives[class_name]) + ""
        text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n"
        results_file.write(text)

"""
 Draw log-average miss rate plot (Show lamr of all classes in decreasing order)
"""
if draw_plot:
    window_title = "lamr"
    plot_title = "log-average miss rate"
    x_label = "log-average miss rate"
    output_path = results_files_path + "/lamr.png"
    to_show = False
    plot_color = 'royalblue'
    draw_plot_func(
        lamr_dictionary,
        n_classes,
        window_title,
        plot_title,
        x_label,
        output_path,
        to_show,
        plot_color,
        ""
    )

"""
 Draw mAP plot (Show AP's of all classes in decreasing order)
"""
if draw_plot:
    window_title = "mAP"
    plot_title = "mAP = {0:.2f}%".format(mAP*100)
    x_label = "Average Precision"
    output_path = results_files_path + "/mAP.png"
    to_show = True
    plot_color = 'royalblue'
    draw_plot_func(
        ap_dictionary,
        n_classes,
        window_title,
        plot_title,
        x_label,
        output_path,
        to_show,
        plot_color,
        ""
    )
Code to run inference on the test set and dump the detection results:
import colorsys
import os
import sys
sys.path.append(os.getcwd())
import config as sys_config
import numpy as np
from keras import backend as K
from keras.layers import Input
from keras.models import load_model
from PIL import Image
from tqdm import tqdm
from nets.yolo4 import yolo_body, yolo_eval
from utils.utils import letterbox_image
from nets.yolo import YOLO
'''
The score threshold is set low here because computing the mAP requires the Recall and
Precision values at many different thresholds; only if enough boxes are kept will the
computed mAP be accurate (see the definition of mAP for details).
The Recall and Precision printed during the mAP computation are the values at a
threshold of 0.5.
The txt files produced in ./input/detection-results/ therefore contain more boxes than a
plain predict would: the threshold is low on purpose, so that Recall and Precision can be
obtained at every threshold and the mAP computed from them.
self.iou here is the IoU used by non-maximum suppression: if the IoU between a low-score
box and a high-score box exceeds self.iou, the low-score box is removed.
To evaluate mAP0.x, e.g. mAP0.75, set MINOVERLAP in get_map.py.
'''
yolo = YOLO(
    model_path=sys_config.model_path,
    anchors_path=sys_config.anchors_path,
    classes_path=sys_config.classes_path,
    score=sys_config.map_socre,
    iou=sys_config.map_iou,
    max_boxes=sys_config.max_boxes,
    model_image_size=(sys_config.imagesize, sys_config.imagesize),
    letterbox_image=sys_config.letterbox_image
)

image_ids = open(os.path.join(sys_config.test_txt, 'test.txt')).read().strip().split()

if not os.path.exists(os.path.join(sys_config.result, sys_config.pr_folder_name)):
    os.makedirs(os.path.join(sys_config.result, sys_config.pr_folder_name))

for image_id in tqdm(image_ids):
    image_path = sys_config.dataset_base_path + "/JPEGImages/" + image_id + ".jpg"
    image = Image.open(image_path)
    # uncomment to save the images, so the later mAP step can visualize the detections
    # image.save("./input/images-optional/" + image_id + ".jpg")
    yolo.get_dr_txt(image_id, image)

print("Conversion completed!")
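get_dr_txt is a method of the YOLO wrapper class, which is not shown here. Whatever its internals, the per-image txt it writes has to follow the detection format that the mAP script above parses: one detection per line, as class confidence left top right bottom. A hypothetical minimal writer, just to illustrate the format:

def write_dr_txt(image_id, detections, out_dir):
    # detections: an iterable of (class_name, confidence, left, top, right, bottom)
    with open(os.path.join(out_dir, image_id + ".txt"), "w") as f:
        for class_name, conf, left, top, right, bottom in detections:
            f.write("%s %s %s %s %s %s\n" % (class_name, conf, left, top, right, bottom))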
Code to generate the ground-truth files:
import sys
import os
sys.path.append(os.getcwd())
import glob
import xml.etree.ElementTree as ET
import config as sys_config

def get_classes(classes_path):
    '''loads the classes'''
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names

image_ids = open(os.path.join(sys_config.test_txt, 'test.txt')).read().strip().split()

gt_folder = os.path.join(sys_config.result, sys_config.gt_folder_name)
if not os.path.exists(gt_folder):
    os.makedirs(gt_folder)

for image_id in image_ids:
    with open(os.path.join(gt_folder, image_id + ".txt"), "w") as new_f:
        root = ET.parse(os.path.join(sys_config.dataset_base_path, "Annotations", image_id + ".xml")).getroot()
        for obj in root.findall('object'):
            # classes_path = 'model_data/voc_classes.txt'
            # class_names = get_classes(classes_path)
            # if obj_name not in class_names:
            #     continue
            bndbox = obj.find('bndbox')
            left = bndbox.find('xmin').text
            top = bndbox.find('ymin').text
            right = bndbox.find('xmax').text
            bottom = bndbox.find('ymax').text
            difficult_flag = False
            if obj.find('difficult') is not None:
                difficult = obj.find('difficult').text
                if int(difficult) == 1:
                    difficult_flag = True
            obj_name = obj.find('name').text
            # additionally mark boxes smaller than 10% of the image area as difficult
            area = (int(right) - int(left)) * (int(bottom) - int(top))
            height = root.find('size').find('height').text
            width = root.find('size').find('width').text
            img_area = int(height) * int(width) * 0.1
            if area < img_area:
                filename = root.find('filename').text
                difficult_flag = True
            if difficult_flag:
                new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
            else:
                new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))

print("Conversion completed!")