CornerNet: Implementing a Demo, Visualizing Heatmaps, and Testing Per-Class Accuracy


Everyone is welcome to discuss the CornerNet code here; just leave a comment and I'll reply quickly, try me ☎☎☎

Preface

It has been a while since I last organized what I've been working on, but I find that stopping to summarize now and then is what makes things stick. Lately I have been working on anchor-free 2D object detection (CornerNet / CornerNet-Lite / CenterNet / CenterNet…), and the one I have spent the most time with is CornerNet (ECCV 2018); for details see my CornerNet paper walkthrough and CornerNet setup posts.

Today's topic: the CornerNet codebase has no demo.py-style script that runs the model and draws the detection results (there seems to be a debug flag, but I have not tried it); the paper talks about heatmaps throughout, yet you never get to see what a heatmap actually looks like; and the analysis of the experimental results is a bit thin, so I also added per-class accuracy output.

Implementing the Demo

Two approaches are given here:

Approach 1

I found a demo.py that someone else had implemented on GitHub. It solves CornerNet's demo problem, but I still think the second approach below is worth knowing.

#!/usr/bin/env python
import os
import json
import torch
import pprint
import argparse
import importlib
import numpy as np
import cv2

import matplotlib
matplotlib.use("Agg")

from config import system_configs
from nnet.py_factory import NetworkFactory
from utils import crop_image, normalize_
from external.nms import soft_nms, soft_nms_merge

torch.backends.cudnn.benchmark = False
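
# the 80 COCO class names plus "__background__"; a detection with class id j
# is labeled class_name[j] below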

class_name = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

image_ext = ['jpg', 'jpeg', 'png']

def parse_args():
    parser = argparse.ArgumentParser(description="Demo CornerNet")
    parser.add_argument("--demo", dest="demo",
                        help="demo image or image folder",
                        default="", type=str)
    parser.add_argument("--cfg_file", help="config file", 
                        default='CornerNet', type=str)
    parser.add_argument("--testiter", dest="testiter",
                        help="test at iteration i",
                        default=None)
    parser.add_argument("--suffix", dest="suffix", default=None, type=str)

    args = parser.parse_args()
    return args

def _rescale_dets(detections, ratios, borders, sizes):
    xs, ys = detections[..., 0:4:2], detections[..., 1:4:2]
    xs    /= ratios[:, 1][:, None, None]
    ys    /= ratios[:, 0][:, None, None]
    xs    -= borders[:, 2][:, None, None]
    ys    -= borders[:, 0][:, None, None]
    np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs)
    np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys)

def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3, debug=False):
    detections = nnet.test(
        [images], ae_threshold=ae_threshold, K=K, kernel=kernel)
    detections = detections.data.cpu().numpy()
    return detections

if __name__ == "__main__":
    args = parse_args()
    if args.suffix is None:
        cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + ".json")
    else:
        cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + "-{}.json".format(args.suffix))
    print("cfg_file: {}".format(cfg_file))

    with open(cfg_file, "r") as f:
        configs = json.load(f)

    configs["system"]["snapshot_name"] = args.cfg_file
    system_configs.update_config(configs["system"])
    print("system config...")
    pprint.pprint(system_configs.full)

    test_iter = system_configs.max_iter if args.testiter is None \
                                        else args.testiter
    print("loading parameters at iteration: {}".format(test_iter))
    print("building neural network...")
    nnet = NetworkFactory(None)
    print("loading parameters...")
    nnet.load_params(test_iter)
    nnet.cuda()
    nnet.eval_mode()

    K             = configs["db"]["top_k"]
    ae_threshold  = configs["db"]["ae_threshold"]
    nms_kernel    = 3

    scales        = configs["db"]["test_scales"]
    weight_exp    = 8
    merge_bbox    = False
    categories    = configs["db"]["categories"]
    nms_threshold = configs["db"]["nms_threshold"]
    max_per_image = configs["db"]["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1, 
        "exp_soft_nms": 2
    }["exp_soft_nms"]
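
    # per-channel mean/std used to normalize the input (cv2 loads images as BGR)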

    mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32)
    std  = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32)
    top_bboxes = {}

    if os.path.isdir(args.demo):
        image_names = []
        ls = os.listdir(args.demo)
        for file_name in ls:
            ext = file_name[file_name.rfind('.') + 1:].lower()
            if ext in image_ext:
                image_names.append(os.path.join(args.demo, file_name))
    else:
        image_names = [args.demo]

    for image_id, image_name in enumerate(image_names):
        image      = cv2.imread(image_name)

        height, width = image.shape[0:2]

        detections = []

        for scale in scales:
            new_height = int(height * scale)
            new_width  = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width  = new_width  | 127
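            # "| 127" sets the low 7 bits, padding each dim up to the next
            # value of the form 128k - 1, so the 4x-downsampled output size
            # computed below, (dim + 1) // 4, is an exact multiple of 32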

            images  = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios  = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes   = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio  = out_width  / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, mean, std)

            images[0]  = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0]   = [int(height * scale), int(width * scale)]
            ratios[0]  = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets   = kp_decode(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel, debug=True)
            dets   = dets.reshape(2, -1, 8)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets   = dets.reshape(1, -1, 8)

            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes    = detections[..., -1]
        classes    = classes[0]
        detections = detections[0]

        # reject detections with negative scores
        keep_inds  = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes    = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1] 
            for j in range(1, categories + 1)
        ])
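        # keep only the max_per_image highest-scoring boxes over all classes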
        if len(scores) > max_per_image:
            kth    = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if 1:
            image      = cv2.imread(image_name)
            bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] > 0.5)
                cat_name  = class_name[j]
                cat_size  = cv2.getTextSize(
                    cat_name + '0.0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color     = np.random.random((3, )) * 0.6 + 0.4
                color     = color * 255
                color     = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    sc    = bbox[4]
                    bbox  = bbox[0:4].astype(np.int32)
                    txt   = '{}{:.1f}'.format(cat_name, sc)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        cv2.rectangle(image,
                            (bbox[0], bbox[1] + 2),
                            (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                            color, -1
                        )
                        cv2.putText(image, txt, 
                            (bbox[0], bbox[1] + cat_size[1] + 2), 
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1
                        )
                    else:
                        cv2.rectangle(image,
                            (bbox[0], bbox[1] - cat_size[1] - 2),
                            (bbox[0] + cat_size[0], bbox[1] - 2),
                            color, -1
                        )
                        cv2.putText(image, txt, 
                            (bbox[0], bbox[1] - 2), 
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1
                        )
                    cv2.rectangle(image,
                        (bbox[0], bbox[1]),
                        (bbox[2], bbox[3]),
                        color, 2
                    )
            # cv2.imshow('out', image)
            # cv2.waitKey()
            cv2.imwrite("/home/jhsu/lyj/CornerNet/demo_result/" + str(image_id + 1) + ".jpg", image)
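
Assuming the script above is saved as demo.py in the repository root, it can be run with, for example, python demo.py --demo ./images --cfg_file CornerNet --testiter 435000 (or leave --testiter off to use max_iter from the config); note the hard-coded output directory in the final cv2.imwrite, which you will want to change to match your setup.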

Approach 2

The approach above solves CornerNet's demo problem. But if the end goal is a paper, you will need to compare several algorithms, i.e., several baselines, and if every baseline needs its own demos, the code above no longer transfers: CenterNet, Faster R-CNN and the like cannot simply reuse it. Approach 2 is therefore more general.

I use the second approach: run the test code on the test set --> the existing code automatically generates results.json --> a Python script converts the json to txt --> a Python script reads the txt and draws the boxes back onto the test images. (This pipeline is CornerNet-specific; other algorithms may not produce a results.json, but that does not matter, since the general flow is: run the test code on the test set --> modify the algorithm's code to save the test results as txt --> a Python script reads the txt and draws the boxes back onto the test images.) Let's go through it step by step.

  1. Run the test code on the test set

    Just run the test code directly, but I recommend wrapping it in a shell script, say test.sh. The benefits: (1) the terminal output gets saved; testing prints the mAP at the very end, and if you are not paying attention and miss it, the whole run was wasted, so saving the output is the safe move. (2) Some test commands are extremely long, and nobody wants to retype them every time; with test.sh it is much more convenient:

    #!/usr/bin/env bash
    NETWORK="yjl_CornerNet"
    MODELDIR="./yjl_test_CornerNet/test-$NETWORK-`date +%Y-%m-%d-%H-%M-%S`"
    mkdir -p "$MODELDIR"
    # fill in the iteration of the checkpoint you want to test
    iter=435000
    LOGFILE="$MODELDIR/log-iter-$iter-$NETWORK-`date +%Y-%m-%d-%H-%M-%S`.log"
    
    # test model 
    python test.py CornerNet --testiter $iter 2>&1 | tee $LOGFILE
    

    If the script above is named test.sh, just run ./test.sh in the terminal. You will see that a directory is created automatically to store the test log, and every subsequent ./test.sh creates a new log recording that run:
    [screenshot: auto-created directory with timestamped test logs]

  2. The existing code automatically generates results.json

    When testing finishes, results.json is generated automatically:
    [screenshot: results.json in the output directory]
    The json is just a storage format; it takes quite a while to open, and all it holds is the test results, a pile of box records, so there is no real need to open it. Straight on to the next step.
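
    For reference, each element of results.json is one detection in the standard COCO results format; the conversion script in the next step reads exactly these keys. A sample entry (values reconstructed from the first txt line shown in step 4; the bbox is [x, y, w, h]):

    {"image_id": 1, "category_id": 0, "bbox": [466.45, 138.72, 36.2, 67.97], "score": 0.3}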

  3. Convert the json to txt with a Python script

    Create a Python script in the same directory and enter:

    import json
    
    f = open("results.json", 'r')
    arr = json.loads(f.read())
    f.close()
    
    f = open("result.txt", 'w')
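    # COCO bboxes are stored as [x, y, w, h]; convert to corner form
    # [x1, y1, x2, y2] when writing the txt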
    for i in arr:
        f.write(
            "%08d %d %f %f %f %f %f\n" % (
            i['image_id'], i['category_id'], i['score'],
            i['bbox'][0], i['bbox'][1], i['bbox'][2] + i['bbox'][0], i['bbox'][3] + i['bbox'][1]))
    f.close()
    

    This converts results.json into result.txt; the point of the conversion is simply that txt is easier to process with Python.

  4. Read the txt with a Python script and draw the boxes back onto the test images

    At this point you have CornerNet's detection results (result.txt), and you also need the test-set images you just ran on. With those two in hand, all that is missing is the script below. First, here is what the txt looks like:

    # image name  class id  confidence  x1  y1  x2  y2
    00000001 0 0.300000 466.450000 138.720000 502.650000 206.690000
    00000001 0 0.150000 466.450000 138.720000 498.860000 170.720000
    00000001 0 0.080000 466.530000 154.640000 486.630000 198.660000
    00000001 0 0.030000 466.600000 138.700000 502.630000 174.680000
    00000001 0 0.020000 243.160000 159.020000 511.140000 166.620000
    

Then comes the box-drawing script:

# coding:utf-8
'''
Read the txt file produced by testing and draw each box in it
back onto the corresponding test image.
'''
import cv2

# class id in the txt -> (label text, label background width, BGR color);
# adjust this table to your own dataset
CLASS_INFO = {
    '5': ("Pedestrian",    100, (0, 255, 255)),
    '0': ("Rider",          50, (0, 255, 0)),
    '3': ("Electromobile", 130, (0, 0, 255)),
    '6': ("Bike",           40, (255, 255, 0)),
    '2': ("Motorbike",      90, (172, 172, 0)),
    '4': ("Rider_trunc",   110, (172, 0, 172)),
    '1': ("Hat",            35, (255, 0, 255)),
}

def drawBBox(txt_path, img_path, save_path):
    img_id = None
    with open(txt_path, 'r') as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                continue
            name, cls_id = fields[0], fields[1]
            score = float(fields[2])
            x1, y1, x2, y2 = [round(float(v)) for v in fields[3:7]]
            if score < 0.5 or cls_id not in CLASS_INFO:
                continue
            # the first box of an image is drawn on the original image; later
            # boxes of the same image are drawn on the copy already written to
            # save_path, so the boxes accumulate
            if name != img_id:
                img = cv2.imread(img_path + name + ".jpg")
            else:
                img = cv2.imread(save_path + name + ".jpg")
            label, label_w, color = CLASS_INFO[cls_id]
            cv2.rectangle(img, (x1, y1 - 22), (x1 + label_w, y1), color, thickness=-1)
            cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0))
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 3, 4, 0)
            img_id = name
            cv2.imwrite(save_path + img_id + ".jpg", img)
            print(name + ".jpg is saved....OK!!!")

if __name__ == '__main__':
    # path of the detection txt
    txt_path = "./lsm-CornerNet-Lite.txt"
    # directory of the original test images
    img_path = "./test/"
    # directory where the drawn images are saved
    save_path = "./result/"
    drawBBox(txt_path, img_path, save_path)
    print("All Done....")

The CLASS_INFO table at the top maps each class id in the txt to its class name, label width, and color; just adapt it to your own dataset.

The final result is that every image in the test set has its bboxes drawn on it (each box color corresponds to a different class):
[screenshot: test image with colored per-class detection boxes]

Visualizing the Heatmap

This part follows another author's implementation (see the linked visualize-heatmap post); following it directly gets you the result. The first step there is to modify demo.py, and the demo.py in question is the Approach 1 code above. I have annotated the accompanying visualize.py:

# -*- coding: utf-8 -*-
import numpy as np
import cv2
import torch

def visualize(image, tl_heat, br_heat):
    # image size = [2, 3, 1151, 2047]
    # the original image downsampled 4x gives the heatmap resolution, so at
    # test time it is not necessarily 128
    # tl_heat: [2, 7, 288, 512] -- not fixed, depends on the image size
    tl_heat = torch.sigmoid(tl_heat)
    # [2, 7, 288, 512]
    br_heat = torch.sigmoid(br_heat)
    
    
    # colors is a list of shape (7, 1, 1, 3): 7 is the number of classes, and
    # each (1, 1, 3) entry is randomly generated, i.e. every class gets its
    # own random color; it looks roughly like this:
    '''
    [array([[[105, 131, 151]]], dtype=uint8),
     array([[[180, 216, 153]]], dtype=uint8),
     array([[[151, 150, 167]]], dtype=uint8),
     array([[[188, 236, 177]]], dtype=uint8),
     array([[[111, 143, 220]]], dtype=uint8),
     array([[[240, 194, 238]]], dtype=uint8),
     array([[[207, 136, 124]]], dtype=uint8)]
    '''
    colors = [((np.random.random((1, 1, 3)) * 0.6 + 0.4)*255).astype(np.uint8)\
               for _ in range(tl_heat.shape[1])]
    # tl_heat[0] size = [7, 288, 512]
    # take the first image of the batch and colorize its heatmaps;
    # tl_hm and br_hm both end up with shape [h, w, 3]
    tl_hm = _gen_colormap(tl_heat[0].detach().cpu().numpy(), colors)
    br_hm = _gen_colormap(br_heat[0].detach().cpu().numpy(), colors)
    # per-channel mean and std
    mean = np.array([0.40789654, 0.44719302, 0.47026115],
                    dtype=np.float32).reshape(3, 1, 1)
    std = np.array([0.28863828, 0.27408164, 0.27809835],
                    dtype=np.float32).reshape(3, 1, 1)
    # un-normalize: multiply each channel by its std, add its mean, scale to 0-255
    img = (image[0].detach().cpu().numpy() * std + mean) * 255
    # transpose the image back to the standard HWC layout
    img = img.astype(np.uint8).transpose(1, 2, 0)

    tl_blend = _blend_img(img, tl_hm)
    br_blend = _blend_img(img, br_hm)
    cv2.imwrite("./tl_heatmap.jpg", tl_blend)
    cv2.imwrite("./br_heatmap.jpg", br_blend)
    print("~~~save heatmaps OK!")

def _gen_colormap(heatmap, colors):
    # heatmap has shape [num_classes, h, w], e.g. [7, 288, 512]
    num_classes = heatmap.shape[0]
    h, w = heatmap.shape[1], heatmap.shape[2]
    color_map = np.zeros((h, w, 3), dtype=np.uint8)
    for i in range(num_classes):
        # np.maximum compares its two inputs elementwise and keeps the larger
        # value (shapes must broadcast):
        #   color_map:                    [h, w, 3]
        #   heatmap[i, :, :, np.newaxis]: [h, w, 1]
        #   colors[i]:                    [1, 1, 3]
        # the product is an integer in 0-255; looping over the classes,
        # color_map keeps the elementwise maximum across all of them
        color_map = np.maximum(
            color_map, (heatmap[i, :, :, np.newaxis] * colors[i]).astype(np.uint8))
    return color_map


def _blend_img(back, fore, trans=0.7):
    '''
    back = img   --> [h*4, w*4, 3]
    fore = tl_hm --> [h, w, 3]
    '''
    if fore.shape[0] != back.shape[0] or fore.shape[1] != back.shape[1]:
        fore = cv2.resize(fore, (back.shape[1], back.shape[0]))
    if len(fore.shape) == 2:
        fore = fore.reshape(fore.shape[0], fore.shape[1], 1)
    # blend the two images: blended = back * (1 - alpha) + fore * alpha;
    # clip before the uint8 cast so values cannot wrap around, and note
    # ret has the same size as the original image
    ret = np.clip(back * (1. - trans) + fore * trans, 0, 255).astype(np.uint8)
    return ret
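
To actually produce the two heatmap jpgs, visualize needs the raw corner heatmaps, which the stock nnet.test call in demo.py does not return; providing them is exactly what the linked post's first step (modifying demo.py) does. A minimal sketch of the hook, assuming the test path has been changed to also return tl_heat and br_heat (a hypothetical return signature):

from visualize import visualize

# in demo.py's per-scale loop, in place of the plain kp_decode call
detections, tl_heat, br_heat = nnet.test(
    [images], ae_threshold=ae_threshold, K=K, kernel=nms_kernel)
visualize(images, tl_heat, br_heat)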

Testing Per-Class Accuracy

Models trained on COCO-style data only print evaluation metrics like these at the end of testing:
[screenshot: standard COCO AP/AR summary output]
That gives plenty of mAP numbers, but no AP for the individual classes. When training with multiple classes this is not enough; what we really want is each class's own AP, so we can analyze which classes are hard to detect, which ones reach high accuracy, and so on. That requires a small code change:

In ~/CornerNet/db/coco.py, add a new function directly:

def _print_detection_eval_metrics(self, coco_eval):
    IoU_lo_thresh = 0.5
    IoU_hi_thresh = 0.95

    def _get_thr_ind(coco_eval, thr):
        ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                       (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
        iou_thr = coco_eval.params.iouThrs[ind]
        assert np.isclose(iou_thr, thr)
        return ind

    ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
    ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
    # precision has dims (iou, recall, cls, area range, max dets)
    # area range index 0: all area ranges
    # max dets index 2: 100 per image
    precision = \
        coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
    ap_default = np.mean(precision[precision > -1])
    print(('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
           '~~~~').format(IoU_lo_thresh, IoU_hi_thresh))
    print('mAP:{:.1f}'.format(100 * ap_default))
    for cls_ind, cls in enumerate(self._classes):
        # skip the background entry if the class list has one
        if cls == '__background__':
            continue
        cat_name  = self.class_name(cls)
        precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind, 0, 2]
        ap = np.mean(precision[precision > -1])
        print(cat_name + ':{:.1f}'.format(100 * ap))

Then Ctrl+F in the same file to locate the line coco_eval.evaluate(), and add two new lines directly below it:

coco_eval.accumulate()
self._print_detection_eval_metrics(coco_eval)
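
For context, after this edit the surrounding evaluation code reads roughly like the sketch below (variable names may differ slightly in your copy of db/coco.py):

coco_eval = COCOeval(coco, coco_dets, "bbox")
coco_eval.params.imgIds = eval_ids
coco_eval.evaluate()
coco_eval.accumulate()                           # newly added
self._print_detection_eval_metrics(coco_eval)    # newly added
coco_eval.summarize()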

The next time the test runs, the AP of every class is printed at the end:
[screenshot: per-class AP values printed after evaluation]
