coco数据集是我们计算机视觉中最常用的数据集,因此我们需要知道并且熟悉其处理与评估
首先看看,coco2017数据集文件
└── coco2017: 数据集根目录
    ├── train2017: 所有训练图像文件夹(118287张)
    ├── val2017: 所有验证图像文件夹(5000张)
    └── annotations: 对应标注文件夹
        ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件
        ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件
        ├── captions_train2017.json: 对应图像描述的训练集标注文件
        ├── captions_val2017.json: 对应图像描述的验证集标注文件
        ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件
        └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件
1. coco数据集的处理
代码:
"""COCO Dataset Classes"""
import os
import numpy as np
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from utils.path import COCO2017_path, COCO2014_path
from skimage import io, color
# Dataset root locations, aliased from the project-local ``utils.path`` module.
COCO_2014_ROOT = COCO2014_path
COCO_ROOT = COCO2017_path
# Names of the 80 COCO object categories, in list order.
# NOTE(review): CocoDetection below does not read this list; it builds its own
# name/label maps from the annotation file in load_classes().
COCO_CLASSES = [
'person',
'bicycle',
'car',
'motorcycle',
'airplane',
'bus',
'train',
'truck',
'boat',
'traffic light',
'fire hydrant',
'stop sign',
'parking meter',
'bench',
'bird',
'cat',
'dog',
'horse',
'sheep',
'cow',
'elephant',
'bear',
'zebra',
'giraffe',
'backpack',
'umbrella',
'handbag',
'tie',
'suitcase',
'frisbee',
'skis',
'snowboard',
'sports ball',
'kite',
'baseball bat',
'baseball glove',
'skateboard',
'surfboard',
'tennis racket',
'bottle',
'wine glass',
'cup',
'fork',
'knife',
'spoon',
'bowl',
'banana',
'apple',
'sandwich',
'orange',
'broccoli',
'carrot',
'hot dog',
'pizza',
'donut',
'cake',
'chair',
'couch',
'potted plant',
'bed',
'dining table',
'toilet',
'tv',
'laptop',
'mouse',
'remote',
'keyboard',
'cell phone',
'microwave',
'oven',
'toaster',
'sink',
'refrigerator',
'book',
'clock',
'vase',
'scissors',
'teddy bear',
'hair drier',
'toothbrush',
]
# Palette of 100 three-component integer colour tuples.
# NOTE(review): presumably one colour per class for drawing detections; the
# consumer and the channel order (RGB vs BGR) are not shown here - confirm.
colors = [
(39, 129, 113),
(164, 80, 133),
(83, 122, 114),
(99, 81, 172),
(95, 56, 104),
(37, 84, 86),
(14, 89, 122),
(80, 7, 65),
(10, 102, 25),
(90, 185, 109),
(106, 110, 132),
(169, 158, 85),
(188, 185, 26),
(103, 1, 17),
(82, 144, 81),
(92, 7, 184),
(49, 81, 155),
(179, 177, 69),
(93, 187, 158),
(13, 39, 73),
(12, 50, 60),
(16, 179, 33),
(112, 69, 165),
(15, 139, 63),
(33, 191, 159),
(182, 173, 32),
(34, 113, 133),
(90, 135, 34),
(53, 34, 86),
(141, 35, 190),
(6, 171, 8),
(118, 76, 112),
(89, 60, 55),
(15, 54, 88),
(112, 75, 181),
(42, 147, 38),
(138, 52, 63),
(128, 65, 149),
(106, 103, 24),
(168, 33, 45),
(28, 136, 135),
(86, 91, 108),
(52, 11, 76),
(142, 6, 189),
(57, 81, 168),
(55, 19, 148),
(182, 101, 89),
(44, 65, 179),
(1, 33, 26),
(122, 164, 26),
(70, 63, 134),
(137, 106, 82),
(120, 118, 52),
(129, 74, 42),
(182, 147, 112),
(22, 157, 50),
(56, 50, 20),
(2, 22, 177),
(156, 100, 106),
(21, 35, 42),
(13, 8, 121),
(142, 92, 28),
(45, 118, 33),
(105, 118, 30),
(7, 185, 124),
(46, 34, 146),
(105, 184, 169),
(22, 18, 5),
(147, 71, 73),
(181, 64, 91),
(31, 39, 184),
(164, 179, 33),
(96, 50, 18),
(95, 15, 106),
(113, 68, 54),
(136, 116, 112),
(119, 139, 130),
(31, 139, 34),
(66, 6, 127),
(62, 39, 2),
(49, 99, 180),
(49, 119, 155),
(153, 50, 183),
(125, 38, 3),
(129, 87, 143),
(49, 87, 40),
(128, 62, 120),
(73, 85, 148),
(28, 144, 118),
(29, 9, 24),
(175, 45, 108),
(81, 175, 64),
(178, 19, 157),
(74, 188, 190),
(18, 114, 2),
(62, 128, 96),
(21, 3, 150),
(0, 6, 95),
(2, 20, 184),
(122, 37, 185),
]
class CocoDetection(Dataset):
    """COCO detection dataset yielding ``(image, annotations)`` pairs.

    Images are read from ``<root_dir>/<set_name>/`` and annotations from
    ``<root_dir>/annotations/instances_<set_name>.json``.

    Args:
        root_dir: Dataset root directory.
        set_name: Split name, used both as the image sub-directory and in
            the annotation file name (default ``'train2017'``).
        transform: Optional callable invoked as ``transform(img, annot)``
            and expected to return an ``(img, annot)`` pair.
    """

    def __init__(self,
                 root_dir,
                 set_name='train2017',
                 transform=None):
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform

        # Load the instance annotations for the chosen split.
        annotation_file = os.path.join(
            self.root_dir,
            'annotations',
            'instances_' + self.set_name + '.json')
        self.coco = COCO(annotation_file)

        self.ids = self.coco.getImgIds()
        self.load_classes()

    def load_classes(self):
        """Build the lookup tables between COCO category ids and labels.

        Categories are sorted by their COCO ``id`` and numbered 0, 1, 2, ...
        in that order. Populates:

        * ``classes``: category name -> label
        * ``labels``: label -> category name
        * ``coco_labels``: label -> COCO category id
        * ``coco_labels_inverse``: COCO category id -> label
        """
        self.cat_ids = self.coco.getCatIds()
        self.categories = self.coco.loadCats(self.cat_ids)
        self.categories.sort(key=lambda x: x['id'])

        self.classes = {}
        self.coco_labels = {}
        self.coco_labels_inverse = {}
        for category in self.categories:
            # The next free label is the number of names registered so far.
            label = len(self.classes)
            self.coco_labels[label] = category['id']
            self.coco_labels_inverse[category['id']] = label
            self.classes[category['name']] = label

        # Reverse mapping (label -> name).
        self.labels = {label: name for name, label in self.classes.items()}

    def __len__(self):
        """Return the number of images in the split."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return the ``(img, annot)`` pair for the image at position ``idx``.

        ``img`` comes from :meth:`load_image` and ``annot`` from
        :meth:`load_annots`; when a ``transform`` was supplied, both are
        passed through it first.
        """
        img = self.load_image(idx)
        annot = self.load_annots(idx)
        if self.transform:
            img, annot = self.transform(img, annot)
        return img, annot

    def load_image(self, idx):
        """Read the image at position ``idx`` as a ``float32`` array.

        Two-dimensional (grayscale) images are expanded with
        ``color.gray2rgb``; pixel values are then divided by 255.0.
        """
        # image_info is a dict such as
        # {'file_name': '000000288174.jpg', 'height': 427, 'width': 640,
        #  'id': 288174, ...}
        image_info = self.coco.loadImgs(self.ids[idx])[0]
        path = os.path.join(self.root_dir, self.set_name,
                            image_info['file_name'])
        img = io.imread(path)
        if img.ndim == 2:
            img = color.gray2rgb(img)
        return img.astype(np.float32) / 255.0

    def load_annots(self, idx):
        """Return the ground-truth boxes of the image at position ``idx``.

        Returns:
            A float array of shape ``(k, 5)`` whose rows are
            ``[x_min, y_min, x_max, y_max, label]``; ``(0, 5)`` when the
            image has no usable annotation. Crowd annotations are excluded
            and boxes whose width or height is below 1 are skipped.
        """
        annot_ids = self.coco.getAnnIds(imgIds=self.ids[idx], iscrowd=False)
        if len(annot_ids) == 0:
            return np.zeros((0, 5))

        # Collect rows in a list and convert once; appending to the array
        # inside the loop would copy it on every iteration.
        rows = []
        for coco_annot in self.coco.loadAnns(annot_ids):
            x, y, w, h = coco_annot['bbox']
            if w < 1 or h < 1:
                continue
            label = self.coco_label_to_label(coco_annot['category_id'])
            # [x, y, w, h] -> [x1, y1, x2, y2]
            rows.append([x, y, x + w, y + h, label])

        if not rows:
            return np.zeros((0, 5))
        return np.array(rows, dtype=np.float64)

    def coco_label_to_label(self, category_id):
        """Map a COCO category id to its contiguous label."""
        return self.coco_labels_inverse[category_id]

    def label_to_coco_label(self, coco_label):
        """Map a contiguous label back to its COCO category id.

        Despite its name, ``coco_label`` is the contiguous label (a key of
        ``self.coco_labels``), not a COCO category id.
        """
        return self.coco_labels[coco_label]

    def image_aspect_ratio(self, idx):
        """Return width / height of the image at position ``idx``."""
        image = self.coco.loadImgs(self.ids[idx])[0]
        return float(image['width']) / float(image['height'])
分析:
首先,CocoDetection继承了Dataset
class CocoDetection(Dataset)
其重要的四个函数:
1. def __len__(self) Coco数据集的统计(共有多少张图)
def __len__(self):
    # Number of images: self.ids holds one entry per image id of the split.
    return len(self.ids)
2. __getitem__(self, idx) 根据Coco的idx来处理每张图像:img = self.load_image(idx),以及每张图像的ground truth:annot = self.load_annots(idx);之后如果传入了transform,再将图像和标注一起进行预处理:img, annot = self.transform(img, annot)
def __getitem__(self, idx):
    """
    params: idx, position of the image in self.ids
    return: img, annot (both passed through self.transform when it is set)
    """
    # img comes from load_image(), annot (the targets) from load_annots()
    img = self.load_image(idx)
    annot = self.load_annots(idx)
    if self.transform:
        img, annot = self.transform(img, annot)
    return img, annot
3. load_image(self, idx) 使用skimage读取图像,得到的img.shape为(height, width, channels);若读到的是灰度图(shape只有height、width两维),则先将其转化为RGB,最后转为float32并除以255.0
4. load_annots(self, idx)处理图像ground truth: coco_annots = self.coco.loadAnns(annot_ids)解析coco ground truth,再将其分解成[x_min, y_min, x_max, y_max, id]
2. coco数据集的评估
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.177
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.286
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.184
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.080
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.181
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.258
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.224
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.388
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.437
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.229
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.482
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.610
官方解释:
COCO数据集评估: https://cocodataset.org/#detections-eval
1. Average Precision(AP):
a. AP: % AP at IoU=.50:.05:.95 (primary challenge metric)
b. AP^IoU=.50 : % AP at IoU=.50 (PASCAL VOC metric)
c. AP^IoU=.75: % AP at IoU=.75 (strict metric)
2. AP Across Scales:
a. AP^small: % AP for small objects: area < 32^2
b. AP^ medium: % AP for medium objects: 32^2 < area < 96^2
c. AP^large: % AP for large objects: area > 96^2
3. Average Recall (AR):
a. AR^max=1: % AR given 1 detection per image
b. AR^max=10: % AR given 10 detections per image
c. AR^max=100: % AR given 100 detections per image
4. AR Across Scales:
a. AR^small: % AR for small objects: area < 32^2
b. AR^medium: % AR for medium objects: 32^2 < area < 96^2
c. AR^large: % AR for large objects: area > 96^2
1.a. 最常用的指标:IoU阈值从0.50到0.95、每隔0.05取一个值(共10个阈值),分别计算AP后取平均得到的mAP(平均精确度)
1.b. VOC的评判标准,即IoU=0.50时的mAP
1.c. 评判较为严格的mAP(IoU=0.75),可以反映算法框的位置精准程度
2. 不同尺寸物体的mAP,若出现-1表示标注(ground truth)中没有此尺寸的物体,因此该指标无法计算;若有此尺寸的物体但一个都没有检测出来,结果是0而不是-1
2.a. 小物体的mAP
2.b. 中等物体的mAP
2.c. 大物体的mAP
3. 与 4. 计算AR(平均召回率),可以用来判断模型是否收敛、是否需要继续训练
3. 中maxDets=[1,10,100]表示每张图像只保留置信度排名前1、10、100的预测框,再根据这些预测框与ground truth框进行对比计算AR
3.a. 置信度排名第1的AR
3.b. 置信度排名前10的AR
3.c. 置信度排名前100的AR
如上所述:
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.224
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.388
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.437
置信度排名前100的AR与置信度排名前10的AR、置信度排名第1的AR相差很多,则说明模型没有收敛还可以继续训练。
4.a. 小物体的AR
4.b. 中等物体的AR
4.c. 大物体的AR
如上述:
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.229
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.482
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.610
小物体的AR与中等物体的AR、大物体的AR相差很多,则说明模型没有收敛还可以继续训练。
参考:
COCO评价指标: https://blog.youkuaiyun.com/wss794/article/details/122408175?utm_medium=distribute.pc_aggpage_search_result.none-task-blog-2~aggregatepage~first_rank_ecpm_v1~rank_v31_ecpm-1-122408175-null-null.pc_agg_new_rank&utm_term=coco%E8%AF%84%E4%BB%B7%E6%8C%87%E6%A0%87&spm=1000.2123.3001.4430
目标检测之coco评价指标: https://blog.youkuaiyun.com/ThomasCai001/article/details/120097650
修改COCO评价指标 maxDets=[10,15,20]: https://blog.youkuaiyun.com/weixin_42899627/article/details/120689553