model code
import math

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from torchvision.ops import nms

# BasicBlock, Bottleneck, PyramidFeatures, RegressionModel, ClassificationModel,
# Anchors, BBoxTransform, ClipBoxes, losses and model_urls are defined in the
# other modules of this retinanet package.

class ResNet(nn.Module):
    def __init__(self, num_classes, block, layers):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)  # batch normalization
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels,
                         self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels,
                         self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError(f"Block type {block} not understood")
        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        # Prior initialization from the focal-loss (RetinaNet) paper: bias the
        # classification head so that every anchor starts out predicting
        # foreground with probability `prior`, which stabilizes early training.
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()
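    # Sanity check on that bias value: sigmoid(-log((1 - 0.01) / 0.01))
    # = 1 / (1 + 99) = 0.01, so the untrained classifier assigns roughly
    # 1% foreground probability to every anchor.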
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)
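    # Worked example: for Bottleneck (expansion = 4) with layers = [3, 4, 6, 3],
    # i.e. ResNet-50, layer2/3/4 end with 128*4 = 512, 256*4 = 1024 and
    # 512*4 = 2048 channels, which is exactly what fpn_sizes reads off
    # conv3.out_channels in __init__ above.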
    def freeze_bn(self):
        '''Freeze BatchNorm layers.'''
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                # eval() switches the layer to evaluation mode, so the running
                # mean/variance statistics stop updating during training.
                layer.eval()
    def forward(self, inputs):
        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs
        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        features = self.fpn([x2, x3, x4])
        # Concatenate the per-level predictions along dim 1, the anchor dimension.
        regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
        classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
        anchors = self.anchors(img_batch)
        if self.training:
            return self.focalLoss(classification, regression, anchors, annotations)
        else:
            transformed_anchors = self.regressBoxes(anchors, regression)
            transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
            finalResult = [[], [], []]
            finalScores = torch.Tensor([])
            finalAnchorBoxesIndexes = torch.Tensor([]).long()
            finalAnchorBoxesCoordinates = torch.Tensor([])
            if torch.cuda.is_available():
                finalScores = finalScores.cuda()
                finalAnchorBoxesIndexes = finalAnchorBoxesIndexes.cuda()
                finalAnchorBoxesCoordinates = finalAnchorBoxesCoordinates.cuda()
            # Run NMS independently for every class i.
            for i in range(classification.shape[2]):
                scores = torch.squeeze(classification[:, :, i])
                scores_over_thresh = (scores > 0.05)
                if scores_over_thresh.sum() == 0:
                    # no boxes to NMS, just continue
                    continue
                scores = scores[scores_over_thresh]
                anchorBoxes = torch.squeeze(transformed_anchors)
                anchorBoxes = anchorBoxes[scores_over_thresh]
                anchors_nms_idx = nms(anchorBoxes, scores, 0.5)
                finalResult[0].extend(scores[anchors_nms_idx])
                finalResult[1].extend(torch.tensor([i] * anchors_nms_idx.shape[0]))
                finalResult[2].extend(anchorBoxes[anchors_nms_idx])
                finalScores = torch.cat((finalScores, scores[anchors_nms_idx]))
                finalAnchorBoxesIndexesValue = torch.tensor([i] * anchors_nms_idx.shape[0])
                if torch.cuda.is_available():
                    finalAnchorBoxesIndexesValue = finalAnchorBoxesIndexesValue.cuda()
                finalAnchorBoxesIndexes = torch.cat((finalAnchorBoxesIndexes, finalAnchorBoxesIndexesValue))
                finalAnchorBoxesCoordinates = torch.cat((finalAnchorBoxesCoordinates, anchorBoxes[anchors_nms_idx]))
            # Returns the final detection scores, their class indices,
            # and the corresponding box coordinates.
            return [finalScores, finalAnchorBoxesIndexes, finalAnchorBoxesCoordinates]
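The inference branch above runs NMS once per class via torchvision.ops.nms. A toy sketch of what that call does, with invented boxes and scores:

import torch
from torchvision.ops import nms

# Two heavily overlapping boxes and one separate box, as (x1, y1, x2, y2).
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 10., 10.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])

# Boxes are kept greedily by score; any box whose IoU with an already kept
# box exceeds the threshold is suppressed. Here box 1 overlaps box 0 with
# IoU 0.81, so only indices 0 and 2 survive.
keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2])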
def resnet18(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False)
    return model


def resnet34(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False)
    return model


def resnet50(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False)
    return model


def resnet101(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False)
    return model


def resnet152(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False)
    return model
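A minimal usage sketch, assuming the rest of the retinanet package (FPN, anchors, heads) is importable as noted above and 80 COCO classes:

retinanet = resnet50(num_classes=80, pretrained=True)
retinanet.eval()

# Inference takes a float batch of shape (N, 3, H, W) and returns
# [scores, class_indices, boxes] as produced by forward() above.
dummy_batch = torch.randn(1, 3, 608, 832)
with torch.no_grad():
    scores, class_ids, boxes = retinanet(dummy_batch)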
dataloader code
from __future__ import print_function, division
import sys
import os
import torch
import numpy as np
import random
import csv
from six import raise_from  # used by _parse/_read_annotations below
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.utils.data.sampler import Sampler
from pycocotools.coco import COCO
# skimage.io reads images as numpy arrays of shape (height, width, channel),
# so img[0][0] already gives the three channel values of the top-left pixel.
import skimage.io
import skimage.transform
import skimage.color
import skimage
from PIL import Image  # standard image-processing library
class CocoDataset(Dataset):
    """Coco dataset."""

    def __init__(self, root_dir, set_name='train2017', transform=None):
        """
        Args:
            root_dir (string): COCO directory.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform
        # Path to the COCO annotation file for this split.
        self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json'))
        self.image_ids = self.coco.getImgIds()
        self.load_classes()

    def load_classes(self):
        # load class names (name -> label)
        categories = self.coco.loadCats(self.coco.getCatIds())
        categories.sort(key=lambda x: x['id'])
        self.classes = {}
        self.coco_labels = {}
        self.coco_labels_inverse = {}
        for c in categories:
            self.coco_labels[len(self.classes)] = c['id']
            self.coco_labels_inverse[c['id']] = len(self.classes)
            self.classes[c['name']] = len(self.classes)
        # also load the reverse (label -> name)
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def load_image(self, image_index):
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
        # path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name'])
        path = os.path.join(self.root_dir, self.set_name, image_info['file_name'])
        img = skimage.io.imread(path)
        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)
        return img.astype(np.float32) / 255.0

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations = np.zeros((0, 5))
        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return annotations
        # parse annotations
        coco_annotations = self.coco.loadAnns(annotations_ids)
        for idx, a in enumerate(coco_annotations):
            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue
            # Build one 1x5 row [x, y, w, h, label] for this object, then
            # stack it onto the (n, 5) annotations array.
            annotation = np.zeros((1, 5))
            annotation[0, :4] = a['bbox']
            annotation[0, 4] = self.coco_label_to_label(a['category_id'])
            annotations = np.append(annotations, annotation, axis=0)
        # transform from [x, y, w, h] to [x1, y1, x2, y2]
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]
        return annotations

    def coco_label_to_label(self, coco_label):
        return self.coco_labels_inverse[coco_label]

    def label_to_coco_label(self, label):
        return self.coco_labels[label]

    def image_aspect_ratio(self, image_index):
        image = self.coco.loadImgs(self.image_ids[image_index])[0]
        return float(image['width']) / float(image['height'])

    def num_classes(self):
        return 80
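A minimal sketch of wiring the dataset to the transforms defined further down in this file; the COCO path is a placeholder:

dataset_train = CocoDataset('/path/to/coco', set_name='train2017',
                            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
sample = dataset_train[0]
print(sample['img'].shape, sample['annot'].shape, sample['scale'])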
class CSVDataset(Dataset):
    """CSV dataset."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with training annotations
            class_list (string): CSV file with the class list
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform
        # parse the provided class file (comma-delimited)
        try:
            with self._open_for_csv(self.class_list) as file:
                self.classes = self.load_classes(csv.reader(file, delimiter=','))
        except ValueError as e:
            raise ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e))
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key
        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with self._open_for_csv(self.train_file) as file:
                self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes)
        except ValueError as e:
            raise ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e))
        self.image_names = list(self.image_data.keys())

    def _parse(self, value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.
        Returns `function(value)`.
        Any `ValueError` raised is caught and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            raise_from(ValueError(fmt.format(e)), None)

    def _open_for_csv(self, path):
        """
        Open a file with flags suitable for csv.reader.
        On Python 2 this means mode 'rb'; on Python 3 it means mode 'r'
        with "universal newlines".
        """
        if sys.version_info[0] < 3:
            return open(path, 'rb')
        else:
            return open(path, 'r', newline='')

    def load_classes(self, csv_reader):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1
            try:
                class_name, class_id = row
            except ValueError:
                raise ValueError('line {}: format should be \'class_name,class_id\''.format(line))
            class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))
            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def load_image(self, image_index):
        img = skimage.io.imread(self.image_names[image_index])
        if len(img.shape) == 2:
            # expand a single-channel grayscale image to 3-channel RGB
            img = skimage.color.gray2rgb(img)
        return img.astype(np.float32) / 255.0

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))
        # some images appear to miss annotations
        if len(annotation_list) == 0:
            return annotations
        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']
            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue
            annotation = np.zeros((1, 5))
            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)
        return annotations

    def _read_annotations(self, csv_reader, classes):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1
            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)
            if img_file not in result:
                result[img_file] = []
            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
                continue
            x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))
            # Check that the bounding box is valid.
            if x2 <= x1:
                raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
            if y2 <= y1:
                raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))
            # check that the class name is present in the class list
            if class_name not in classes:
                raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))
            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        # self.classes maps name -> label and self.labels maps label -> name,
        # each the inverse of the other.
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        # aspect ratio = width / height
        image = Image.open(self.image_names[image_index])
        return float(image.width) / float(image.height)
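The two CSV files the class expects look like this; the file names and rows below are invented for illustration:

# classes.csv -- one "class_name,class_id" pair per line:
#   cat,0
#   dog,1
#
# annotations.csv -- "img_file,x1,y1,x2,y2,class_name"; a row with empty
# coordinates marks an image that has no objects:
#   images/0001.jpg,120,80,310,260,cat
#   images/0002.jpg,,,,,
dataset = CSVDataset(train_file='annotations.csv', class_list='classes.csv',
                     transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))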
def collater(data):
    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]
    # Note: shape[0] is the row count and shape[1] the column count, so these
    # names are swapped relative to the usual width/height convention; they
    # are, however, used consistently below.
    widths = [int(s.shape[0]) for s in imgs]
    heights = [int(s.shape[1]) for s in imgs]
    batch_size = len(imgs)
    max_width = np.array(widths).max()
    max_height = np.array(heights).max()
    # One zero-padded (max_width, max_height, 3) canvas per image.
    padded_imgs = torch.zeros(batch_size, max_width, max_height, 3)
    for i in range(batch_size):
        img = imgs[i]
        # Copy image i into the top-left corner of its canvas; the rest stays zero.
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img
    # Pad every annotation array to the same number of rows, filling with -1.
    max_num_annots = max(annot.shape[0] for annot in annots)
    if max_num_annots > 0:
        annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1
        for idx, annot in enumerate(annots):
            if annot.shape[0] > 0:
                annot_padded[idx, :annot.shape[0], :] = annot
    else:
        annot_padded = torch.ones((len(annots), 1, 5)) * -1
    # (N, H, W, C) -> (N, C, H, W) for PyTorch.
    padded_imgs = padded_imgs.permute(0, 3, 1, 2)
    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}
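A sketch of plugging collater and AspectRatioBasedSampler (defined below) into a DataLoader; dataset_train is assumed to be one of the datasets above:

sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
dataloader_train = DataLoader(dataset_train, num_workers=3,
                              collate_fn=collater, batch_sampler=sampler)
for batch in dataloader_train:
    print(batch['img'].shape)    # (2, 3, H_max, W_max), zero-padded
    print(batch['annot'].shape)  # (2, max_annots, 5), padded with -1
    break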
class Resizer(object):
    """Resize an image so its smallest side is min_side (without the largest
    side exceeding max_side) and convert the ndarrays in the sample to Tensors."""

    def __call__(self, sample, min_side=608, max_side=1024):
        image, annots = sample['img'], sample['annot']
        rows, cols, cns = image.shape
        smallest_side = min(rows, cols)
        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side
        # check if the largest side is now greater than max_side, which can
        # happen when images have a large aspect ratio
        largest_side = max(rows, cols)
        if largest_side * scale > max_side:
            scale = max_side / largest_side
        # resize the image with the computed scale
        image = skimage.transform.resize(image, (int(round(rows * scale)), int(round(cols * scale))))
        rows, cols, cns = image.shape
        # Pad each side up to the next multiple of 32 so the FPN's stride-32
        # levels divide evenly. (When a side is already a multiple of 32 this
        # still adds a full extra 32 pixels of padding.)
        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32
        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)
        annots[:, :4] *= scale
        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale}
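A worked example of the scale computation, using an invented 480 x 640 image:

rows, cols = 480, 640
scale = 608 / min(rows, cols)            # 608 / 480 ≈ 1.267
if max(rows, cols) * scale > 1024:       # 640 * 1.267 ≈ 811 <= 1024, keep scale
    scale = 1024 / max(rows, cols)
new_rows, new_cols = int(round(rows * scale)), int(round(cols * scale))  # 608, 811
# Padding up to multiples of 32 yields 640 x 832 (608 is already a multiple
# of 32, so it still gains a full extra 32 rows).
print(new_rows + (32 - new_rows % 32), new_cols + (32 - new_cols % 32))  # 640 832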
class Augmenter(object):
    """Randomly flip an image (and its annotations) horizontally."""

    def __call__(self, sample, flip_x=0.5):
        if np.random.rand() < flip_x:
            image, annots = sample['img'], sample['annot']
            image = image[:, ::-1, :]
            rows, cols, channels = image.shape
            # Mirror the x coordinates: the old x2 becomes the new x1 and
            # vice versa, measured from the right edge.
            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()
            x_tmp = x1.copy()
            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp
            sample = {'img': image, 'annot': annots}
        return sample
# Normalize with ImageNet statistics: (image - mean) / std.
class Normalizer(object):
    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots = sample['img'], sample['annot']
        return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots}


class UnNormalizer(object):
    def __init__(self, mean=None, std=None):
        if mean is None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std is None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    # __call__ lets an instance be invoked like a function.
    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be un-normalized.
        Returns:
            Tensor: Un-normalized image.
        """
        # Invert the normalization channel by channel: t = t * std + mean.
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor
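A quick round trip showing that UnNormalizer inverts Normalizer on a toy image:

img = np.random.rand(4, 4, 3).astype(np.float32)
normed = Normalizer()({'img': img, 'annot': np.zeros((0, 5))})['img']
# UnNormalizer expects a (C, H, W) tensor, so permute before applying it.
restored = UnNormalizer()(torch.from_numpy(normed).permute(2, 0, 1).float())
print(torch.allclose(restored, torch.from_numpy(img).permute(2, 0, 1), atol=1e-6))  # True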
class AspectRatioBasedSampler(Sampler):
    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        # Shuffle the batches in place, then yield them one at a time.
        # yield returns a value and remembers where it left off, so the next
        # iteration resumes right after it, making __iter__ a generator.
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # Order the images by aspect ratio so each batch holds
        # similarly-shaped images and needs less padding.
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))
        # Divide into groups, one group = one batch; order[x % len(order)]
        # wraps around, so the last batch is filled back up from the start
        # when len(order) is not a multiple of batch_size.
        return [[order[x % len(order)] for x in range(i, i + self.batch_size)]
                for i in range(0, len(order), self.batch_size)]
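A worked example of that comprehension: with five images whose aspect-ratio order came out as [3, 0, 4, 1, 2] and batch_size = 2, the batches are built as follows, with the last one wrapping around to the start:

order, batch_size = [3, 0, 4, 1, 2], 2
groups = [[order[x % len(order)] for x in range(i, i + batch_size)]
          for i in range(0, len(order), batch_size)]
print(groups)  # [[3, 0], [4, 1], [2, 3]] -- the last batch reuses index 3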
coco_eval code
from pycocotools.cocoeval import COCOeval
import json
import torch


def evaluate_coco(dataset, model, threshold=0.05):
    model.eval()
    with torch.no_grad():
        # start collecting results
        results = []
        image_ids = []
        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']
            # run network on a single (1, 3, H, W) batch
            if torch.cuda.is_available():
                scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = model(data['img'].permute(2, 0, 1).float().unsqueeze(dim=0))
            scores = scores.cpu()
            labels = labels.cpu()
            boxes = boxes.cpu()
            # correct boxes for image scale
            boxes /= scale
            if boxes.shape[0] > 0:
                # change to (x, y, w, h) (MS COCO standard)
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]
                # compute predicted labels and scores
                for box_id in range(boxes.shape[0]):
                    score = float(scores[box_id])
                    label = int(labels[box_id])
                    box = boxes[box_id, :]
                    # scores are sorted, so we can break
                    if score < threshold:
                        break
                    # append detection for each positively labeled class
                    image_result = {
                        'image_id': dataset.image_ids[index],
                        'category_id': dataset.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': box.tolist(),
                    }
                    # append detection to results
                    results.append(image_result)
            # append image to list of processed images
            image_ids.append(dataset.image_ids[index])
            # print progress
            print('{}/{}'.format(index, len(dataset)), end='\r')
        if not len(results):
            return
        # write output
        json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4)
        # load results in COCO evaluation tool
        coco_true = dataset.coco
        coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name))
        # run COCO evaluation
        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        model.train()
        return
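A minimal sketch of running the evaluation on the COCO 2017 validation split; the path is a placeholder, and resnet50 plus the transforms come from the sections above:

dataset_val = CocoDataset('/path/to/coco', set_name='val2017',
                          transform=transforms.Compose([Normalizer(), Resizer()]))
retinanet = resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
if torch.cuda.is_available():
    retinanet = retinanet.cuda()
evaluate_coco(dataset_val, retinanet, threshold=0.05)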