model code
import math

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from torchvision.ops import nms

# BasicBlock, Bottleneck, PyramidFeatures, RegressionModel, ClassificationModel,
# Anchors, BBoxTransform, ClipBoxes, losses and model_urls are defined in the
# other modules of this retinanet package.

class ResNet(nn.Module):
    def __init__(self, num_classes, block, layers):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)  # batch normalization
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels,
                         self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels,
                         self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError(f"Block type {block} not understood")
        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        # Prior initialization from the focal-loss (RetinaNet) paper: bias the
        # classification head so that every anchor starts out predicting
        # foreground with probability `prior`, which stabilizes early training.
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()
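    # Sanity check on that bias value: sigmoid(-log((1 - 0.01) / 0.01))
    # = 1 / (1 + 99) = 0.01, so the untrained classifier assigns roughly
    # 1% foreground probability to every anchor.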
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)
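    # Worked example: for Bottleneck (expansion = 4) with layers = [3, 4, 6, 3],
    # i.e. ResNet-50, layer2/3/4 end with 128*4 = 512, 256*4 = 1024 and
    # 512*4 = 2048 channels, which is exactly what fpn_sizes reads off
    # conv3.out_channels in __init__ above.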
    def freeze_bn(self):
        '''Freeze BatchNorm layers.'''
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                # eval() switches the layer to evaluation mode, so the running
                # mean/variance statistics stop updating during training.
                layer.eval()
    def forward(self, inputs):
        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs
        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        features = self.fpn([x2, x3, x4])
        # Concatenate the per-level predictions along dim 1, the anchor dimension.
        regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
        classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
        anchors = self.anchors(img_batch)
        if self.training:
            return self.focalLoss(classification, regression, anchors, annotations)
        else:
            transformed_anchors = self.regressBoxes(anchors, regression)
            transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
            finalResult = [[], [], []]
            finalScores = torch.Tensor([])
            finalAnchorBoxesIndexes = torch.Tensor([]).long()
            finalAnchorBoxesCoordinates = torch.Tensor([])
            if torch.cuda.is_available():
                finalScores = finalScores.cuda()
                finalAnchorBoxesIndexes = finalAnchorBoxesIndexes.cuda()
                finalAnchorBoxesCoordinates = finalAnchorBoxesCoordinates.cuda()
            # Run NMS independently for every class i.
            for i in range(classification.shape[2]):
                scores = torch.squeeze(classification[:, :, i])
                scores_over_thresh = (scores > 0.05)
                if scores_over_thresh.sum() == 0:
                    # no boxes to NMS, just continue
                    continue
                scores = scores[scores_over_thresh]
                anchorBoxes = torch.squeeze(transformed_anchors)
                anchorBoxes = anchorBoxes[scores_over_thresh]
                anchors_nms_idx = nms(anchorBoxes, scores, 0.5)
                finalResult[0].extend(scores[anchors_nms_idx])
                finalResult[1].extend(torch.tensor([i] * anchors_nms_idx.shape[0]))
                finalResult[2].extend(anchorBoxes[anchors_nms_idx])
                finalScores = torch.cat((finalScores, scores[anchors_nms_idx]))
                finalAnchorBoxesIndexesValue = torch.tensor([i] * anchors_nms_idx.shape[0])
                if torch.cuda.is_available():
                    finalAnchorBoxesIndexesValue = finalAnchorBoxesIndexesValue.cuda()
                finalAnchorBoxesIndexes = torch.cat((finalAnchorBoxesIndexes, finalAnchorBoxesIndexesValue))
                finalAnchorBoxesCoordinates = torch.cat((finalAnchorBoxesCoordinates, anchorBoxes[anchors_nms_idx]))
            # Returns the final detection scores, their class indices,
            # and the corresponding box coordinates.
            return [finalScores, finalAnchorBoxesIndexes, finalAnchorBoxesCoordinates]
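The inference branch above runs NMS once per class via torchvision.ops.nms. A toy sketch of what that call does, with invented boxes and scores:

import torch
from torchvision.ops import nms

# Two heavily overlapping boxes and one separate box, as (x1, y1, x2, y2).
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 10., 10.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])

# Boxes are kept greedily by score; any box whose IoU with an already kept
# box exceeds the threshold is suppressed. Here box 1 overlaps box 0 with
# IoU 0.81, so only indices 0 and 2 survive.
keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2])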
def resnet18(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False)
    return model


def resnet34(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False)
    return model


def resnet50(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False)
    return model


def resnet101(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False)
    return model


def resnet152(num_classes, pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False)
    return model
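A minimal usage sketch, assuming the rest of the retinanet package (FPN, anchors, heads) is importable as noted above and 80 COCO classes:

retinanet = resnet50(num_classes=80, pretrained=True)
retinanet.eval()

# Inference takes a float batch of shape (N, 3, H, W) and returns
# [scores, class_indices, boxes] as produced by forward() above.
dummy_batch = torch.randn(1, 3, 608, 832)
with torch.no_grad():
    scores, class_ids, boxes = retinanet(dummy_batch)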
dataloader code
from __future__ import print_function, division
import sys
import os
import torch
import numpy as np
import random
import csv
from six import raise_from  # used by _parse/_read_annotations below
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.utils.data.sampler import Sampler
from pycocotools.coco import COCO
# skimage.io reads images as numpy arrays of shape (height, width, channel),
# so img[0][0] already gives the three channel values of the top-left pixel.
import skimage.io
import skimage.transform
import skimage.color
import skimage
from PIL import Image  # standard image-processing library
class CocoDataset(Dataset):
    """Coco dataset."""

    def __init__(self, root_dir, set_name='train2017', transform=None):
        """
        Args:
            root_dir (string): COCO directory.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform
        # Path to the COCO annotation file for this split.
        self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json'))
        self.image_ids = self.coco.getImgIds()
        self.load_classes()

    def load_classes(self):
        # load class names (name -> label)
        categories = self.coco.loadCats(self.coco.getCatIds())
        categories.sort(key=lambda x: x['id'])
        self.classes = {}
        self.coco_labels = {}
        self.coco_labels_inverse = {}
        for c in categories:
            self.coco_labels[len(self.classes)] = c['id']
            self.coco_labels_inverse[c['id']] = len(self.classes)
            self.classes[c['name']] = len(self.classes)
        # also load the reverse (label -> name)
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def load_image(self, image_index):
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
        # path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name'])
        path = os.path.join(self.root_dir, self.set_name, image_info['file_name'])
        img = skimage.io.imread(path)
        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)
        return img.astype(np.float32) / 255.0

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations = np.zeros((0, 5))
        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return annotations
        # parse annotations
        coco_annotations = self.coco.loadAnns(annotations_ids)
        for idx, a in enumerate(coco_annotations):
            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue
            # Build one 1x5 row [x, y, w, h, label] for this object, then
            # stack it onto the (n, 5) annotations array.
            annotation = np.zeros((1, 5))
            annotation[0, :4] = a['bbox']
            annotation[0, 4] = self.coco_label_to_label(a['category_id'])
            annotations = np.append(annotations, annotation, axis=0)
        # transform from [x, y, w, h] to [x1, y1, x2, y2]
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]
        return annotations

    def coco_label_to_label(self, coco_label):
        return self.coco_labels_inverse[coco_label]

    def label_to_coco_label(self, label):
        return self.coco_labels[label]

    def image_aspect_ratio(self, image_index):
        image = self.coco.loadImgs(self.image_ids[image_index])[0]
        return float(image['width']) / float(image['height'])

    def num_classes(self):
        return 80
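A minimal sketch of wiring the dataset to the transforms defined further down in this file; the COCO path is a placeholder:

dataset_train = CocoDataset('/path/to/coco', set_name='train2017',
                            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
sample = dataset_train[0]
print(sample['img'].shape, sample['annot'].shape, sample['scale'])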
class CSVDataset(Dataset):
    """CSV dataset."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with training annotations
            class_list (string): CSV file with the class list
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform
        # parse the provided class file (comma-delimited)
        try:
            with self._open_for_csv(self.class_list) as file:
                self.classes = self.load_classes(csv.reader(file, delimiter=','))
        except ValueError as e:
            raise ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e))
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key
        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with self._open_for_csv(self.train_file) as file:
                self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes)
        except ValueError as e:
            raise ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e))
        self.image_names = list(self.image_data.keys())

    def _parse(self, value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.
        Returns `function(value)`.
        Any `ValueError` raised is caught and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            raise_from(ValueError(fmt.format(e)), None)

    def _open_for_csv(self, path):
        """
        Open a file with flags suitable for csv.reader.
        On Python 2 this means mode 'rb'; on Python 3 it means mode 'r'
        with "universal newlines".
        """
        if sys.version_info[0] < 3:
            return open(path, 'rb')
        else:
            return open(path, 'r', newline='')

    def load_classes(self, csv_reader):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1
            try:
                class_name, class_id = row
            except ValueError:
                raise ValueError('line {}: format should be \'class_name,class_id\''.format(line))
            class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))
            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def load_image(self, image_index):
        img = skimage.io.imread(self.image_names[image_index])
        if len(img.shape) == 2:
            # expand a single-channel grayscale image to 3-channel RGB
            img = skimage.color.gray2rgb(img)
        return img.astype(np.float32) / 255.0

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))
        # some images appear to miss annotations
        if len(annotation_list) == 0:
            return annotations
        # parse annotations
        for idx, a in enumerate(annotation_list):
            # some annotations have basically no width / height, skip them
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']
            if (x2 - x1) < 1 or (y2 - y1) < 1:
                continue
            annotation = np.zeros((1, 5))
            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2
            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)
        return annotations

    def _read_annotations(self, csv_reader, classes):
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1
            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)
            if img_file not in result:
                result[img_file] = []
            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
                continue
            x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))
            # Check that the bounding box is valid.
            if x2 <= x1:
                raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
            if y2 <= y1:
                raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))
            # check that the class name is present in the class list
            if class_name not in classes:
                raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))
            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result

    def name_to_label(self, name):
        return self.classes[name]

    def label_to_name(self, label):
        # self.classes maps name -> label and self.labels maps label -> name,
        # each the inverse of the other.
        return self.labels[label]

    def num_classes(self):
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        # aspect ratio = width / height
        image = Image.open(self.image_names[image_index])
        return float(image.width) / float(image.height)
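The two CSV files the class expects look like this; the file names and rows below are invented for illustration:

# classes.csv -- one "class_name,class_id" pair per line:
#   cat,0
#   dog,1
#
# annotations.csv -- "img_file,x1,y1,x2,y2,class_name"; a row with empty
# coordinates marks an image that has no objects:
#   images/0001.jpg,120,80,310,260,cat
#   images/0002.jpg,,,,,
dataset = CSVDataset(train_file='annotations.csv', class_list='classes.csv',
                     transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))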
def collater(data):
    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]
    # Note: shape[0] is the row count and shape[1] the column count, so these
    # names are swapped relative to the usual width/height convention; they
    # are, however, used consistently below.
    widths = [int(s.shape[0]) for s in imgs]
    heights = [int(s.shape[1]) for s in imgs]
    batch_size = len(imgs)
    max_width = np.array(widths).max()
    max_height = np.array(heights).max()
    # One zero-padded (max_width, max_height, 3) canvas per image.
    padded_imgs = torch.zeros(batch_size, max_width, max_height, 3)
    for i in range(batch_size):
        img = imgs[i]
        # Copy image i into the top-left corner of its canvas; the rest stays zero.
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img
    # Pad every annotation array to the same number of rows, filling with -1.
    max_num_annots = max(annot.shape[0] for annot in annots)
    if max_num_annots > 0:
        annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1
        for idx, annot in enumerate(annots):
            if annot.shape[0] > 0:
                annot_padded[idx, :annot.shape[0], :] = annot
    else:
        annot_padded = torch.ones((len(annots), 1, 5)) * -1
    # (N, H, W, C) -> (N, C, H, W) for PyTorch.
    padded_imgs = padded_imgs.permute(0, 3, 1, 2)
    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}
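A sketch of plugging collater and AspectRatioBasedSampler (defined below) into a DataLoader; dataset_train is assumed to be one of the datasets above:

sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
dataloader_train = DataLoader(dataset_train, num_workers=3,
                              collate_fn=collater, batch_sampler=sampler)
for batch in dataloader_train:
    print(batch['img'].shape)    # (2, 3, H_max, W_max), zero-padded
    print(batch['annot'].shape)  # (2, max_annots, 5), padded with -1
    break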
class Resizer(object):
    """Resize an image so its smallest side is min_side (without the largest
    side exceeding max_side) and convert the ndarrays in the sample to Tensors."""

    def __call__(self, sample, min_side=608, max_side=1024):
        image, annots = sample['img'], sample['annot']
        rows, cols, cns = image.shape
        smallest_side = min(rows, cols)
        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side
        # check if the largest side is now greater than max_side, which can
        # happen when images have a large aspect ratio
        largest_side = max(rows, cols)
        if largest_side * scale > max_side:
            scale = max_side / largest_side
        # resize the image with the computed scale
        image = skimage.transform.resize(image, (int(round(rows * scale)), int(round(cols * scale))))
        rows, cols, cns = image.shape
        # Pad each side up to the next multiple of 32 so the FPN's stride-32
        # levels divide evenly. (When a side is already a multiple of 32 this
        # still adds a full extra 32 pixels of padding.)
        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32
        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)
        annots[:, :4] *= scale
        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale}
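A worked example of the scale computation, using an invented 480 x 640 image:

rows, cols = 480, 640
scale = 608 / min(rows, cols)            # 608 / 480 ≈ 1.267
if max(rows, cols) * scale > 1024:       # 640 * 1.267 ≈ 811 <= 1024, keep scale
    scale = 1024 / max(rows, cols)
new_rows, new_cols = int(round(rows * scale)), int(round(cols * scale))  # 608, 811
# Padding up to multiples of 32 yields 640 x 832 (608 is already a multiple
# of 32, so it still gains a full extra 32 rows).
print(new_rows + (32 - new_rows % 32), new_cols + (32 - new_cols % 32))  # 640 832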
class Augmenter(object):
    """Randomly flip an image (and its annotations) horizontally."""

    def __call__(self, sample, flip_x=0.5):
        if np.random.rand() < flip_x:
            image, annots = sample['img'], sample['annot']
            image = image[:, ::-1, :]
            rows, cols, channels = image.shape
            # Mirror the x coordinates: the old x2 becomes the new x1 and
            # vice versa, measured from the right edge.
            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()
            x_tmp = x1.copy()
            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp
            sample = {'img': image, 'annot': annots}
        return sample
# Normalize with ImageNet statistics: (image - mean) / std.
class Normalizer(object):
    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        image, annots = sample['img'], sample['annot']
        return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots}


class UnNormalizer(object):
    def __init__(self, mean=None, std=None):
        if mean is None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std is None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    # __call__ lets an instance be invoked like a function.
    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be un-normalized.
        Returns:
            Tensor: Un-normalized image.
        """
        # Invert the normalization channel by channel: t = t * std + mean.
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor
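A quick round trip showing that UnNormalizer inverts Normalizer on a toy image:

img = np.random.rand(4, 4, 3).astype(np.float32)
normed = Normalizer()({'img': img, 'annot': np.zeros((0, 5))})['img']
# UnNormalizer expects a (C, H, W) tensor, so permute before applying it.
restored = UnNormalizer()(torch.from_numpy(normed).permute(2, 0, 1).float())
print(torch.allclose(restored, torch.from_numpy(img).permute(2, 0, 1), atol=1e-6))  # True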
class AspectRatioBasedSampler(Sampler):
    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        # Shuffle the batches in place, then yield them one at a time.
        # yield returns a value and remembers where it left off, so the next
        # iteration resumes right after it, making __iter__ a generator.
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # Order the images by aspect ratio so each batch holds
        # similarly-shaped images and needs less padding.
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))
        # Divide into groups, one group = one batch; order[x % len(order)]
        # wraps around, so the last batch is filled back up from the start
        # when len(order) is not a multiple of batch_size.
        return [[order[x % len(order)] for x in range(i, i + self.batch_size)]
                for i in range(0, len(order), self.batch_size)]
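A worked example of that comprehension: with five images whose aspect-ratio order came out as [3, 0, 4, 1, 2] and batch_size = 2, the batches are built as follows, with the last one wrapping around to the start:

order, batch_size = [3, 0, 4, 1, 2], 2
groups = [[order[x % len(order)] for x in range(i, i + batch_size)]
          for i in range(0, len(order), batch_size)]
print(groups)  # [[3, 0], [4, 1], [2, 3]] -- the last batch reuses index 3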
coco_eval code
from pycocotools.cocoeval import COCOeval
import json
import torch


def evaluate_coco(dataset, model, threshold=0.05):
    model.eval()
    with torch.no_grad():
        # start collecting results
        results = []
        image_ids = []
        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']
            # run network on a single (1, 3, H, W) batch
            if torch.cuda.is_available():
                scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = model(data['img'].permute(2, 0, 1).float().unsqueeze(dim=0))
            scores = scores.cpu()
            labels = labels.cpu()
            boxes = boxes.cpu()
            # correct boxes for image scale
            boxes /= scale
            if boxes.shape[0] > 0:
                # change to (x, y, w, h) (MS COCO standard)
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]
                # compute predicted labels and scores
                for box_id in range(boxes.shape[0]):
                    score = float(scores[box_id])
                    label = int(labels[box_id])
                    box = boxes[box_id, :]
                    # scores are sorted, so we can break
                    if score < threshold:
                        break
                    # append detection for each positively labeled class
                    image_result = {
                        'image_id': dataset.image_ids[index],
                        'category_id': dataset.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': box.tolist(),
                    }
                    # append detection to results
                    results.append(image_result)
            # append image to list of processed images
            image_ids.append(dataset.image_ids[index])
            # print progress
            print('{}/{}'.format(index, len(dataset)), end='\r')
        if not len(results):
            return
        # write output
        json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4)
        # load results in COCO evaluation tool
        coco_true = dataset.coco
        coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name))
        # run COCO evaluation
        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        model.train()
        return
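A minimal sketch of running the evaluation on the COCO 2017 validation split; the path is a placeholder, and resnet50 plus the transforms come from the sections above:

dataset_val = CocoDataset('/path/to/coco', set_name='val2017',
                          transform=transforms.Compose([Normalizer(), Resizer()]))
retinanet = resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
if torch.cuda.is_available():
    retinanet = retinanet.cuda()
evaluate_coco(dataset_val, retinanet, threshold=0.05)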