Loading Data
dataLoader.py
import torch
import numpy as np
from dataLoader_utils import my_transform, cv2_reader
import cv2
class MyDataSet(torch.utils.data.Dataset):
    def __init__(self, root, output_hw, transform=None, loader=None):
        # 1. Build data_items
        # data_line = 'imagePath xmin1,ymin1,xmax1,ymax1 xmin2,ymin2,xmax2,ymax2 ...'
        # data_item = [imagePath, [[xmin1, ymin1, xmax1, ymax1], [xmin2, ymin2, xmax2, ymax2], ...]]
        self.data_items = []
        with open(root, 'r') as data_lines:
            for data_line in data_lines:
                line = data_line.strip().split(' ')
                image_path = line[0]
                image_boxes = np.array([list(map(int, box.split(','))) for box in line[1:]])
                self.data_items.append([image_path, image_boxes])
        # 2. Initialize the remaining member variables
        self.output_hw = output_hw
        self.transform = transform
        self.loader = loader

    def __getitem__(self, index):
        # 1. Read the image and its annotated box coordinates
        image_path, image_boxes = self.data_items[index]
        image = self.loader(image_path)
        # 2. Transform the image and the boxes; inside the transform the image passes
        #    through PIL.Image and comes back as an np.array in HWC order
        if self.transform is not None:
            image, boxes = self.transform(image, image_boxes, self.output_hw)
        else:
            boxes = image_boxes  # without this, boxes would be undefined when transform is None
        return np.transpose(image, (2, 0, 1)), boxes

    def __len__(self):
        return len(self.data_items)

if __name__ == '__main__':
    input_hw = (300, 300)  # size the network expects
    batch_size = 2
    for epoch in range(1):
        train_data = MyDataSet(r'train.txt', input_hw, transform=my_transform, loader=cv2_reader)
        # Because the number of objects per image varies, batch_size can only be 1 here
        # (a collate_fn sketch after this listing shows one way around this).
        # DataLoader converts the np.array outputs to tensors automatically.
        data_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=1, shuffle=False)
        # 2. Iterate
        batch_image = []
        batch_boxes = []
        for image, boxes in data_loader:
            batch_image.append(image)  # image.shape = [b, c, h, w]
            batch_boxes.append(boxes)
            if len(batch_image) == batch_size:
                for i, (image, boxes) in enumerate(zip(batch_image, batch_boxes)):
                    # Save the transformed image
                    image_hwc = image.squeeze().permute(1, 2, 0).numpy().astype(np.uint8)  # bchw -> hwc
                    image_hwc = cv2.cvtColor(image_hwc, cv2.COLOR_RGB2BGR)
                    cv2.imwrite(str(i) + '_20201223.jpg', image_hwc)
                    # Print the transformed box coordinates
                    print(boxes)
                batch_image.clear()
                batch_boxes.clear()
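
The comment above pins batch_size to 1 because each image carries a different number of boxes. If larger batches are wanted, a common workaround is to hand DataLoader a custom collate_fn that stacks the images and keeps the boxes as a plain Python list. The sketch below is not part of the original files; detection_collate is just an illustrative name, and it assumes __getitem__ returns the (CHW image, boxes) numpy pair shown above.

import numpy as np
import torch

def detection_collate(batch):
    # batch is a list of (image, boxes) pairs straight from __getitem__,
    # so both elements are still numpy arrays here.
    images, boxes = [], []
    for image, box in batch:
        images.append(torch.from_numpy(np.ascontiguousarray(image)).float())  # [c, h, w]
        boxes.append(torch.from_numpy(box).float())                           # [n_i, 4], n_i varies per image
    return torch.stack(images, dim=0), boxes  # [b, c, h, w] tensor, list of b box tensors

# Hypothetical usage:
# data_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
#                                           shuffle=True, collate_fn=detection_collate)

With something like this in place, the manual batch_image / batch_boxes accumulation in the demo loop would no longer be needed.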
dataLoader_utils.py
from PIL import Image
import numpy as np
import cv2
MEANS = (104, 117, 123) # RGB
def cv2_reader(image_path):
    # Reading with the default flags drops the alpha channel; the return type is numpy.ndarray.
    image = cv2.imread(image_path)
    # Convert OpenCV's default BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image  # hwc

# Half-open interval [a, b)
def rand(a=0.0, b=1.0):
    return np.random.rand() * (b - a) + a

# type(image_data) = type(box_data) = numpy.ndarray, type(image_size) = tuple
def means_normalize(image_data, box_data, image_size):
    # 1. Subtract the per-channel means from the image
    image_data = image_data - MEANS
    # 2. Normalize the box coordinates to [0, 1]
    boxes = np.array(box_data[:, :4], dtype=np.float32)
    boxes[:, 0] = boxes[:, 0] / image_size[0]  # xmin / w
    boxes[:, 1] = boxes[:, 1] / image_size[1]  # ymin / h
    boxes[:, 2] = boxes[:, 2] / image_size[0]  # xmax / w
    boxes[:, 3] = boxes[:, 3] / image_size[1]  # ymax / h
    # 3. Keep the boxes sane: clip to [0, 1], bail out if any box has degenerated
    boxes = np.maximum(np.minimum(boxes, 1), 0)
    if ((boxes[:, 3] - boxes[:, 1]) <= 0).any() or ((boxes[:, 2] - boxes[:, 0]) <= 0).any():
        return image_data, np.array([])
    # 4. Append the class label after the coordinates
    box_data = np.concatenate([boxes, box_data[:, -1:]], axis=-1)
    return image_data, box_data

# type(image) = type(boxes) = numpy.ndarray, type(output_hw) = tuple
def my_transform(image, boxes, output_hw, jitter=0.1, hsv=(0.1, 1.1, 1.1)):
    input_hw = image.shape[:2]
    # 1.1 Random horizontal flip
    params_flip = True if rand() < 0.5 else False
    if params_flip:
        image = cv2.flip(image, 1)
    # 1.2 Random aspect-ratio jitter
    params_new_hw1 = (round(input_hw[0] * rand(0.95, 1.05)),
                      round(input_hw[1] * rand(0.95, 1.05)))
    image = cv2.resize(image, params_new_hw1[::-1], interpolation=cv2.INTER_CUBIC)
    # 1.3 Resize to fit the fixed output size, then paste at a random offset
    ratio = min(np.divide(output_hw, params_new_hw1))
    params_new_hw2 = tuple(np.multiply(params_new_hw1, 0.95 * ratio).astype(int))
    image = cv2.resize(image, params_new_hw2[::-1], interpolation=cv2.INTER_CUBIC)
    params_dx_dy = int(round(rand(0, output_hw[1] - params_new_hw2[1]))), \
                   int(round(rand(0, output_hw[0] - params_new_hw2[0])))
    image_pil = Image.fromarray(image)
    new_image = Image.new('RGB', output_hw[::-1], (255, 0, 0))  # PIL expects (w, h)
    new_image.paste(image_pil, params_dx_dy)
    image = np.array(new_image)
    # The boxes follow the same transforms: xmin ymin xmax ymax
    boxes = boxes.astype(np.float32)  # work in float so the final rounding is meaningful
    # 2.1 Horizontal flip
    if params_flip:
        boxes[:, :3:2] = input_hw[1] - boxes[:, 2::-2]
    # 2.2 Aspect-ratio jitter
    ratio_hw1 = np.divide(params_new_hw1, input_hw)
    boxes[:, 0:3:2] = np.multiply(boxes[:, 0:3:2], ratio_hw1[1])
    boxes[:, 1:4:2] = np.multiply(boxes[:, 1:4:2], ratio_hw1[0])
    # 2.3 Fixed-size resize plus the paste offset
    ratio_hw2 = np.divide(params_new_hw2, params_new_hw1)
    boxes[:, 0:3:2] = np.multiply(boxes[:, 0:3:2], ratio_hw2[1]) + params_dx_dy[0]
    boxes[:, 1:4:2] = np.multiply(boxes[:, 1:4:2], ratio_hw2[0]) + params_dx_dy[1]
    # 2.4 Round back to integer pixel coordinates
    boxes = np.around(boxes).astype(int)
    return image, boxes
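
means_normalize is not exercised anywhere in the listings above, so a quick self-check might look like the following. It is only a sketch: 'test.jpg' and the single hand-written box are placeholders, and the (w, h) ordering passed as image_size follows the "xmin / w" comments inside means_normalize.

if __name__ == '__main__':
    # Placeholder inputs: 'test.jpg' and the box values are made up for the check.
    img = cv2_reader('test.jpg')                       # hwc, RGB
    raw_boxes = np.array([[30, 40, 120, 160, 0]])      # xmin, ymin, xmax, ymax, class
    out_hw = (300, 300)
    # Geometric augmentation first (coordinates stay in pixels); copy so raw_boxes is untouched.
    img_t, boxes_t = my_transform(img, raw_boxes[:, :4].copy(), out_hw)
    # Then mean subtraction and box normalization; image_size is passed as (w, h).
    img_n, boxes_n = means_normalize(img_t.astype(np.float32),
                                     np.concatenate([boxes_t, raw_boxes[:, -1:]], axis=-1),
                                     (out_hw[1], out_hw[0]))
    print(img_n.shape, boxes_n)                        # (300, 300, 3) and boxes in [0, 1] plus the class column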