1. Annotate your own data: the original jpg/png images plus the labelme json files.
2. Organize the data in the following structure:
- ImageSets
  - Segmentation
    - train.txt
    - trainval.txt
    - val.txt
- JPEGImages
- SegmentationClass
Inside ImageSets there is a single Segmentation folder, and under Segmentation you need to create three txt files, train.txt, trainval.txt and val.txt, which record how the data is split into training, validation and test sets.
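Each of these txt files just lists image file stems, one per line, with no extension. A train.txt might look like this (the names are placeholders):

img_0001
img_0002
img_0087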
2.1 Structure notes
- The JPEGImages directory holds the original images of the dataset.
- The SegmentationClass directory holds the mask images. Each mask is generated from the original image's json annotation file (the generation step is described below); note that every mask must have the same name as its original image.
2.2 Generating the three txt files
import os
import numpy as np

root = r"D:\dataset\belt\JPEGImages"
output = r"D:\dataset\belt\ImageSets\Segmentation"

filenames = []
# Walk the directory of original images and collect every file name
for dirpath, dirnames, files in os.walk(root):
    for file in files:
        print(file)
        filenames.append(os.path.splitext(file)[0])  # store the name without its extension

# Shuffle the list of names
np.random.shuffle(filenames)

# Split into training / trainval / validation sets, default ratio 6:2:2
train = filenames[:int(len(filenames) * 0.6)]
trainval = filenames[int(len(filenames) * 0.6):int(len(filenames) * 0.8)]
val = filenames[int(len(filenames) * 0.8):]

# Write train.txt, trainval.txt and val.txt
with open(os.path.join(output, 'train.txt'), 'w') as f1, \
        open(os.path.join(output, 'trainval.txt'), 'w') as f2, \
        open(os.path.join(output, 'val.txt'), 'w') as f3:
    for i in train:
        f1.write(i + '\n')
    for i in trainval:
        f2.write(i + '\n')
    for i in val:
        f3.write(i + '\n')
print('Done!')
2.3 Generating the mask images from the json files (labelme to voc)
#!/usr/bin/env python
# labelme2voc.py
from __future__ import print_function

import argparse
import glob
import os
import os.path as osp
import sys

import imgviz
import numpy as np

import labelme


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClass"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClassPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationClassVisualization")
        )
    os.makedirs(osp.join(args.output_dir, "SegmentationObject"))
    os.makedirs(osp.join(args.output_dir, "SegmentationObjectPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationObjectVisualization")
        )
    print("Creating dataset:", args.output_dir)

    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)
    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_cls_file = osp.join(
            args.output_dir, "SegmentationClass", base + ".npy"
        )
        out_clsp_file = osp.join(
            args.output_dir, "SegmentationClassPNG", base + ".png"
        )
        if not args.noviz:
            out_clsv_file = osp.join(
                args.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )
        out_ins_file = osp.join(
            args.output_dir, "SegmentationObject", base + ".npy"
        )
        out_insp_file = osp.join(
            args.output_dir, "SegmentationObjectPNG", base + ".png"
        )
        if not args.noviz:
            out_insv_file = osp.join(
                args.output_dir,
                "SegmentationObjectVisualization",
                base + ".jpg",
            )

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        cls, ins = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        ins[cls == -1] = 0  # ignore it.

        # class label
        labelme.utils.lblsave(out_clsp_file, cls)
        np.save(out_cls_file, cls)
        if not args.noviz:
            clsv = imgviz.label2rgb(
                cls,
                imgviz.rgb2gray(img),
                label_names=class_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_clsv_file, clsv)

        # instance label
        labelme.utils.lblsave(out_insp_file, ins)
        np.save(out_ins_file, ins)
        if not args.noviz:
            instance_ids = np.unique(ins)
            instance_names = [str(i) for i in range(max(instance_ids) + 1)]
            insv = imgviz.label2rgb(
                ins,
                imgviz.rgb2gray(img),
                label_names=instance_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_insv_file, insv)


if __name__ == "__main__":
    main()
python ./labelme2voc.py .\dataset\json .\dataset\voc --labels labels.txt
The class names go in labels.txt, the file passed via --labels; it always contains a _background_ class, so a normal semantic segmentation task has at least two classes.
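For the grass example used later in this article, labels.txt could look like this (the script above asserts that the first two entries are __ignore__ and _background_):

__ignore__
_background_
grass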
3. Modify the deeplabv3+ source code to add your own dataset
3.1 Add your dataset's path in mypath.py
In this example the dataset being added is named grass.
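A minimal sketch of the change, assuming mypath.py follows the repo's usual Path.db_root_dir(dataset) pattern; the path below is a hypothetical example:

# mypath.py -- add a branch for the new dataset inside db_root_dir(dataset)
elif dataset == 'grass':
    return r'D:\dataset\grass'  # hypothetical root of the VOC-style dataset from section 2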
3.2 Add a file under dataloaders/datasets
Copy pascal.py and rename the copy after your own dataset.
Then open the new py file and change the number of classes and the dataset name inside it, as sketched below.
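A sketch of the lines that change after copying pascal.py; GrassSegmentation is a name chosen here for illustration:

# dataloaders/datasets/grass.py -- renamed copy of pascal.py
class GrassSegmentation(Dataset):
    NUM_CLASSES = 2  # _background_ + grass

    def __init__(self, args, base_dir=Path.db_root_dir('grass'), split='train'):
        ...  # body unchanged from pascal.py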
3.3 Modify utils.py under the dataloaders directory
Search for the def get_cityscapes_labels() function and add a function for your own dataset above it, e.g. get_grass_labels().
All this function does is assign a mask color to each of your classes: as many classes as you have, that many colors.
Then add a branch inside the decode_segmap function, where n_classes is the number of classes you want to segment, as sketched below.
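A minimal sketch of both additions, assuming decode_segmap dispatches on the dataset name the same way its pascal/cityscapes branches do; the colors are arbitrary examples:

# dataloaders/utils.py -- one RGB color per class
def get_grass_labels():
    return np.asarray([
        [0, 0, 0],    # _background_
        [0, 128, 0],  # grass
    ])

# inside decode_segmap(label_mask, dataset, ...), next to the existing branches:
    elif dataset == 'grass':
        n_classes = 2
        label_colours = get_grass_labels()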
3.4 Modify __init__.py under the dataloaders directory
Add your dataset's name to the import on the first line, then copy the 'pascal' dataset branch and change the name to your own dataset's, as in the sketch below.
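A sketch of both changes, assuming make_data_loader in dataloaders/__init__.py follows the same pattern as the existing 'pascal' branch; GrassSegmentation is the class from section 3.2:

# dataloaders/__init__.py
from dataloaders.datasets import cityscapes, coco, pascal, grass

# inside make_data_loader(args, **kwargs), next to the existing branches:
    elif args.dataset == 'grass':
        train_set = grass.GrassSegmentation(args, split='train')
        val_set = grass.GrassSegmentation(args, split='val')
        num_class = train_set.NUM_CLASSES
        train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, **kwargs)
        return train_loader, val_loader, None, num_class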
3.5 In the top-level directory, modify train.py (around line 185) to add your dataset's name to the --dataset argument; you can also make it the default. For example:
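A sketch; the exact surrounding lines in train.py may differ:

# train.py -- add 'grass' to the dataset choices, optionally as the default
parser.add_argument('--dataset', type=str, default='grass',
                    choices=['pascal', 'coco', 'cityscapes', 'grass'],
                    help='dataset name (default: grass)')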
4. Start training
python train.py --backbone resnet --lr 0.007 --workers 1 --epochs 50 --batch-size 8 --gpu-ids 0 --checkname deeplab-resnet --dataset grass
5. Testing
The source code ships without a test script, so you need to add a test py file yourself.
Set --in-path to your dataset's test images; the final results are saved under --out-path.
#
# demo.py
#
import argparse
import os
import time

import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
from torchvision.utils import make_grid, save_image

from modeling.deeplab import *
from dataloaders import custom_transforms as tr
from dataloaders.utils import *


def main():
    parser = argparse.ArgumentParser(description="PyTorch DeeplabV3Plus Training")
    parser.add_argument('--in-path', type=str, default='/root/home/zyx/Seg552_VOC/test',
                        help='image to test')
    parser.add_argument('--out-path', type=str, required=True, help='mask image to save')
    parser.add_argument('--backbone', type=str, default='resnet',
                        choices=['resnet', 'xception', 'drn', 'mobilenet'],
                        help='backbone name (default: resnet)')
    parser.add_argument('--ckpt', type=str, default='deeplab-resnet.pth',
                        help='saved model')
    parser.add_argument('--out-stride', type=int, default=16,
                        help='network output stride (default: 16)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--gpu-ids', type=str, default='0',
                        help='use which gpu to train, must be a \
                        comma-separated list of integers only (default=0)')
    parser.add_argument('--dataset', type=str, default='grass',
                        choices=['pascal', 'coco', 'cityscapes', 'belt', 'grass'],
                        help='dataset name (default: grass)')
    parser.add_argument('--crop-size', type=int, default=513,
                        help='crop image size')
    parser.add_argument('--num_classes', type=int, default=2,
                        help='number of classes')
    parser.add_argument('--sync-bn', type=bool, default=None,
                        help='whether to use sync bn (default: auto)')
    parser.add_argument('--freeze-bn', type=bool, default=False,
                        help='whether to freeze bn parameters (default: False)')
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        try:
            args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
        except ValueError:
            raise ValueError('Argument --gpu_ids must be a comma-separated list of integers only')

    if args.sync_bn is None:
        if args.cuda and len(args.gpu_ids) > 1:
            args.sync_bn = True
        else:
            args.sync_bn = False

    # Build the model and load the trained weights
    model_s_time = time.time()
    model = DeepLab(num_classes=args.num_classes,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    # model = nn.DataParallel(model)

    os.makedirs(args.out_path, exist_ok=True)
    ckpt = torch.load(args.ckpt, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    if args.cuda:
        model = model.cuda()
    model_u_time = time.time()
    model_load_time = model_u_time - model_s_time
    print("model load time is {}".format(model_load_time))

    composed_transforms = transforms.Compose([
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()])

    model.eval()
    for name in os.listdir(args.in_path):
        s_time = time.time()
        image = Image.open(args.in_path + "/" + name).convert('RGB')
        # The transforms expect an {'image', 'label'} sample, so reuse the
        # input image as a dummy label
        target = Image.open(args.in_path + "/" + name).convert('L')
        sample = {'image': image, 'label': target}
        tensor_in = composed_transforms(sample)['image'].unsqueeze(0)

        if args.cuda:
            tensor_in = tensor_in.cuda()
        with torch.no_grad():
            output = model(tensor_in)

        grid_image = make_grid(
            decode_seg_map_sequence(torch.max(output[:3], 1)[1].detach().cpu().numpy()),
            3, normalize=False, range=(0, 255))

        # Read the original image with cv2
        img_s = cv2.imread(args.in_path + "/" + name)
        # tensor -> cv2 image
        img_m = grid_image.clone().detach()     # make a copy
        img_m = img_m.to(torch.device('cpu'))   # move to cpu
        img_m = img_m.squeeze()                 # drop the batch dimension
        # scale [0, 1] to [0, 255], CHW -> HWC, then to a uint8 numpy array
        img_m = img_m.mul_(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).type(torch.uint8).numpy()
        img_m = cv2.cvtColor(img_m, cv2.COLOR_RGB2BGR)  # convert RGB to BGR
        # Blend the mask over the original image
        dst = cv2.addWeighted(img_s, 1, img_m, 0.7, 0)
        # Save the result
        cv2.imwrite(args.out_path + "/" + "{}_mask.png".format(name[0:-4]), dst)
        # save_image(grid_image, args.out_path + "/" + "{}_mask.png".format(name[0:-4]))
        u_time = time.time()
        img_time = u_time - s_time
        print("image:{} time: {} ".format(name, img_time))
    print("image save in out_path.")


if __name__ == "__main__":
    main()
# python demo.py --in-path your_file --out-path your_dst_file
Note: add your own dataset to the --dataset choices in the code.
Run the test command:
python demo.py --dataset grass --num_classes 2 --ckpt run/grass/deeplab-resnet/model_best.pth.tar --backbone resnet --in-path dataset/voc/JPEGImages --out-path ./dataset/test