1. Environment setup:
Refer to the official documentation; this is not covered in detail here.
2. Building the COCO dataset:
This walkthrough starts from XML annotation files produced with the labelImg annotation tool (mine are in Pascal VOC format).
① Prepare the custom dataset:
Put all annotation files into an Annotations folder and all images into a JPEGImages folder:
customer_data/
├── Annotations/   # labelImg .xml files
└── JPEGImages/    # image files
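For reference, here is a minimal labelImg-style VOC annotation (the file name and values are made up; the <size>, <name>, and <bndbox> fields are what the scripts below actually parse):

<annotation>
    <folder>JPEGImages</folder>
    <filename>IMG_0001.jpg</filename>
    <size>
        <width>1920</width>
        <height>1080</height>
        <depth>3</depth>
    </size>
    <object>
        <name>person</name>
        <bndbox>
            <xmin>100</xmin>
            <ymin>200</ymin>
            <xmax>300</xmax>
            <ymax>400</ymax>
        </bndbox>
    </object>
</annotation>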
② Convert the dataset to COCO format:
First, split the dataset into training and validation sets by running the Python script splitVOCDataset.py:
import os
import random
import math
from tqdm import tqdm

# Split a VOC-format dataset into train/val lists; also reports annotation
# files that have no matching image
image_extensions = ['.jpg', '.png', '.gif', '.bmp', '.tiff', '.jpeg', '.webp', '.svg', '.psd', '.cr2', '.nef', '.dng']

def split_voc_dataset(dataset_dir, train_ratio, val_ratio, use_random_seed=False, random_seed=999):
    if not (0 < train_ratio + val_ratio <= 1):
        print("Invalid ratio values. train_ratio + val_ratio must be in (0, 1].")
        return
    annotations_dir = os.path.join(dataset_dir, 'Annotations')
    images_dir = os.path.join(dataset_dir, 'JPEGImages')
    output_dir = os.path.join(dataset_dir, 'ImageSets/Main')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Map image basename -> extension for every image in JPEGImages
    # (match on the real file extension, not on a substring of the name)
    dict_info = dict()
    for file in os.listdir(images_dir):
        stem, ext = os.path.splitext(file)
        if ext.lower() in image_extensions:
            dict_info[stem] = ext
    # List all the XML files in the Annotations directory
    xml_files = [file for file in os.listdir(annotations_dir) if file.endswith('.xml')]
    if use_random_seed:
        # Set the random seed for reproducibility
        random.seed(random_seed)
    random.shuffle(xml_files)
    num_samples = len(xml_files)
    print('1. >>>>>> num_samples(total xml data): ', num_samples)
    num_train = math.ceil(num_samples * train_ratio)
    print('2. >>>>>> num_train: ', num_train)
    num_val = num_samples - num_train
    print('3. >>>>>> num_val: ', num_val)
    train_xml_files = xml_files[:num_train]
    val_xml_files = xml_files[num_train:num_train + num_val]
    with open(os.path.join(output_dir, 'train_list.txt'), 'w') as train_file:
        for xml_file in train_xml_files:
            image_name = os.path.splitext(xml_file)[0]
            if image_name in dict_info:
                image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                annotation_path = os.path.join('Annotations', xml_file)
                train_file.write(f'{image_path}\t{annotation_path}\n')
            else:
                print(f"Image not found: {os.path.join(images_dir, image_name)}")
    with open(os.path.join(output_dir, 'val_list.txt'), 'w') as val_file:
        for xml_file in val_xml_files:
            image_name = os.path.splitext(xml_file)[0]
            if image_name in dict_info:
                image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                annotation_path = os.path.join('Annotations', xml_file)
                val_file.write(f'{image_path}\t{annotation_path}\n')
            else:
                print(f"Image not found: {os.path.join(images_dir, image_name)}")
    # Collect the set of class labels used across all annotations
    labels = set()
    for xml_file in tqdm(xml_files):
        annotation_path = os.path.join(annotations_dir, xml_file)
        with open(annotation_path, 'r', encoding='utf-8') as f:
            for line in f.readlines():
                if '<name>' in line:
                    label = line.strip().replace('<name>', '').replace('</name>', '')
                    labels.add(label)
    # Sort so the label -> category-id mapping is stable across runs
    with open(os.path.join(output_dir, 'labels.txt'), 'w') as labels_file:
        for label in sorted(labels):
            labels_file.write(f'{label}\n')

if __name__ == "__main__":
    train_ratio = 0.85  # Adjust the train-validation split ratio as needed
    val_ratio = 1 - train_ratio
    # With use_random_seed=True, changing the seed changes which images end up in
    # the train/val splits; keeping the same seed makes the split reproducible.
    # With use_random_seed=False, random.shuffle() runs unseeded, so the split can
    # differ on every run.
    random_seed = 3688
    use_random_seed = True
    dataset_dir = '/home/topf/dataSet/customer_data/'
    split_voc_dataset(dataset_dir, train_ratio, val_ratio, use_random_seed, random_seed)
After the script finishes, an ImageSets/Main folder is created inside the dataset directory, containing train_list.txt, val_list.txt, and labels.txt.
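Each line in train_list.txt and val_list.txt pairs a relative image path with its annotation path, separated by a tab (the file names below are illustrative):

JPEGImages/IMG_0001.jpg	Annotations/IMG_0001.xml
JPEGImages/IMG_0002.jpg	Annotations/IMG_0002.xml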
Next, convert to COCO format by running the script voc2cocoFormat.py:
import os
import json
from xml.etree import ElementTree as ET

def parse_xml(dataset_dir, xml_file):
    # Parse one VOC XML file and return its boxes and labels
    xml_path = os.path.join(dataset_dir, xml_file)
    tree = ET.parse(xml_path)
    root = tree.getroot()
    objects = root.findall('object')
    annotations = []
    for obj in objects:
        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)
        # Extract label from XML annotation
        label = obj.find('name').text
        if not label:
            print("Label not found in XML annotation. Skipping annotation.")
            continue
        annotations.append({
            'xmin': xmin,
            'ymin': ymin,
            'xmax': xmax,
            'ymax': ymax,
            'label': label
        })
    return annotations

def convert_to_coco_format(image_list_file, labels_dir, output_json_file, dataset_dir):
    # labels_dir is the directory that holds labels.txt (here: ImageSets/Main)
    images = []
    annotations = []
    # Load labels; category ids follow the line order of labels.txt
    with open(os.path.join(labels_dir, 'labels.txt'), 'r', encoding='utf-8') as labels_file:
        label_lines = labels_file.readlines()
    categories = [{'id': i + 1, 'name': label.strip()} for i, label in enumerate(label_lines)]
    annotation_id = 1  # Initialize unique annotation ID
    # Load image list file
    with open(image_list_file, 'r') as image_list:
        image_lines = image_list.readlines()
    for i, line in enumerate(image_lines):
        image_path, annotation_path = line.strip().split('\t')
        image_id = i + 1
        image_filename = os.path.basename(image_path)
        # Extract image size from XML file
        xml_path = os.path.join(dataset_dir, annotation_path)
        tree = ET.parse(xml_path)
        size = tree.find('size')
        image_height = int(size.find('height').text)
        image_width = int(size.find('width').text)
        images.append({
            'id': image_id,
            'file_name': image_filename,
            'height': image_height,
            'width': image_width,
            'license': None,
            'flickr_url': None,
            'coco_url': None,
            'date_captured': None
        })
        # Load annotations from the XML file; COCO bboxes are [x, y, width, height]
        xml_annotations = parse_xml(dataset_dir, annotation_path)
        for xml_annotation in xml_annotations:
            label = xml_annotation['label']
            category_id = next((cat['id'] for cat in categories if cat['name'] == label), None)
            if category_id is None:
                print(f"Label '{label}' not found in categories. Skipping annotation.")
                continue
            bbox = {
                'xmin': xml_annotation['xmin'],
                'ymin': xml_annotation['ymin'],
                'xmax': xml_annotation['xmax'],
                'ymax': xml_annotation['ymax']
            }
            annotations.append({
                'id': annotation_id,
                'image_id': image_id,
                'category_id': category_id,
                'bbox': [bbox['xmin'], bbox['ymin'], bbox['xmax'] - bbox['xmin'], bbox['ymax'] - bbox['ymin']],
                'area': (bbox['xmax'] - bbox['xmin']) * (bbox['ymax'] - bbox['ymin']),
                'segmentation': [],
                'iscrowd': 0
            })
            annotation_id += 1  # Increment annotation ID for uniqueness
    coco_data = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }
    with open(output_json_file, 'w') as json_file:
        json.dump(coco_data, json_file, indent=4)

if __name__ == "__main__":
    # Adjust paths as needed
    output_dataset_dir = '/home/topf/dataSet/customer_data/'
    image_sets_dir = '/home/topf/dataSet/customer_data/ImageSets/Main/'
    train_list_file = os.path.join(image_sets_dir, 'train_list.txt')
    val_list_file = os.path.join(image_sets_dir, 'val_list.txt')
    output_train_json_file = os.path.join(output_dataset_dir, 'train_coco.json')
    output_val_json_file = os.path.join(output_dataset_dir, 'val_coco.json')
    convert_to_coco_format(train_list_file, image_sets_dir, output_train_json_file, output_dataset_dir)
    convert_to_coco_format(val_list_file, image_sets_dir, output_val_json_file, output_dataset_dir)
    print("The JSON files have been successfully generated!!!")
After this step, train_coco.json and val_coco.json are written to the dataset root directory.
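Optionally, the generated files can be sanity-checked with the pycocotools COCO API (a minimal sketch; pycocotools is already a dependency of MMDetection):

from pycocotools.coco import COCO

# Load the generated annotation file and print basic statistics
coco = COCO('/home/topf/dataSet/customer_data/train_coco.json')
print('images:', len(coco.getImgIds()))
print('annotations:', len(coco.getAnnIds()))
print('categories:', [cat['name'] for cat in coco.loadCats(coco.getCatIds())])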
3. Configuring the training config file and training parameters:
We take the rtmdet-l model as an example.
① Copy configs/_base_/datasets/coco_detection.py and rename it coco_detection_person.py. Its content is as follows (see the comments for the modified items):
# dataset settings
dataset_type = 'CocoDataset'
### dataset root path
data_root = 'dataset/det/person_det/coco/'
### define the dataset classes (note: referenced by the dataloader dicts below)
metainfo = {
    'classes': ('person', ),
    'palette': [
        (220, 20, 60),
    ]
}
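### For multiple classes, extend both fields in the same order as labels.txt, e.g.
### (class names and colors below are illustrative):
### metainfo = {
###     'classes': ('person', 'car'),
###     'palette': [(220, 20, 60), (0, 255, 0)]
### }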
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection/coco/'
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection/',
# 'data/': 's3://openmmlab/datasets/detection/'
# }))
backend_args = None
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
### training set configuration
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        ### reference the classes defined above
        metainfo=metainfo,
        ### dataset root path
        data_root=data_root,
        ### annotation JSON path (relative to data_root)
        ann_file='annotations/train_coco.json',
        ### image directory (relative to data_root)
        data_prefix=dict(img='JPEGImages/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
### validation set configuration
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        ### reference the classes defined above
        metainfo=metainfo,
        ### dataset root path
        data_root=data_root,
        ### annotation JSON path (relative to data_root)
        ann_file='annotations/val_coco.json',
        ### image directory (relative to data_root)
        data_prefix=dict(img='JPEGImages/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/val_coco.json',
    metric='bbox',
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator
# inference on test dataset and
# format the output results for submission.
# test_dataloader = dict(
#     batch_size=1,
#     num_workers=2,
#     persistent_workers=True,
#     drop_last=False,
#     sampler=dict(type='DefaultSampler', shuffle=False),
#     dataset=dict(
#         type=dataset_type,
#         data_root=data_root,
#         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#         data_prefix=dict(img='test2017/'),
#         test_mode=True,
#         pipeline=test_pipeline))
# test_evaluator = dict(
#     type='CocoMetric',
#     metric='bbox',
#     format_only=True,
#     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#     outfile_prefix='./work_dirs/coco_detection/test')
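Note on paths: with the settings above, MMDetection resolves ann_file and data_prefix relative to data_root, so the layout it expects is:

dataset/det/person_det/coco/
├── annotations/
│   ├── train_coco.json
│   └── val_coco.json
└── JPEGImages/

Since voc2cocoFormat.py writes the JSON files to the dataset root, move them into the annotations/ subfolder (or edit ann_file accordingly).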
② Copy configs/rtmdet/rtmdet_l_8xb32-300e_coco.py and rename it rtmdet_l_8xb32-300e_coco_person_det.py. Its content is as follows (see the comments for the modified items):
### changed '../_base_/datasets/coco_detection' to '../_base_/datasets/coco_detection_person.py'
_base_ = [
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py',
    '../_base_/datasets/coco_detection_person.py', './rtmdet_tta.py'
]
### load the pretrained model
load_from = '/home/topf/ai-project/mmdetection-3.1.0/checkpoints/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth'
model = dict(
    type='RTMDet',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[103.53, 116.28, 123.675],
        std=[57.375, 57.12, 58.395],
        bgr_to_rgb=False,
        batch_augments=None),
    backbone=dict(
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=1,
        widen_factor=1,
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='CSPNeXtPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3,
        expand_ratio=0.5,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='RTMDetSepBNHead',
        ### change to the number of custom classes
        num_classes=1,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        anchor_generator=dict(
            type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
        bbox_coder=dict(type='DistancePointBBoxCoder'),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        with_objectness=False,
        exp_on_reg=True,
        share_conv=True,
        pred_kernel_size=1,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU', inplace=True)),
    train_cfg=dict(
        assigner=dict(type='DynamicSoftLabelAssigner', topk=13),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=30000,
        min_bbox_size=0,
        score_thr=0.001,
        nms=dict(type='nms', iou_threshold=0.65),
        max_per_img=300),
)
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
    dict(
        type='RandomResize',
        scale=(1280, 1280),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=(640, 640)),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(
        type='CachedMixUp',
        img_scale=(640, 640),
        ratio_range=(1.0, 1.0),
        max_cached_images=20,
        pad_val=(114, 114, 114)),
    dict(type='PackDetInputs')
]
train_pipeline_stage2 = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='RandomResize',
        scale=(640, 640),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=(640, 640)),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    ### adjust to your GPU memory and compute (my test card is an RTX 2080 Ti)
    batch_size=2,
    num_workers=2,
    batch_sampler=None,
    pin_memory=True,
    dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(
    ### adjust to your GPU memory and compute (my test card is an RTX 2080 Ti)
    batch_size=2, num_workers=2, dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
### max_epochs = 300
### total number of training epochs
max_epochs = 600
stage2_num_epochs = 20
### base_lr = 0.004
### base learning rate
base_lr = 0.002
### checkpoint/validation interval (save and validate every 30 epochs);
### during the final stage2_num_epochs epochs, validation runs every epoch
interval = 30
train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=interval,
    dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)])
val_evaluator = dict(proposal_nums=(100, 1, 10))
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # use cosine lr from max_epochs/2 to max_epochs (here: epoch 300 to 600)
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]
# hooks
default_hooks = dict(
    checkpoint=dict(
        interval=interval,
        max_keep_ckpts=3  # only keep latest 3 checkpoints
    ))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]
The configuration work is now complete.
4. Start training:
Run the training script:
python tools/train.py configs/rtmdet/rtmdet_l_8xb32-300e_coco_person_det.py --work-dir work_dir_person_det/
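With multiple GPUs, training can also be launched through the distributed launcher that ships with MMDetection (a sketch, assuming 2 GPUs):

bash tools/dist_train.sh configs/rtmdet/rtmdet_l_8xb32-300e_coco_person_det.py 2 --work-dir work_dir_person_det/

If training is interrupted, appending --resume to the train.py command should pick up from the latest checkpoint in the work dir.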
And with that, the training journey begins ~ enjoy yourself ~
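Once checkpoints start appearing, a quick visual sanity check can be run with the bundled demo script (the test image path and checkpoint name below are illustrative; use whichever epoch_*.pth is in your work dir):

python demo/image_demo.py path/to/test.jpg configs/rtmdet/rtmdet_l_8xb32-300e_coco_person_det.py --weights work_dir_person_det/epoch_600.pth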