DeepLabv3+ is a powerful semantic segmentation model. The complete steps for training it on your own dataset are as follows:
1. Prepare the Dataset
Data requirements
- Input images (e.g., in JPG/PNG format)
- Corresponding annotation images (PNG format, where each pixel value is a class ID)
Annotation notes
- Annotation images should be single-channel; a pixel value of 0 usually denotes the background
- Make sure each annotation has the same dimensions as its source image
- Class IDs should be numbered consecutively starting from 0 (0, 1, 2, ...); a quick check is sketched below
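Before converting anything, it is worth sanity-checking a few annotation files against these requirements. A minimal check (the file path below is a placeholder):

```python
import numpy as np
from PIL import Image

# Hypothetical path to one annotation file
mask = np.array(Image.open("path/to/annotations/example.png"))

assert mask.ndim == 2, "annotation should be single-channel"
print("shape:", mask.shape)                    # must match the source image
print("class IDs present:", np.unique(mask))   # expect 0, 1, 2, ... (255 is sometimes used as an 'ignore' label)
```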
2. Install Dependencies
```bash
# Install TensorFlow (2.x is recommended)
pip install tensorflow
# Install the other dependencies
pip install pillow matplotlib numpy
```
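To confirm the installation works (and, if applicable, that a GPU is visible), a quick check in Python:

```python
import tensorflow as tf

print(tf.__version__)                           # should be 2.x
print(tf.config.list_physical_devices('GPU'))   # non-empty list if a GPU is available
```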
3. Data Preprocessing
Create TFRecord files
DeepLab typically consumes data in TFRecord format. Create a script that generates the TFRecords:
```python
import tensorflow as tf
from PIL import Image
import os

def create_tf_example(image_path, mask_path):
    # Read the already-encoded bytes directly; decoding and re-encoding them
    # is unnecessary and lossy for JPEG.
    with open(image_path, 'rb') as f:
        img_bytes = f.read()
    with open(mask_path, 'rb') as f:
        mask_bytes = f.read()
    img = Image.open(image_path)  # only used to record height/width
    return tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_bytes])),
        'image/filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=[tf.compat.as_bytes(os.path.basename(image_path))])),
        'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=[tf.compat.as_bytes('jpeg')])),
        'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[img.height])),
        'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[img.width])),
        'image/channels': tf.train.Feature(int64_list=tf.train.Int64List(value=[3])),
        'image/segmentation/class/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[mask_bytes])),
        'image/segmentation/class/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=[tf.compat.as_bytes('png')])),
    }))

def create_tf_records(image_dir, mask_dir, output_path):
    writer = tf.io.TFRecordWriter(output_path)
    image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
    for img_file in image_files:
        base_name = os.path.splitext(img_file)[0]
        mask_file = f"{base_name}.png"  # assumes annotations are PNG files with matching names
        img_path = os.path.join(image_dir, img_file)
        mask_path = os.path.join(mask_dir, mask_file)
        tf_example = create_tf_example(img_path, mask_path)
        writer.write(tf_example.SerializeToString())
    writer.close()
```
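A hypothetical invocation of the script above, assuming images and masks live in sibling directories (adjust the paths to your layout):

```python
create_tf_records("data/train/images", "data/train/masks", "path/to/train_tfrecords/train.tfrecord")
create_tf_records("data/val/images", "data/val/masks", "path/to/val_tfrecords/val.tfrecord")
```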
4. Configure Training Parameters
Create a configuration file (e.g. config.yaml):
```yaml
train_dataset_config:
  dataset: "your_dataset"
  data_dir: "path/to/train_tfrecords"
  batch_size: 4
  crop_size: [513, 513]
  min_scale_factor: 0.5
  max_scale_factor: 2.0
  scale_factor_step_size: 0.25
  num_readers: 4
  is_training: true
  should_shuffle: true

val_dataset_config:
  dataset: "your_dataset"
  data_dir: "path/to/val_tfrecords"
  batch_size: 1
  crop_size: [513, 513]
  num_readers: 1
  is_training: false
  should_shuffle: false

train_config:
  base_learning_rate: 0.007
  learning_rate_power: 0.9
  momentum: 0.9
  weight_decay: 0.00004
  training_number_of_steps: 30000
  fine_tune_batch_norm: true  # only worthwhile with large batches (>= 12 or so); set to false for small batches

model_config:
  model_variant: "mobilenet_v2"  # could also be "resnet50", "xception", etc.
  output_stride: 16
  decoder_output_stride: 4
  atrous_rates: [6, 12, 18]
```
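The training script in the next step calls a `load_yaml_config` helper, which is not part of DeepLab or TensorFlow. A minimal sketch using PyYAML that exposes the YAML keys as nested attributes (`config.train_config.momentum`, etc.):

```python
import types
import yaml  # pip install pyyaml

def load_yaml_config(path):
    """Load a YAML file into nested attribute-style objects."""
    def to_namespace(obj):
        if isinstance(obj, dict):
            return types.SimpleNamespace(**{k: to_namespace(v) for k, v in obj.items()})
        return obj
    with open(path) as f:
        return to_namespace(yaml.safe_load(f))
```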
5. Train the Model
Train with the TensorFlow DeepLab implementation:
```python
import tensorflow as tf
from deeplab import model
from deeplab.datasets import data_generator

# Load the configuration (load_yaml_config is the helper sketched above)
config = load_yaml_config('config.yaml')

# Build the model
deeplab_model = model.DeepLab(
    config.model_config,
    is_training=True)

# Create the training data generator
train_dataset = data_generator.Dataset(
    config.train_dataset_config,
    should_shuffle=config.train_dataset_config.should_shuffle,
    should_repeat=True)

# Polynomial ("poly") learning-rate decay, as used by DeepLab
learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=config.train_config.base_learning_rate,
    decay_steps=config.train_config.training_number_of_steps,
    end_learning_rate=0.0,
    power=config.train_config.learning_rate_power)
optimizer = tf.keras.optimizers.SGD(
    learning_rate=learning_rate,
    momentum=config.train_config.momentum)

# Training loop
for step, batch in enumerate(train_dataset):
    if step >= config.train_config.training_number_of_steps:
        break  # the dataset repeats forever, so stop explicitly
    with tf.GradientTape() as tape:
        outputs = deeplab_model(batch['image'], training=True)
        loss = compute_loss(outputs, batch['label'])  # see the sketch below
    gradients = tape.gradient(loss, deeplab_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, deeplab_model.trainable_variables))
    if step % 100 == 0:
        print(f"Step {step}, Loss: {loss.numpy()}")
```
6. Evaluate the Model
```python
val_dataset = data_generator.Dataset(
    config.val_dataset_config,
    should_shuffle=False,
    should_repeat=False)

total_accuracy = 0
count = 0
for batch in val_dataset:
    outputs = deeplab_model(batch['image'], training=False)
    accuracy = compute_accuracy(outputs, batch['label'])  # see the sketch below
    total_accuracy += accuracy
    count += 1

print(f"Validation Accuracy: {total_accuracy / count}")
```
7. Export the Model for Inference
```python
# Save the trained model in SavedModel format
save_dir = "saved_model"
tf.saved_model.save(deeplab_model, save_dir)

# Or export to TFLite (for mobile devices)
converter = tf.lite.TFLiteConverter.from_keras_model(deeplab_model)
tflite_model = converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)
```
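A quick smoke test of the TFLite export with the TFLite interpreter; the input shape and dtype are read from the converted model itself, so only the file name is assumed:

```python
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Feed a random tensor of the expected shape/dtype and check the output shape.
dummy = np.random.rand(*input_details[0]['shape']).astype(input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
logits = interpreter.get_tensor(output_details[0]['index'])
print(np.argmax(logits, axis=-1).shape)  # per-pixel class IDs
```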
8. Use a Pretrained Model (Optional)
Loading pretrained weights can speed up training:
```python
# URL of the pretrained model
PRETRAINED_URL = "http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz"

# Load the weights
deeplab_model.load_weights('path/to/pretrained/weights')
```
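The snippet above only defines the URL; fetching and unpacking the archive can be done with the standard library (it reuses `PRETRAINED_URL` from the block above). The archive contains a TF1-style checkpoint, so whether `load_weights` can consume it directly depends on how your model is built; treat this as a starting point:

```python
import tarfile
import urllib.request

archive = "deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz"
urllib.request.urlretrieve(PRETRAINED_URL, archive)   # download the tarball
with tarfile.open(archive) as tar:
    tar.extractall("pretrained")                      # checkpoint files end up under ./pretrained/
```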
Troubleshooting Common Issues
- Out of memory: reduce batch_size or the crop/image size
- Training does not converge: try lowering the learning rate or using a learning-rate schedule
- Class imbalance: weight the loss by class (a sketch follows this list)
- Overfitting: add more data augmentation or apply regularization
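For the class-imbalance item, one common approach is to weight each pixel's cross-entropy by the weight of its class. A sketch with hypothetical weights (index = class ID), reusing the logits/labels conventions from `compute_loss` above:

```python
import tensorflow as tf

# Hypothetical per-class weights: background down-weighted, a rare class up-weighted.
class_weights = tf.constant([0.5, 1.0, 2.0])

def weighted_loss(logits, labels):
    """Cross-entropy where each pixel is weighted by its class weight.

    Assumes labels contain only valid class IDs (no ignore label).
    """
    labels = tf.cast(tf.squeeze(labels, axis=-1), tf.int32)
    per_pixel = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    pixel_weights = tf.gather(class_weights, labels)
    return tf.reduce_sum(per_pixel * pixel_weights) / tf.reduce_sum(pixel_weights)
```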
Advanced Tips
- Use a stronger backbone (e.g., Xception) for higher accuracy
- Experiment with different learning-rate schedules
- Add data augmentation (random scaling, rotation, color jitter)
- Use mixed-precision training to speed things up (see the snippet after this list)
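Enabling mixed precision in TensorFlow 2 is a one-line global policy change; it pays off mainly on GPUs with Tensor Cores, and the final logits layer should stay in float32 for numerical stability:

```python
from tensorflow.keras import mixed_precision

mixed_precision.set_global_policy('mixed_float16')  # compute in float16, keep variables in float32
```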
Hopefully this guide helps you successfully train your own DeepLabv3+ model!