# ================ 标准库导入 ================
import logging
import os
import xml.etree.ElementTree as ET
import random
from typing import Optional
import cv2
import numpy as np
# ================ PaddlePaddle核心导入 ================
import paddle
import paddle.nn as nn
from paddle.amp import GradScaler, auto_cast
from paddle.io import DataLoader
from paddle.optimizer import AdamW
# ================ PaddleDetection模块导入 ================
import ppdet.data.transform as T
from ppdet.data.source import VOCDataSet
from ppdet.metrics import VOCMetric
from ppdet.modeling.backbones import SwinTransformer
from ppdet.modeling.necks import CustomCSPPAN
from ppdet.modeling.heads import PPYOLOEHead
from ppdet.utils.checkpoint import load_weight
from tqdm import tqdm
# ================ 自定义 ResNet 模型 ================
from paddle.vision.models import ResNet
from paddle.vision.models.resnet import BottleneckBlock
class CustomResNet50(ResNet):
    """ResNet-50 backbone that returns multi-scale feature maps for detection.

    paddle.vision's ``ResNet.forward`` returns a single classification tensor
    of shape ``[N, num_classes]``, which is unusable as a detection backbone:
    iterating over it yields 1-D rows, and unpacking
    ``b, c, h, w = feat.shape`` downstream fails with
    "not enough values to unpack (expected 4, got 1)".  The ``forward``
    override below returns the C3/C4/C5 pyramid ([512, 1024, 2048] channels),
    matching the neck's ``in_channels``.
    """

    def __init__(self, norm_layer=nn.BatchNorm2D, **kwargs):
        super().__init__(block=BottleneckBlock, depth=50, **kwargs)
        # Kept for API compatibility; paddle.vision's ResNet builds its own
        # norm layers internally, so this attribute is not applied to them.
        self.norm_layer = norm_layer

    def forward(self, x):
        """Return multi-scale features [C3, C4, C5] for an NCHW image batch.

        NOTE(review): attribute names (conv1/bn1/relu/maxpool/layer1..4)
        match paddle.vision.models.resnet.ResNet -- confirm against the
        installed paddle version.
        """
        # Stem: 7x7 conv -> BN -> ReLU -> 3x3 max-pool (overall stride 4).
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        c2 = self.layer1(x)   # stride 4,  256 channels (unused downstream)
        c3 = self.layer2(c2)  # stride 8,  512 channels
        c4 = self.layer3(c3)  # stride 16, 1024 channels
        c5 = self.layer4(c4)  # stride 32, 2048 channels
        return [c3, c4, c5]
# ================ Custom normalization layer (revised) ================
class CustomNormalize(nn.Layer):
    """Sample transform: scales an HWC uint8 image to [0, 1], converts it to
    an NCHW paddle tensor and applies ImageNet mean/std normalization.

    Operates on (and returns) the sample dict used by the transform
    pipeline; only the ``'image'`` entry is modified.
    """

    def __init__(self):
        super().__init__()
        # ImageNet channel statistics, shaped (1, C, 1, 1) so they broadcast
        # over an NCHW image tensor.
        self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape([1, 3, 1, 1])
        self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape([1, 3, 1, 1])

    def forward(self, data: dict):
        image = data['image']
        # Ensure a numpy array, then scale pixel values to [0, 1].
        if not isinstance(image, np.ndarray):
            image = np.array(image)
        image = image.astype('float32') / 255.0
        # Convert layout: (H, W, C) -> (C, H, W) -> (1, C, H, W).
        if len(image.shape) == 3 and image.shape[-1] == 3:
            image = image.transpose(2, 0, 1)
            image = np.expand_dims(image, axis=0)  # add batch dimension
        # Convert to a paddle tensor and normalize with the ImageNet stats.
        image = paddle.to_tensor(image)
        data['image'] = (image - self.mean) / self.std
        return data
# Register the custom op on ppdet's transform module so Compose can resolve
# the name 'CustomNormalize' like any built-in ppdet transform.
T.CustomNormalize = CustomNormalize
# ================ Dataset class (revised) ================
# Configure module-level logging.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class RoadObstacleDataset(VOCDataSet):
    """VOC-style road-obstacle dataset with an automatic train/val split.

    Scans ``dataset_dir/image_dir`` for images, deterministically splits the
    stems into train/val according to ``split_ratio``, and parses the matching
    Pascal-VOC XML annotations from ``dataset_dir/anno_path``.

    Fixes over the original version:
    - the shuffle is seeded (and the listing sorted), so the train and the
      val instance produce complementary, non-overlapping splits;
    - the real image filename (with its extension) is remembered instead of
      hard-coding ``.png``, so ``.jpg``/``.jpeg`` images load too;
    - images without annotated objects yield a rank-2 ``(0, 4)`` box array.
    """

    # Fixed seed: both the train and the val instance must shuffle the
    # (sorted) stem list identically for the split to be complementary.
    _SPLIT_SEED = 0

    def __init__(self, dataset_dir: str, image_dir: str, anno_path: str,
                 file_list: Optional[str] = None,
                 label_list: str = 'label_list.txt',
                 mode: str = 'train', transform_mode: str = 'default',
                 split_ratio: float = 0.8):
        super().__init__(
            dataset_dir=str(dataset_dir),
            image_dir=str(image_dir),
            anno_path=str(anno_path),
            data_fields=['image', 'gt_bbox', 'gt_class', 'im_id', 'im_shape'],
            sample_num=-1
        )
        self.mode = mode
        self.transform_mode = transform_mode
        self.transforms = self._get_transforms()
        # Resolved absolute directories, with soft validation (warn only).
        self.image_dir = os.path.join(dataset_dir, image_dir)
        self.anno_dir = os.path.join(dataset_dir, anno_path)
        if not os.path.exists(self.image_dir):
            logger.warning(f"Image directory not found: {self.image_dir}")
        if not os.path.exists(self.anno_dir):
            logger.warning(f"Annotation directory not found: {self.anno_dir}")
        # Load class names, one per line.
        label_path = os.path.join(dataset_dir, label_list)
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                self.labels = [line.strip() for line in f.readlines()]
        else:
            logger.warning(f"Label file not found: {label_path}")
            self.labels = []
        # Build the per-split file-id list (also fills self._file_names).
        self.file_ids = self._generate_file_list(dataset_dir, image_dir, anno_path, split_ratio)

    def _generate_file_list(self, dataset_dir: str, image_dir: str, anno_path: str,
                            split_ratio: float = 0.8):
        """Scan the image dir, split deterministically and persist the list.

        Writes ``train.txt`` / ``val.txt`` under ``dataset_dir`` and returns
        the selected stems for this instance's mode.
        """
        image_files = [f for f in os.listdir(os.path.join(dataset_dir, image_dir))
                       if f.endswith(('.jpg', '.jpeg', '.png'))]
        # Map stem -> real filename so __getitem__ can recover the extension
        # (the original hard-coded ".png" and silently dropped .jpg images).
        self._file_names = {os.path.splitext(f)[0]: f for f in image_files}
        # Sort first: os.listdir order is arbitrary, and the seeded shuffle
        # is only reproducible from a deterministic starting order.
        file_ids = sorted(self._file_names)
        random.Random(self._SPLIT_SEED).shuffle(file_ids)
        split_idx = int(len(file_ids) * split_ratio)
        if self.mode == 'train':
            selected_ids = file_ids[:split_idx]
            list_file = os.path.join(dataset_dir, 'train.txt')
        else:
            selected_ids = file_ids[split_idx:]
            list_file = os.path.join(dataset_dir, 'val.txt')
        # Persist the split for inspection / external tooling.
        with open(list_file, 'w') as f:
            f.write("\n".join(selected_ids))
        return selected_ids

    def _get_transforms(self):
        """Build the augmentation pipeline (flip only during training)."""
        if self.mode == 'train':
            return T.Compose([
                {'RandomFlip': {'prob': 0.5}},
                {'Resize': {'target_size': 640, 'keep_ratio': True}},
                {'Pad': {'size': [640, 640]}},   # uniform padded size
                {'CustomNormalize': {}}          # registered custom transform
            ])
        else:
            return T.Compose([
                {'Resize': {'target_size': 640, 'keep_ratio': True}},
                {'Pad': {'size': [640, 640]}},   # uniform padded size
                {'CustomNormalize': {}}          # registered custom transform
            ])

    def __getitem__(self, idx: int):
        """Load one sample dict, or None when the sample cannot be read.

        Returning None lets collate_fn drop broken samples instead of
        aborting the whole epoch.
        """
        try:
            file_id = self.file_ids[idx]
            # Use the recorded filename (keeps the real extension); fall back
            # to ".png" for safety if the map is somehow missing the stem.
            file_name = self._file_names.get(file_id, f"{file_id}.png")
            img_path = os.path.join(self.image_dir, file_name)
            if not os.path.exists(img_path):
                logger.warning(f"Image not found: {img_path}")
                return None
            img = cv2.imread(img_path)
            if img is None:
                logger.warning(f"Failed to load image: {img_path}")
                return None
            # OpenCV loads BGR; the normalization stats assume RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            gt_boxes, gt_labels = self._parse_voc_anno(file_id)
            sample = {
                'image': img,  # still HWC here; CustomNormalize converts it
                'gt_bbox': gt_boxes,
                'gt_class': gt_labels,
                'im_id': np.array([idx]),
                'im_shape': np.array(img.shape[:2]),
                'flipped': False
            }
            if self.transforms:
                sample = self.transforms(sample)
            return sample
        except Exception as e:
            logger.error(f"Error loading sample {idx}: {str(e)}")
            return None

    def __len__(self) -> int:
        return len(self.file_ids)

    def _parse_voc_anno(self, file_id: str):
        """Parse one Pascal-VOC XML file into (boxes, class_ids) arrays.

        Objects whose class name is not in ``self.labels`` are skipped.
        Returns float32 boxes of shape (N, 4) -- (0, 4) when empty -- and
        int32 class ids of shape (N,).
        """
        xml_path = os.path.join(str(self.dataset_dir), str(self.anno_path), f"{file_id}.xml")
        tree = ET.parse(xml_path)
        root = tree.getroot()
        boxes = []
        labels = []
        for obj in root.findall('object'):
            cls_name = obj.find('name').text
            if cls_name not in self.labels:
                continue
            cls_id = self.labels.index(cls_name)
            bbox = obj.find('bndbox')
            xmin = float(bbox.find('xmin').text)
            ymin = float(bbox.find('ymin').text)
            xmax = float(bbox.find('xmax').text)
            ymax = float(bbox.find('ymax').text)
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(cls_id)
        if boxes:
            return np.array(boxes, dtype='float32'), np.array(labels, dtype='int32')
        # Keep a rank-2 (0, 4) box array so downstream transforms see a
        # consistent shape for images without annotated objects.
        return np.zeros((0, 4), dtype='float32'), np.zeros((0,), dtype='int32')
# ================ Transformer增强的PP-YOLOE模型 ================
class TransformerPPYOLOE(nn.Layer):
    """PP-YOLOE-style detector with a transformer refinement stage between
    backbone and neck.

    Each backbone feature map is flattened to a token sequence over its
    spatial positions, refined by a per-level TransformerEncoderLayer, and
    reshaped back to (N, C, H, W) before the FPN neck and detection head.

    The original code reused a single ppdet SwinTransformer (an *image*
    backbone, not a sequence encoder) with embed_dim=96 for every level;
    that cannot process 512/1024/2048-channel sequences.  One encoder is
    now built per pyramid level, sized to that level's channel count.
    """

    def __init__(self, backbone: nn.Layer, neck: nn.Layer, head: nn.Layer,
                 feat_channels=(512, 1024, 2048), num_heads: int = 8):
        """feat_channels and num_heads are new, defaulted parameters
        (backward compatible): channel widths of the backbone pyramid and
        attention heads per encoder.  num_heads must divide every width.
        """
        super().__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        # One sequence encoder per pyramid level, sized to that level.
        self.encoders = nn.LayerList([
            nn.TransformerEncoderLayer(
                d_model=width,
                nhead=num_heads,
                dim_feedforward=2 * width,
            )
            for width in feat_channels
        ])

    def forward(self, inputs: dict):
        """Run backbone -> per-level transformer -> neck -> head."""
        # Pull the batched RGB tensor out of the sample dict.
        rgb_input = inputs['image']
        if not isinstance(rgb_input, paddle.Tensor):
            rgb_input = paddle.to_tensor(rgb_input)
        # The backbone must yield a list of 4-D (N, C, H, W) feature maps; a
        # classification ResNet returning plain logits makes the unpack
        # below fail with "expected 4, got 1".
        body_feats = self.backbone(rgb_input)
        enhanced_feats = []
        for feat, encoder in zip(body_feats, self.encoders):
            batch_size, channels, height, width = feat.shape
            # (N, C, H, W) -> (N, H*W, C): one token per spatial position.
            feat_seq = feat.flatten(2).transpose([0, 2, 1])
            enhanced_seq = encoder(feat_seq)
            # Restore the spatial layout.
            enhanced_feat = enhanced_seq.transpose([0, 2, 1]).reshape(
                [batch_size, channels, height, width])
            enhanced_feats.append(enhanced_feat)
        # Feature pyramid network.
        neck_feats = self.neck(enhanced_feats)
        # NOTE(review): in ppdet, PPYOLOEHead only produces a 'loss' entry
        # when it is given ground-truth targets during training; the training
        # loop reads outputs['loss'], so verify how the head must be called.
        head_outputs = self.head(neck_feats)
        return head_outputs
# ================ 修改后的collate函数 ================
def collate_fn(batch):
    """Merge dataset samples into one batch dict, dropping failed samples.

    Samples that came back as None (unreadable image / bad annotation) are
    filtered out; if nothing survives, None is returned so the training
    loop can skip the batch entirely.
    """
    valid = [sample for sample in batch if sample is not None]
    if not valid:
        return None
    merged = {}
    for field in valid[0].keys():
        if field in ['gt_bbox', 'gt_class']:
            # Variable-length per image: keep as a plain Python list.
            merged[field] = [sample[field] for sample in valid]
        elif field == 'image':
            # Each sample already carries a leading batch axis (added by
            # CustomNormalize), so concatenating yields (N, C, H, W).
            merged[field] = paddle.concat([sample['image'] for sample in valid], axis=0)
        else:
            # Fixed-size per-sample arrays: stack then convert once.
            merged[field] = paddle.to_tensor(np.stack([sample[field] for sample in valid]))
    return merged
# ================ 模型训练与评估 ================
class RoadObstacleTrainer:
    """Owns model construction, optimization, data loading, the training
    loop and evaluation for the road-obstacle detector.

    Fixes over the original version:
    - ``config['pretrained']`` is no longer overwritten with None right
      before the load check (which made pretrained loading unreachable);
    - the cosine LR scheduler is stored and stepped once per epoch
      (previously it was created but never advanced);
    - class names are resolved tolerantly whether ``config['labels']`` is a
      list of names or a path to a label file;
    - the VOC 'difficult' field is built per-sample, since ``gt_class``
      arrives from collate_fn as a Python list of arrays (paddle.zeros_like
      on a list would raise).
    """

    def __init__(self, config: dict):
        self.config = config
        self.model = None
        self.optimizer = None
        self.lr_scheduler = None
        self.scaler = None

    def _label_names(self):
        """Return the class-name list whether config['labels'] is already a
        list of names or a path to a label file (one name per line)."""
        labels = self.config['labels']
        if isinstance(labels, str):
            if os.path.exists(labels):
                with open(labels, 'r') as f:
                    return [line.strip() for line in f if line.strip()]
            return []
        return list(labels)

    def build_model(self):
        """Assemble backbone + neck + head and optionally load weights."""
        backbone = CustomResNet50(norm_layer=nn.BatchNorm2D)
        neck = CustomCSPPAN(
            in_channels=[512, 1024, 2048],
            out_channels=[128, 256, 512],
            norm_type='bn',
            act=nn.LeakyReLU(negative_slope=0.1),
            stage_fn='CSPStage',
            block_fn='BasicBlock',
            stage_num=1,
            block_num=3,
            spp=True
        )
        head = PPYOLOEHead(
            in_channels=[128, 256, 512],
            num_classes=len(self._label_names()),
            act='swish',
            fpn_strides=[8, 16, 32],
            grid_cell_scale=5.0,
            grid_cell_offset=0.5
        )
        self.model = TransformerPPYOLOE(
            backbone=backbone,
            neck=neck,
            head=head
        )
        if self.config.get('pretrained'):
            load_weight(self.model, self.config['pretrained'])

    def build_optimizer(self):
        """Cosine-annealed AdamW with global-norm grad clipping and AMP."""
        self.lr_scheduler = paddle.optimizer.lr.CosineAnnealingDecay(
            learning_rate=self.config['learning_rate'],
            T_max=self.config['epochs']
        )
        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=35.0)
        self.optimizer = AdamW(
            parameters=self.model.parameters(),
            learning_rate=self.lr_scheduler,
            weight_decay=self.config['weight_decay'],
            grad_clip=clip
        )
        # Dynamic loss scaling for mixed-precision training.
        self.scaler = GradScaler(enable=True, init_loss_scaling=2. ** 16)

    def build_datasets(self):
        """Create the train and the val dataset (split handled internally)."""
        train_dataset = RoadObstacleDataset(
            dataset_dir=self.config['dataset_dir'],
            image_dir=self.config['image_dir'],
            anno_path=self.config['anno_path'],
            mode='train',
            transform_mode=self.config.get('transform_mode', 'default')
        )
        val_dataset = RoadObstacleDataset(
            dataset_dir=self.config['dataset_dir'],
            image_dir=self.config['image_dir'],
            anno_path=self.config['anno_path'],
            mode='val'
        )
        return train_dataset, val_dataset

    def train(self):
        """Full training loop; returns the best validation mAP.

        Saves 'best_model.pdparams' whenever the val mAP improves and
        'final_model.pdparams' after the last epoch.
        """
        self.build_model()
        self.build_optimizer()
        train_dataset, val_dataset = self.build_datasets()
        train_loader = DataLoader(
            train_dataset,
            batch_size=self.config['batch_size'],
            shuffle=True,
            num_workers=self.config['num_workers'],
            collate_fn=collate_fn,
            drop_last=True
        )
        val_loader = DataLoader(
            val_dataset,
            batch_size=self.config['batch_size'],
            shuffle=False,
            num_workers=self.config['num_workers'],
            collate_fn=collate_fn
        )
        best_map = 0.0
        for epoch in range(self.config['epochs']):
            self.model.train()
            train_loss = 0.0
            train_bar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{self.config["epochs"]} [Train]')
            for batch in train_bar:
                if batch is None:
                    continue  # all samples in the batch failed to load
                with auto_cast(enable=True):
                    outputs = self.model(batch)
                    # NOTE(review): this expects the model to return a dict
                    # with a 'loss' entry during training -- verify the head
                    # is invoked with targets so the loss exists.
                    loss = outputs['loss']
                scaled_loss = self.scaler.scale(loss)
                scaled_loss.backward()
                # scaler.minimize unscales, skips on inf/nan and steps.
                self.scaler.minimize(self.optimizer, scaled_loss)
                self.optimizer.clear_grad()
                train_loss += loss.item()
                train_bar.set_postfix(loss=loss.item())
            # Advance the cosine schedule once per epoch (T_max = epochs).
            self.lr_scheduler.step()
            val_metrics = self.evaluate(val_loader)
            if val_metrics is None:
                val_map = 0.0
                print("Warning: Val metrics is None. Setting Val mAP to 0.0.")
            else:
                val_map = val_metrics['map']
            if val_map > best_map:
                best_map = val_map
                paddle.save(self.model.state_dict(), os.path.join(self.config['save_dir'], 'best_model.pdparams'))
                print(f"Saved best model with mAP: {val_map:.4f}")
            # max(..., 1) guards against an empty loader.
            print(
                f'Epoch {epoch + 1}/{self.config["epochs"]} - Train Loss: {train_loss / max(len(train_loader), 1):.4f}, Val mAP: {val_map:.4f}')
        paddle.save(self.model.state_dict(), os.path.join(self.config['save_dir'], 'final_model.pdparams'))
        return best_map

    def evaluate(self, data_loader):
        """Run VOC-style evaluation; returns the metric dict (or a default)."""
        self.model.eval()
        # NOTE(review): ppdet's VOCMetric may expect label_list to be a path
        # to the label file rather than a list of names, and its update()
        # signature varies across ppdet versions -- verify both.
        metric = VOCMetric(
            class_num=len(self._label_names()),
            label_list=self.config['labels'],
            overlap_thresh=0.5
        )
        with paddle.no_grad():
            for batch in tqdm(data_loader, desc='Evaluating'):
                if batch is None:
                    continue  # all samples in the batch failed to load
                outputs = self.model(batch)
                eval_data = {
                    'bbox_pred': outputs['bbox_pred'],
                    'bbox_num': outputs['bbox_num'],
                    'gt_bbox': batch['gt_bbox'],
                    'gt_class': batch['gt_class'],
                    # gt_class is a per-image list of numpy arrays (see
                    # collate_fn), so build 'difficult' per sample.
                    'difficult': [np.zeros_like(g) for g in batch['gt_class']]
                }
                metric.update(**eval_data)
        val_metrics = metric.accumulate()
        if val_metrics is None:
            print("Warning: Val metrics is None. Using default value.")
            val_metrics = {'map': 0.0}  # fall back to a neutral score
        return val_metrics
# ================ 主函数 ================
def main():
    """Entry point: build the config, read the class labels, run training."""
    # Resolve the dataset relative to this script's directory.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    dataset_path = os.path.join(base_dir, 'road_obstacles')
    # Read the class names up front: downstream code calls
    # len(config['labels']) for num_classes, so 'labels' must be the list of
    # names -- the original stored the file *path*, making len() count the
    # characters of the path string.
    # NOTE(review): ppdet's VOCMetric may expect a label-file path for its
    # label_list argument -- verify against the installed ppdet version.
    label_file = os.path.join(dataset_path, 'label_list.txt')
    if os.path.exists(label_file):
        with open(label_file, 'r') as f:
            labels = [line.strip() for line in f if line.strip()]
    else:
        print(f"Warning: label file not found: {label_file}")
        labels = []
    config = {
        'dataset_dir': dataset_path,   # absolute path to the dataset root
        'image_dir': 'images',
        'anno_path': 'Annotations',
        'labels': labels,              # list of class names
        'pretrained': None,
        'batch_size': 8,
        'num_workers': 4,
        'learning_rate': 0.001,
        'weight_decay': 0.0001,
        'epochs': 100,
        'save_dir': 'output',
        'transform_mode': 'default'    # simplified augmentation pipeline
    }
    os.makedirs(config['save_dir'], exist_ok=True)
    trainer = RoadObstacleTrainer(config)
    print("Starting training...")
    best_map = trainer.train()
    print(f"Training completed! Best mAP: {best_map:.4f}")
if __name__ == "__main__":
    # The original file had console log output fused onto this line after
    # main(), which made the module a SyntaxError; only the code remains.
    # Train on GPU; change to 'cpu' if no CUDA device is available.
    paddle.set_device('gpu')
    main()
# ---- Console output captured from a failed run (not Python code; kept as
# ---- comments for reference so the file stays importable) ----
# W0603 20:54:55.237680 23316 gpu_resources.cc:119] device: 0, GPU Compute
#   Capability: 8.9, Driver API Version: 12.8, Runtime API Version: 12.0
# W0603 20:54:55.239673 23316 gpu_resources.cc:164] device: 0, cuDNN Version: 8.9.
# Epoch 1/100 [Train]: 0%| | 0/168 [00:02<?, ?it/s]
# Traceback (most recent call last):
#   File "main.py", line 470, in <module>
#     main()
#   File "main.py", line 463, in main
#     best_map = trainer.train()
#   File "main.py", line 379, in train
#     outputs = self.model(batch)
#   File ".../paddle/nn/layer/layers.py", line 1429, in __call__
#     return self.forward(*inputs, **kwargs)
#   File "main.py", line 242, in forward
#     batch_size, channels, height, width = feat.shape
# ValueError: not enough values to unpack (expected 4, got 1)
#
# Q: How to fix this error?
# A: paddle.vision's ResNet.forward returns a single [N, num_classes] logits
#    tensor, so iterating over `body_feats` yields 1-D rows.  The detection
#    pipeline needs the backbone to return a *list* of 4-D (N, C, H, W)
#    feature maps (e.g. the C3/C4/C5 pyramid with 512/1024/2048 channels)
#    before the transformer / neck / head stages can run.