# PP-YOLO系列由百度提出。2021年4月,百度提出了PP-YOLOv2(原文链接:https://arxiv.org/abs/2104.10419 ),
# 其性能超越了同等参数量的YOLOv4-CSP和YOLOv5-l;同年7月,旷视提出了YOLOX;百度团队又对PP-YOLOv2进行了优化,提出了PP-YOLOE。
# PP-YOLOv2的总体情况包括:
#   1)backbone:具有可变形卷积的ResNet50-vd;
#   2)neck:具有SPP层的PAN,DropBlock;
#   3)head:轻量级的IoU感知;
#   4)激活函数:在backbone中使用ReLU激活,在neck中使用Mish激活;
#   5)标签分配:为每个ground truth目标分配一个anchor box;
#   6)损失:分类损失、回归损失、目标损失、IoU损失和IoU感知损失。
# PP-YOLOE的改进:Anchor-free方式最先出现在YOLOv1中,但由于直接预测位置不够准确,
# 后面的YOLOv2、v3、v4和v5均采用了Anchor方式。YOLOX认为,按聚类方式确定最优Anchor尺寸局限于特定领域、难以推广,
# 此外还增加了head的复杂度与每张图像的预测数量,故采用了Anchor-free方式;PP-YOLOE亦采用Anchor-free方式。
# Top-level model identity and global training switches.
architecture: YOLOv3

# Initial weights. A local checkpoint is active; the remote
# `ppyoloe_crn_x_obj365_pretrained` checkpoint is kept as a commented-out
# alternative.
# pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_x_obj365_pretrained.pdparams
pretrain_weights: /data/czy/PaddleDetection/output/model_20221222_waiguan/ppoex_waiguan/25.pdparams

# Network scaling multipliers.
depth_mult: 1.33
width_mult: 1.25

norm_type: sync_bn

# EMA settings.
use_ema: true
ema_decay: 0.9998
ema_black_list:
  - 'proj_conv.weight'

custom_black_list:
  - 'reduce_mean'
# Components of the YOLOv3 architecture. Each value names another
# top-level section of this file that holds that component's settings.
YOLOv3:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEHead
  # Explicitly null.
  post_process: null
# Settings for the component referenced by `YOLOv3.backbone`.
CSPResNet:
  layers: [3, 6, 6, 3]
  channels: [64, 128, 256, 512, 1024]
  return_idx: [1, 2, 3]
  use_large_stem: true
  use_alpha: true
# Settings for the component referenced by `YOLOv3.neck`.
CustomCSPPAN:
  out_channels: [768, 384, 192]
  stage_num: 1
  block_num: 3
  act: 'swish'
  spp: true
# Settings for the component referenced by `YOLOv3.yolo_head`.
# The assigner and NMS options are nested so that their `name` / `topk`
# keys stay scoped to the sub-section they belong to.
PPYOLOEHead:
  fpn_strides: [32, 16, 8]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  # NOTE(review): presumably the number of epochs that use `static_assigner`
  # before `assigner` takes over — confirm against the head implementation.
  static_assigner_epoch: 30
  use_varifocal_loss: true
  loss_weight:
    class: 1.0
    iou: 2.5
    dfl: 0.5
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  nms:
    name: MatrixNMS
    keep_top_k: 100
    score_threshold: 0.01
    post_threshold: 0.01
    nms_top_k: -1
    background_label: -1
# Dataset format / evaluation metric: VOC, with the `11point` mAP type.
num_classes: 17
metric: VOC
map_type: 11point
# Training split. The `!VOCDataSet` tag applies to the mapping nested
# beneath it.
TrainDataset:
  !VOCDataSet
    dataset_dir: /data/czy/VOC2007_wg_bmyw
    anno_path: train.txt
    label_list: label.txt
    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
# Evaluation split. The `!VOCDataSet` tag applies to the mapping nested
# beneath it.
# NOTE(review): `anno_path` is `train.txt`, the same list the training
# dataset uses, so evaluation runs on the training images. Confirm this is
# intended, or point it at a held-out list.
EvalDataset:
  !VOCDataSet
    dataset_dir: /data/czy/VOC2007_wg_bmyw
    anno_path: train.txt
    label_list: label.txt
    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
# Inference input. The `!ImageFolder` tag applies to the mapping nested
# beneath it; `anno_path` points at the label list file.
TestDataset:
  !ImageFolder
    anno_path: /data/czy/VOC2007_wg_bmyw/label.txt
# Reader settings (original note: yoloereader).
worker_num: 4

# Evaluation/test input size. The values are anchored so the reader
# sections can alias them. The original inline note records 704 as an
# alternative value for both dimensions.
eval_height: &eval_height 1024
eval_width: &eval_width 1024
eval_size: &eval_size
  - *eval_height
  - *eval_width
# Training data pipeline: per-sample transforms, then per-batch transforms,
# then loader options.
TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomDistort: {}
    - RandomExpand:
        fill_value: [123.675, 116.28, 103.53]
    - RandomCrop: {}
    - RandomFlip: {}
  batch_transforms:
    # Active size list: every multiple of 64 from 640 to 1344.
    # Commented-out alternative from the original: every multiple of 32
    # from 320 to 1344, with the same random_size / random_interp /
    # keep_ratio options.
    - BatchRandomResize:
        target_size:
          [640, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344]
        random_size: true
        random_interp: true
        keep_ratio: false
    - NormalizeImage:
        mean: [0.0, 0.0, 0.0]
        std: [1.0, 1.0, 1.0]
        # Quoted so it is unambiguously the string 'none', not a null.
        norm_type: 'none'
    - Permute: {}
    - PadGT: {}
  batch_size: 2
  shuffle: true
  drop_last: false
  use_shared_memory: true
  collate_batch: true
# Evaluation data pipeline. Images are resized to the anchored `eval_size`
# without keeping the aspect ratio.
EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: *eval_size
        keep_ratio: false
        interp: 2
    - NormalizeImage:
        mean: [0.0, 0.0, 0.0]
        std: [1.0, 1.0, 1.0]
        # Quoted so it is unambiguously the string 'none', not a null.
        norm_type: 'none'
    - Permute: {}
  batch_size: 1
# Inference data pipeline. It uses the same transforms as evaluation and
# declares the input shape from the anchored eval height and width.
TestReader:
  inputs_def:
    image_shape: [3, *eval_height, *eval_width]
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: *eval_size
        keep_ratio: false
        interp: 2
    - NormalizeImage:
        mean: [0.0, 0.0, 0.0]
        std: [1.0, 1.0, 1.0]
        # Quoted so it is unambiguously the string 'none', not a null.
        norm_type: 'none'
    - Permute: {}
  batch_size: 1
# Training schedule (original note: '../cascade_rcnn/_base_/optimizer_1x.yml').
epoch: 80

LearningRate:
  # The original inline note records 0.0000625 as an alternative value.
  base_lr: 0.0001
  schedulers:
    # NOTE(review): max_epochs (96) is larger than `epoch` (80) — confirm
    # the cosine schedule is meant to extend past the end of the run.
    - !CosineDecay
      max_epochs: 96
    - !LinearWarmup
      start_factor: 0.0
      epochs: 5
# Optimizer and regularizer. Each sub-section carries its own `type`, so
# the two must stay nested to avoid a duplicate-key collision.
OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0005
    type: L2
# Runtime settings (original note: '../runtime.yml').

# Device selection.
use_gpu: true
use_xpu: false

# Logging and checkpointing.
log_iter: 20
snapshot_epoch: 1
save_dir: output/model

print_flops: false
# Model export options.
export:
  # Whether post-processing is included in the network when exporting.
  post_process: true
  # Whether NMS is included in the network when exporting.
  nms: true
  # Used for testing model performance: when true, post-processing and NMS
  # are not exported.
  benchmark: false
  fuse_conv_bn: false