Network Analysis: PP-YOLO

The PP-YOLO series was proposed by Baidu. PP-YOLOv2 was released in April 2021 (paper: https://arxiv.org/abs/2104.10419 ) and outperforms YOLOv4-CSP and YOLOv5-l at comparable parameter counts. After Megvii released YOLOX in July of that year, the Baidu team further optimized PP-YOLOv2 and proposed PP-YOLOE.

The main design points of PP-YOLOv2 are:
1) backbone: ResNet50-vd with deformable convolutions;
2) neck: PAN with an SPP layer and DropBlock;
3) head: a lightweight IoU-aware head;
4) activations: ReLU in the backbone, Mish in the neck;
5) label assignment: one anchor box assigned to each ground-truth object;
6) losses: classification loss, regression loss, objectness loss, IoU loss, and IoU-aware loss.

As for PP-YOLOE's improvements: the anchor-free approach first appeared in YOLOv1, but because directly regressing box coordinates was inaccurate at the time, the later YOLOv2, v3, v4, and v5 all switched to anchor boxes. YOLOX argued that anchor sizes obtained by clustering are tied to a specific domain and generalize poorly, and that anchors also increase head complexity and the number of predictions per image, so it returned to an anchor-free design; PP-YOLOE adopts the anchor-free approach as well.
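To make the anchor-free idea concrete, the sketch below decodes per-cell distance predictions into boxes on a single feature level. It is a minimal NumPy illustration, not the PaddleDetection implementation; the function names and the dummy predictions are made up, and only the grid-cell offset of 0.5 mirrors the grid_cell_offset used in the configuration below.

import numpy as np

def make_anchor_points(feat_h, feat_w, stride, offset=0.5):
    # Centers of grid cells on one FPN level, in input-image coordinates.
    ys, xs = np.meshgrid(np.arange(feat_h), np.arange(feat_w), indexing="ij")
    points = np.stack([(xs + offset) * stride, (ys + offset) * stride], axis=-1)
    return points.reshape(-1, 2)  # (H*W, 2): one (cx, cy) per cell, no anchor boxes

def decode_ltrb(points, ltrb):
    # Anchor-free decoding: each cell predicts distances (left, top, right, bottom)
    # from its center to the box borders, instead of offsets relative to anchor boxes.
    x1y1 = points - ltrb[:, :2]
    x2y2 = points + ltrb[:, 2:]
    return np.concatenate([x1y1, x2y2], axis=-1)  # (N, 4) boxes as x1, y1, x2, y2

# A 3x3 feature map at stride 32 with dummy predictions: every cell
# predicts a 32x32 box centred on itself.
pts = make_anchor_points(3, 3, stride=32)
dists = np.full((9, 4), 16.0)
print(decode_ltrb(pts, dists)[:2])

The rest of this post is the PaddleDetection YAML configuration used to train PP-YOLOE on a custom VOC-format dataset; the dataset and weight paths are specific to the author's environment.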

architecture: YOLOv3
#pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_x_obj365_pretrained.pdparams
pretrain_weights: /data/czy/PaddleDetection/output/model_20221222_waiguan/ppoex_waiguan/25.pdparams
depth_mult: 1.33
width_mult: 1.25
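# depth_mult 1.33 and width_mult 1.25 correspond to the largest (x) PP-YOLOE scale; the l model uses 1.0 for both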
norm_type: sync_bn
use_ema: true
ema_decay: 0.9998
ema_black_list: ['proj_conv.weight']
custom_black_list: ['reduce_mean']

YOLOv3:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEHead
  post_process: ~

CSPResNet:
  layers: [3, 6, 6, 3]
  channels: [64, 128, 256, 512, 1024]
  return_idx: [1, 2, 3]
  use_large_stem: True
  use_alpha: True

CustomCSPPAN:
  out_channels: [768, 384, 192]
  stage_num: 1
  block_num: 3
  act: 'swish'
  spp: true

PPYOLOEHead:
  fpn_strides: [32, 16, 8]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  static_assigner_epoch: 30
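  # with static_assigner_epoch: 30, the static ATSS assigner is used for the first 30 epochs and the TaskAlignedAssigner takes over afterwards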
  use_varifocal_loss: True
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
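  # loss_weight balances the classification, IoU, and distribution focal loss (DFL) terms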
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  nms:
    name: MatrixNMS
    keep_top_k: 100
    score_threshold: 0.01
    post_threshold: 0.01
    nms_top_k: -1
    background_label: -1


# voc dataset format
metric: VOC
map_type: 11point
num_classes: 17

TrainDataset:
  !VOCDataSet
    dataset_dir: /data/czy/VOC2007_wg_bmyw
    anno_path: train.txt
    label_list: label.txt
    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']

EvalDataset:
  !VOCDataSet
    dataset_dir: /data/czy/VOC2007_wg_bmyw
    anno_path: train.txt
    label_list: label.txt
    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']

TestDataset:
  !ImageFolder
    anno_path: /data/czy/VOC2007_wg_bmyw/label.txt
  
# PP-YOLOE reader
worker_num: 4
eval_height: &eval_height 1024 #704
eval_width: &eval_width 1024 #704
eval_size: &eval_size [*eval_height, *eval_width]
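# the &/* syntax defines YAML anchors and aliases, so the eval and test readers below reuse the same input resolution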

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomDistort: {}
    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    - RandomCrop: {}
    - RandomFlip: {}
  batch_transforms:
#    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344], random_size: True, random_interp: True, keep_ratio: False}
    - BatchRandomResize: {target_size: [ 640, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344], random_size: True, random_interp: True, keep_ratio: False}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
    - PadGT: {}
  batch_size: 2
  shuffle: true
  drop_last: false
  use_shared_memory: true
  collate_batch: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, *eval_height, *eval_width]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1



# '../cascade_rcnn/_base_/optimizer_1x.yml'
epoch: 80

LearningRate:
  base_lr: 0.0001 #0.0000625
  schedulers:
    - !CosineDecay
      max_epochs: 96
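      # the cosine schedule spans 96 epochs, slightly longer than the 80 training epochs (roughly the 1.2x margin used in the official PP-YOLOE configs)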
    - !LinearWarmup
      start_factor: 0.
      epochs: 5

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0005
    type: L2


# '../runtime.yml'
use_gpu: true
use_xpu: false
log_iter: 20
save_dir: output/model
snapshot_epoch: 1
print_flops: false

# Exporting the model
export:
  post_process: True  # Whether post-processing is included in the network when exporting the model.
  nms: True           # Whether NMS is included in the network when exporting the model.
  benchmark: False    # Used for benchmarking model performance; if set to `True`, post-processing and NMS will not be exported.
  fuse_conv_bn: False
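The TaskAlignedAssigner configured above scores each candidate cell by how well classification and localization agree. The sketch below computes that alignment metric for a single ground-truth box; it is a simplified illustration using the alpha, beta, and topk values from this configuration, not PaddleDetection's actual assigner code (which also restricts candidates to cells inside the ground-truth box and resolves cells claimed by several objects).

import numpy as np

def task_alignment_metric(cls_scores, ious, alpha=1.0, beta=6.0):
    # t = s^alpha * u^beta: s is the predicted score for the GT class,
    # u is the IoU between the predicted box and the ground-truth box.
    return (cls_scores ** alpha) * (ious ** beta)

def select_topk(metric, topk=13):
    # The topk cells with the highest alignment metric become positive samples.
    return np.argsort(metric)[::-1][:topk]

# Dummy predictions for 20 candidate cells and one ground-truth object.
rng = np.random.default_rng(0)
scores = rng.uniform(0.0, 1.0, size=20)  # classification scores for the GT class
ious = rng.uniform(0.0, 1.0, size=20)    # IoU of each predicted box with the GT box
t = task_alignment_metric(scores, ious, alpha=1.0, beta=6.0)
print(select_topk(t, topk=13))

With a standard PaddleDetection checkout, a configuration like this is typically passed to the framework's entry points: python tools/train.py -c <config>.yml --eval for training with periodic evaluation, and python tools/export_model.py -c <config>.yml -o weights=<checkpoint>.pdparams for deployment export.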
