Network Analysis: PP-YOLO

The PP-YOLO series was proposed by Baidu. PP-YOLOv2 was released in April 2021 (paper: https://arxiv.org/abs/2104.10419 ) and outperforms YOLOv4-CSP and YOLOv5-l at comparable parameter counts. After Megvii released YOLOX in July of that year, the Baidu team further optimized PP-YOLOv2 and proposed PP-YOLOE.

The main design points of PP-YOLOv2 are:
1) backbone: ResNet50-vd with deformable convolutions;
2) neck: PAN with an SPP layer and DropBlock;
3) head: a lightweight IoU-aware head;
4) activations: ReLU in the backbone, Mish in the neck;
5) label assignment: one anchor box assigned to each ground-truth object;
6) losses: classification loss, regression loss, objectness loss, IoU loss, and IoU-aware loss.

As for PP-YOLOE's improvements: the anchor-free approach first appeared in YOLOv1, but because directly regressing box coordinates was inaccurate at the time, the later YOLOv2, v3, v4, and v5 all switched to anchor boxes. YOLOX argued that anchor sizes obtained by clustering are tied to a specific domain and generalize poorly, and that anchors also increase head complexity and the number of predictions per image, so it returned to an anchor-free design; PP-YOLOE adopts the anchor-free approach as well.
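To make the anchor-free idea concrete, the sketch below decodes per-cell distance predictions into boxes on a single feature level. It is a minimal NumPy illustration, not the PaddleDetection implementation; the function names and the dummy predictions are made up, and only the grid-cell offset of 0.5 mirrors the grid_cell_offset used in the configuration below.

import numpy as np

def make_anchor_points(feat_h, feat_w, stride, offset=0.5):
    # Centers of grid cells on one FPN level, in input-image coordinates.
    ys, xs = np.meshgrid(np.arange(feat_h), np.arange(feat_w), indexing="ij")
    points = np.stack([(xs + offset) * stride, (ys + offset) * stride], axis=-1)
    return points.reshape(-1, 2)  # (H*W, 2): one (cx, cy) per cell, no anchor boxes

def decode_ltrb(points, ltrb):
    # Anchor-free decoding: each cell predicts distances (left, top, right, bottom)
    # from its center to the box borders, instead of offsets relative to anchor boxes.
    x1y1 = points - ltrb[:, :2]
    x2y2 = points + ltrb[:, 2:]
    return np.concatenate([x1y1, x2y2], axis=-1)  # (N, 4) boxes as x1, y1, x2, y2

# A 3x3 feature map at stride 32 with dummy predictions: every cell
# predicts a 32x32 box centred on itself.
pts = make_anchor_points(3, 3, stride=32)
dists = np.full((9, 4), 16.0)
print(decode_ltrb(pts, dists)[:2])

The rest of this post is the PaddleDetection YAML configuration used to train PP-YOLOE on a custom VOC-format dataset; the dataset and weight paths are specific to the author's environment.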

architecture: YOLOv3
#pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_x_obj365_pretrained.pdparams
pretrain_weights: /data/czy/PaddleDetection/output/model_20221222_waiguan/ppoex_waiguan/25.pdparams
depth_mult: 1.33
width_mult: 1.25
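# depth_mult 1.33 and width_mult 1.25 correspond to the largest (x) PP-YOLOE scale; the l model uses 1.0 for both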
norm_type: sync_bn
use_ema: true
ema_decay: 0.9998
ema_black_list: ['proj_conv.weight']
custom_black_list: ['reduce_mean']

YOLOv3:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEHead
  post_process: ~

CSPResNet:
  layers: [3, 6, 6, 3]
  channels: [64, 128, 256, 512, 1024]
  return_idx: [1, 2, 3]
  use_large_stem: True
  use_alpha: True

CustomCSPPAN:
  out_channels: [768, 384, 192]
  stage_num: 1
  block_num: 3
  act: 'swish'
  spp: true

PPYOLOEHead:
  fpn_strides: [32, 16, 8]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  static_assigner_epoch: 30
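  # with static_assigner_epoch: 30, the static ATSS assigner is used for the first 30 epochs and the TaskAlignedAssigner takes over afterwards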
  use_varifocal_loss: True
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
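  # loss_weight balances the classification, IoU, and distribution focal loss (DFL) terms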
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  nms:
    name: MatrixNMS
    keep_top_k: 100
    score_threshold: 0.01
    post_threshold: 0.01
    nms_top_k: -1
    background_label: -1


# voc dataset format
metric: VOC
map_type: 11point
num_classes: 17

TrainDataset:
  !VOCDataSet
    dataset_dir: /data/czy/VOC2007_wg_bmyw
    anno_path: train.txt
    label_list: label.txt
    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']

EvalDataset:
  !VOCDataSet
    dataset_dir: /data/czy/VOC2007_wg_bmyw
    anno_path: train.txt
    label_list: label.txt
    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']

TestDataset:
  !ImageFolder
    anno_path: /data/czy/VOC2007_wg_bmyw/label.txt
  
# PP-YOLOE reader
worker_num: 4
eval_height: &eval_height 1024 #704
eval_width: &eval_width 1024 #704
eval_size: &eval_size [*eval_height, *eval_width]
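# the &/* syntax defines YAML anchors and aliases, so the eval and test readers below reuse the same input resolution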

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomDistort: {}
    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    - RandomCrop: {}
    - RandomFlip: {}
  batch_transforms:
#    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344], random_size: True, random_interp: True, keep_ratio: False}
    - BatchRandomResize: {target_size: [ 640, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344], random_size: True, random_interp: True, keep_ratio: False}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
    - PadGT: {}
  batch_size: 2
  shuffle: true
  drop_last: false
  use_shared_memory: true
  collate_batch: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, *eval_height, *eval_width]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1



# '../cascade_rcnn/_base_/optimizer_1x.yml'
epoch: 80

LearningRate:
  base_lr: 0.0001 #0.0000625
  schedulers:
    - !CosineDecay
      max_epochs: 96
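      # the cosine schedule spans 96 epochs, slightly longer than the 80 training epochs (roughly the 1.2x margin used in the official PP-YOLOE configs)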
    - !LinearWarmup
      start_factor: 0.
      epochs: 5

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0005
    type: L2


# '../runtime.yml'
use_gpu: true
use_xpu: false
log_iter: 20
save_dir: output/model
snapshot_epoch: 1
print_flops: false

# Exporting the model
export:
  post_process: True  # Whether post-processing is included in the network when exporting the model.
  nms: True           # Whether NMS is included in the network when exporting the model.
  benchmark: False    # Used for benchmarking model performance; if set to `True`, post-processing and NMS will not be exported.
  fuse_conv_bn: False
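The TaskAlignedAssigner configured above scores each candidate cell by how well classification and localization agree. The sketch below computes that alignment metric for a single ground-truth box; it is a simplified illustration using the alpha, beta, and topk values from this configuration, not PaddleDetection's actual assigner code (which also restricts candidates to cells inside the ground-truth box and resolves cells claimed by several objects).

import numpy as np

def task_alignment_metric(cls_scores, ious, alpha=1.0, beta=6.0):
    # t = s^alpha * u^beta: s is the predicted score for the GT class,
    # u is the IoU between the predicted box and the ground-truth box.
    return (cls_scores ** alpha) * (ious ** beta)

def select_topk(metric, topk=13):
    # The topk cells with the highest alignment metric become positive samples.
    return np.argsort(metric)[::-1][:topk]

# Dummy predictions for 20 candidate cells and one ground-truth object.
rng = np.random.default_rng(0)
scores = rng.uniform(0.0, 1.0, size=20)  # classification scores for the GT class
ious = rng.uniform(0.0, 1.0, size=20)    # IoU of each predicted box with the GT box
t = task_alignment_metric(scores, ious, alpha=1.0, beta=6.0)
print(select_topk(t, topk=13))

With a standard PaddleDetection checkout, a configuration like this is typically passed to the framework's entry points: python tools/train.py -c <config>.yml --eval for training with periodic evaluation, and python tools/export_model.py -c <config>.yml -o weights=<checkpoint>.pdparams for deployment export.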
