yolov3网络结构笔记

从 yolo3_darknet53_coco(gluoncv/model_zoo/yolo/yolo3.py)调试获得,供参考

 

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
================================================================================
               Input                            (1, 3, 512, 703)               0
            Conv2D-1                           (1, 32, 512, 703)             864
         BatchNorm-2                           (1, 32, 512, 703)             128
         LeakyReLU-3                           (1, 32, 512, 703)               0
            Conv2D-4                           (1, 64, 256, 352)           18432
         BatchNorm-5                           (1, 64, 256, 352)             256
         LeakyReLU-6                           (1, 64, 256, 352)               0
            Conv2D-7                           (1, 32, 256, 352)            2048
         BatchNorm-8                           (1, 32, 256, 352)             128
         LeakyReLU-9                           (1, 32, 256, 352)               0
           Conv2D-10                           (1, 64, 256, 352)           18432
        BatchNorm-11                           (1, 64, 256, 352)             256
        LeakyReLU-12                           (1, 64, 256, 352)               0
DarknetBasicBlockV3-13                           (1, 64, 256, 352)               0
           Conv2D-14                          (1, 128, 128, 176)           73728
        BatchNorm-15                          (1, 128, 128, 176)             512
        LeakyReLU-16                          (1, 128, 128, 176)               0
           Conv2D-17                           (1, 64, 128, 176)            8192
        BatchNorm-18                           (1, 64, 128, 176)             256
        LeakyReLU-19                           (1, 64, 128, 176)               0
           Conv2D-20                          (1, 128, 128, 176)           73728
        BatchNorm-21                          (1, 128, 128, 176)             512
        LeakyReLU-22                          (1, 128, 128, 176)               0
DarknetBasicBlockV3-23                          (1, 128, 128, 176)               0
           Conv2D-24                           (1, 64, 128, 176)            8192
        BatchNorm-25                           (1, 64, 128, 176)             256
        LeakyReLU-26                           (1, 64, 128, 176)               0
           Conv2D-27                          (1, 128, 128, 176)           73728
        BatchNorm-28                          (1, 128, 128, 176)             512
        LeakyReLU-29                          (1, 128, 128, 176)               0
DarknetBasicBlockV3-30                          (1, 128, 128, 176)               0
           Conv2D-31                            (1, 256, 64, 88)          294912
        BatchNorm-32                            (1, 256, 64, 88)            1024
        LeakyReLU-33                            (1, 256, 64, 88)               0
           Conv2D-34                            (1, 128, 64, 88)           32768
        BatchNorm-35                            (1, 128, 64, 88)             512
        LeakyReLU-36                            (1, 128, 64, 88)               0
           Conv2D-37                            (1, 256, 64, 88)          294912
        BatchNorm-38                            (1, 256, 64, 88)            1024
        LeakyReLU-39                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-40                            (1, 256, 64, 88)               0
           Conv2D-41                            (1, 128, 64, 88)           32768
        BatchNorm-42                            (1, 128, 64, 88)             512
        LeakyReLU-43                            (1, 128, 64, 88)               0
           Conv2D-44                            (1, 256, 64, 88)          294912
        BatchNorm-45                            (1, 256, 64, 88)            1024
        LeakyReLU-46                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-47                            (1, 256, 64, 88)               0
           Conv2D-48                            (1, 128, 64, 88)           32768
        BatchNorm-49                            (1, 128, 64, 88)             512
        LeakyReLU-50                            (1, 128, 64, 88)               0
           Conv2D-51                            (1, 256, 64, 88)          294912
        BatchNorm-52                            (1, 256, 64, 88)            1024
        LeakyReLU-53                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-54                            (1, 256, 64, 88)               0
           Conv2D-55                            (1, 128, 64, 88)           32768
        BatchNorm-56                            (1, 128, 64, 88)             512
        LeakyReLU-57                            (1, 128, 64, 88)               0
           Conv2D-58                            (1, 256, 64, 88)          294912
        BatchNorm-59                            (1, 256, 64, 88)            1024
        LeakyReLU-60                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-61                            (1, 256, 64, 88)               0
           Conv2D-62                            (1, 128, 64, 88)           32768
        BatchNorm-63                            (1, 128, 64, 88)             512
        LeakyReLU-64                            (1, 128, 64, 88)               0
           Conv2D-65                            (1, 256, 64, 88)          294912
        BatchNorm-66                            (1, 256, 64, 88)            1024
        LeakyReLU-67                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-68                            (1, 256, 64, 88)               0
           Conv2D-69                            (1, 128, 64, 88)           32768
        BatchNorm-70                            (1, 128, 64, 88)             512
        LeakyReLU-71                            (1, 128, 64, 88)               0
           Conv2D-72                            (1, 256, 64, 88)          294912
        BatchNorm-73                            (1, 256, 64, 88)            1024
        LeakyReLU-74                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-75                            (1, 256, 64, 88)               0
           Conv2D-76                            (1, 128, 64, 88)           32768
        BatchNorm-77                            (1, 128, 64, 88)             512
        LeakyReLU-78                            (1, 128, 64, 88)               0
           Conv2D-79                            (1, 256, 64, 88)          294912
        BatchNorm-80                            (1, 256, 64, 88)            1024
        LeakyReLU-81                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-82                            (1, 256, 64, 88)               0
           Conv2D-83                            (1, 128, 64, 88)           32768
        BatchNorm-84                            (1, 128, 64, 88)             512
        LeakyReLU-85                            (1, 128, 64, 88)               0
           Conv2D-86                            (1, 256, 64, 88)          294912
        BatchNorm-87                            (1, 256, 64, 88)            1024
        LeakyReLU-88                            (1, 256, 64, 88)               0
DarknetBasicBlockV3-89                            (1, 256, 64, 88)               0
darknet53.features[:15]
--------------------------------------------------------------------------------------------------
           Conv2D-90                            (1, 512, 32, 44)         1179648
        BatchNorm-91                            (1, 512, 32, 44)            2048
        LeakyReLU-92                            (1, 512, 32, 44)               0
           Conv2D-93                            (1, 256, 32, 44)          131072
        BatchNorm-94                            (1, 256, 32, 44)            1024
        LeakyReLU-95                            (1, 256, 32, 44)               0
           Conv2D-96                            (1, 512, 32, 44)         1179648
        BatchNorm-97                            (1, 512, 32, 44)            2048
        LeakyReLU-98                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-99                            (1, 512, 32, 44)               0
          Conv2D-100                            (1, 256, 32, 44)          131072
       BatchNorm-101                            (1, 256, 32, 44)            1024
       LeakyReLU-102                            (1, 256, 32, 44)               0
          Conv2D-103                            (1, 512, 32, 44)         1179648
       BatchNorm-104                            (1, 512, 32, 44)            2048
       LeakyReLU-105                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-106                            (1, 512, 32, 44)               0
          Conv2D-107                            (1, 256, 32, 44)          131072
       BatchNorm-108                            (1, 256, 32, 44)            1024
       LeakyReLU-109                            (1, 256, 32, 44)               0
          Conv2D-110                            (1, 512, 32, 44)         1179648
       BatchNorm-111                            (1, 512, 32, 44)            2048
       LeakyReLU-112                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-113                            (1, 512, 32, 44)               0
          Conv2D-114                            (1, 256, 32, 44)          131072
       BatchNorm-115                            (1, 256, 32, 44)            1024
       LeakyReLU-116                            (1, 256, 32, 44)               0
          Conv2D-117                            (1, 512, 32, 44)         1179648
       BatchNorm-118                            (1, 512, 32, 44)            2048
       LeakyReLU-119                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-120                            (1, 512, 32, 44)               0
          Conv2D-121                            (1, 256, 32, 44)          131072
       BatchNorm-122                            (1, 256, 32, 44)            1024
       LeakyReLU-123                            (1, 256, 32, 44)               0
          Conv2D-124                            (1, 512, 32, 44)         1179648
       BatchNorm-125                            (1, 512, 32, 44)            2048
       LeakyReLU-126                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-127                            (1, 512, 32, 44)               0
          Conv2D-128                            (1, 256, 32, 44)          131072
       BatchNorm-129                            (1, 256, 32, 44)            1024
       LeakyReLU-130                            (1, 256, 32, 44)               0
          Conv2D-131                            (1, 512, 32, 44)         1179648
       BatchNorm-132                            (1, 512, 32, 44)            2048
       LeakyReLU-133                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-134                            (1, 512, 32, 44)               0
          Conv2D-135                            (1, 256, 32, 44)          131072
       BatchNorm-136                            (1, 256, 32, 44)            1024
       LeakyReLU-137                            (1, 256, 32, 44)               0
          Conv2D-138                            (1, 512, 32, 44)         1179648
       BatchNorm-139                            (1, 512, 32, 44)            2048
       LeakyReLU-140                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-141                            (1, 512, 32, 44)               0
          Conv2D-142                            (1, 256, 32, 44)          131072
       BatchNorm-143                            (1, 256, 32, 44)            1024
       LeakyReLU-144                            (1, 256, 32, 44)               0
          Conv2D-145                            (1, 512, 32, 44)         1179648
       BatchNorm-146                            (1, 512, 32, 44)            2048
       LeakyReLU-147                            (1, 512, 32, 44)               0
DarknetBasicBlockV3-148                            (1, 512, 32, 44)               0
darknet53.features[15:24]
--------------------------------------------------------------------------------------------------
          Conv2D-149                           (1, 1024, 16, 22)         4718592
       BatchNorm-150                           (1, 1024, 16, 22)            4096
       LeakyReLU-151                           (1, 1024, 16, 22)               0
          Conv2D-152                            (1, 512, 16, 22)          524288
       BatchNorm-153                            (1, 512, 16, 22)            2048
       LeakyReLU-154                            (1, 512, 16, 22)               0
          Conv2D-155                           (1, 1024, 16, 22)         4718592
       BatchNorm-156                           (1, 1024, 16, 22)            4096
       LeakyReLU-157                           (1, 1024, 16, 22)               0
DarknetBasicBlockV3-158                           (1, 1024, 16, 22)               0
          Conv2D-159                            (1, 512, 16, 22)          524288
       BatchNorm-160                            (1, 512, 16, 22)            2048
       LeakyReLU-161                            (1, 512, 16, 22)               0
          Conv2D-162                           (1, 1024, 16, 22)         4718592
       BatchNorm-163                           (1, 1024, 16, 22)            4096
       LeakyReLU-164                           (1, 1024, 16, 22)               0
DarknetBasicBlockV3-165                           (1, 1024, 16, 22)               0
          Conv2D-166                            (1, 512, 16, 22)          524288
       BatchNorm-167                            (1, 512, 16, 22)            2048
       LeakyReLU-168                            (1, 512, 16, 22)               0
          Conv2D-169                           (1, 1024, 16, 22)         4718592
       BatchNorm-170                           (1, 1024, 16, 22)            4096
       LeakyReLU-171                           (1, 1024, 16, 22)               0
DarknetBasicBlockV3-172                           (1, 1024, 16, 22)               0
          Conv2D-173                            (1, 512, 16, 22)          524288
       BatchNorm-174                            (1, 512, 16, 22)            2048
       LeakyReLU-175                            (1, 512, 16, 22)               0
          Conv2D-176                           (1, 1024, 16, 22)         4718592
       BatchNorm-177                           (1, 1024, 16, 22)            4096
       LeakyReLU-178                           (1, 1024, 16, 22)               0
DarknetBasicBlockV3-179                           (1, 1024, 16, 22)               0
darknet53.features[24:]
--------------------------------------------------------------------------------------------------
          Conv2D-180                            (1, 512, 16, 22)          524288
       BatchNorm-181                            (1, 512, 16, 22)            2048
       LeakyReLU-182                            (1, 512, 16, 22)               0
          Conv2D-183                           (1, 1024, 16, 22)         4718592
       BatchNorm-184                           (1, 1024, 16, 22)            4096
       LeakyReLU-185                           (1, 1024, 16, 22)               0
          Conv2D-186                            (1, 512, 16, 22)          524288
       BatchNorm-187                            (1, 512, 16, 22)            2048
       LeakyReLU-188                            (1, 512, 16, 22)               0
          Conv2D-189                           (1, 1024, 16, 22)         4718592
       BatchNorm-190                           (1, 1024, 16, 22)            4096
       LeakyReLU-191                           (1, 1024, 16, 22)               0
          Conv2D-192                            (1, 512, 16, 22)          524288
       BatchNorm-193                            (1, 512, 16, 22)            2048
       LeakyReLU-194                            (1, 512, 16, 22)               0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
          Conv2D-195                           (1, 1024, 16, 22)         4718592
       BatchNorm-196                           (1, 1024, 16, 22)            4096
       LeakyReLU-197                           (1, 1024, 16, 22)               0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-198         (1, 512, 16, 22), (1, 1024, 16, 22)               0
--------------------------------------------------------------------------------------------------
          Conv2D-199                            (1, 255, 16, 22)          261375
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
    YOLOOutputV3-200                               (1, 84480, 6)           32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
          Conv2D-201                            (1, 256, 16, 22)          131072
       BatchNorm-202                            (1, 256, 16, 22)            1024
       LeakyReLU-203                            (1, 256, 16, 22)               0
YOLOV3.transitions[0]----->_conv2d
--------------------------------------------------------------------------------------------------
          Conv2D-204                            (1, 256, 32, 44)          196608
       BatchNorm-205                            (1, 256, 32, 44)            1024
       LeakyReLU-206                            (1, 256, 32, 44)               0
          Conv2D-207                            (1, 512, 32, 44)         1179648
       BatchNorm-208                            (1, 512, 32, 44)            2048
       LeakyReLU-209                            (1, 512, 32, 44)               0
          Conv2D-210                            (1, 256, 32, 44)          131072
       BatchNorm-211                            (1, 256, 32, 44)            1024
       LeakyReLU-212                            (1, 256, 32, 44)               0
          Conv2D-213                            (1, 512, 32, 44)         1179648
       BatchNorm-214                            (1, 512, 32, 44)            2048
       LeakyReLU-215                            (1, 512, 32, 44)               0
          Conv2D-216                            (1, 256, 32, 44)          131072
       BatchNorm-217                            (1, 256, 32, 44)            1024
       LeakyReLU-218                            (1, 256, 32, 44)               0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
          Conv2D-219                            (1, 512, 32, 44)         1179648
       BatchNorm-220                            (1, 512, 32, 44)            2048
       LeakyReLU-221                            (1, 512, 32, 44)               0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-222          (1, 256, 32, 44), (1, 512, 32, 44)               0
--------------------------------------------------------------------------------------------------
          Conv2D-223                            (1, 255, 32, 44)          130815
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
    YOLOOutputV3-224                              (1, 337920, 6)           32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
          Conv2D-225                            (1, 128, 32, 44)           32768
       BatchNorm-226                            (1, 128, 32, 44)             512
       LeakyReLU-227                            (1, 128, 32, 44)               0
YOLOV3.transitions[1]----->_conv2d
--------------------------------------------------------------------------------------------------
          Conv2D-228                            (1, 128, 64, 88)           49152
       BatchNorm-229                            (1, 128, 64, 88)             512
       LeakyReLU-230                            (1, 128, 64, 88)               0
          Conv2D-231                            (1, 256, 64, 88)          294912
       BatchNorm-232                            (1, 256, 64, 88)            1024
       LeakyReLU-233                            (1, 256, 64, 88)               0
          Conv2D-234                            (1, 128, 64, 88)           32768
       BatchNorm-235                            (1, 128, 64, 88)             512
       LeakyReLU-236                            (1, 128, 64, 88)               0
          Conv2D-237                            (1, 256, 64, 88)          294912
       BatchNorm-238                            (1, 256, 64, 88)            1024
       LeakyReLU-239                            (1, 256, 64, 88)               0
          Conv2D-240                            (1, 128, 64, 88)           32768
       BatchNorm-241                            (1, 128, 64, 88)             512
       LeakyReLU-242                            (1, 128, 64, 88)               0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
          Conv2D-243                            (1, 256, 64, 88)          294912
       BatchNorm-244                            (1, 256, 64, 88)            1024
       LeakyReLU-245                            (1, 256, 64, 88)               0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-246          (1, 128, 64, 88), (1, 256, 64, 88)               0
--------------------------------------------------------------------------------------------------
          Conv2D-247                            (1, 255, 64, 88)           65535
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
    YOLOOutputV3-248                             (1, 1351680, 6)           32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
          YOLOV3-249       (1, 100, 1), (1, 100, 1), (1, 100, 4)               0
================================================================================

 

打印日志如下,有用的没用的都放这里了

darknet_version =  v3
num_layers =  53
darknet layers =  [1, 2, 8, 8, 4]
darknet channels =  [32, 64, 128, 256, 512, 1024]
anchors =  [116.  90. 156. 198. 373. 326.]
self._num_pred =  85
self._num_anchors =  3
all_pred = 255
anchors =  [[[[116.  90.]
   [156. 198.]
   [373. 326.]]]]
self.anchors =  
[[[[116.  90.]
   [156. 198.]
   [373. 326.]]]]
<NDArray 1x1x3x2 @cpu(0)>
anchors =  [ 30.  61.  62.  45.  59. 119.]
self._num_pred =  85
self._num_anchors =  3
all_pred = 255
anchors =  [[[[ 30.  61.]
   [ 62.  45.]
   [ 59. 119.]]]]
self.anchors =  
[[[[ 30.  61.]
   [ 62.  45.]
   [ 59. 119.]]]]
<NDArray 1x1x3x2 @cpu(0)>
anchors =  [10. 13. 16. 30. 33. 23.]
self._num_pred =  85
self._num_anchors =  3
all_pred = 255
anchors =  [[[[10. 13.]
   [16. 30.]
   [33. 23.]]]]
self.anchors =  
[[[[10. 13.]
   [16. 30.]
   [33. 23.]]]]
<NDArray 1x1x3x2 @cpu(0)>
Shape of pre-processed image: (1, 3, 512, 703)
x.shape =  (1, 256, 64, 88)
x.shape =  (1, 512, 32, 44)
x.shape =  (1, 1024, 16, 22)
pred.shape =  (1, 255, 16, 22)
pred.shape =  (1, 255, 352)
pred.shape =  (1, 352, 3, 85)
raw_box_centers.shape =  (1, 352, 3, 2)
raw_box_scales.shape =  (1, 352, 3, 2)
objness.shape =  (1, 352, 3, 1)
class_pred.shape =  (1, 352, 3, 80)
offsets.shape =  (1, 1, 128, 128, 2)
slice_like offsets.shape =  (1, 1, 16, 22, 2)
reshape offsets.shape =  (1, 352, 1, 2)
box_centers.shape =  (1, 352, 3, 2)
anchors.shape =  (1, 1, 3, 2)
box_scales.shape =  (1, 352, 3, 2)
confidence.shape =  (1, 352, 3, 1)
class_score.shape =  (1, 352, 3, 80)
bbox.shape =  (1, 352, 3, 4)
bboxes.shape =  (80, 1, 352, 3, 4)
scores.shape =  (80, 1, 352, 3, 1)
ids.shape =  (80, 1, 352, 3, 1)
detections =  (80, 1, 352, 3, 6)
detections =  (1, 84480, 6)
dets.shape =  (1, 84480, 6)
self.transitions =  <class 'mxnet.gluon.nn.basic_layers.HybridSequential'>
x.shape =  (1, 256, 16, 22)
upsample.shape =  (1, 256, 32, 44)
route_now.shape =  (1, 512, 32, 44)
x.shape =  (1, 768, 32, 44)
pred.shape =  (1, 255, 32, 44)
pred.shape =  (1, 255, 1408)
pred.shape =  (1, 1408, 3, 85)
raw_box_centers.shape =  (1, 1408, 3, 2)
raw_box_scales.shape =  (1, 1408, 3, 2)
objness.shape =  (1, 1408, 3, 1)
class_pred.shape =  (1, 1408, 3, 80)
offsets.shape =  (1, 1, 128, 128, 2)
slice_like offsets.shape =  (1, 1, 32, 44, 2)
reshape offsets.shape =  (1, 1408, 1, 2)
box_centers.shape =  (1, 1408, 3, 2)
anchors.shape =  (1, 1, 3, 2)
box_scales.shape =  (1, 1408, 3, 2)
confidence.shape =  (1, 1408, 3, 1)
class_score.shape =  (1, 1408, 3, 80)
bbox.shape =  (1, 1408, 3, 4)
bboxes.shape =  (80, 1, 1408, 3, 4)
scores.shape =  (80, 1, 1408, 3, 1)
ids.shape =  (80, 1, 1408, 3, 1)
detections =  (80, 1, 1408, 3, 6)
detections =  (1, 337920, 6)
dets.shape =  (1, 337920, 6)
self.transitions =  <class 'mxnet.gluon.nn.basic_layers.HybridSequential'>
x.shape =  (1, 128, 32, 44)
upsample.shape =  (1, 128, 64, 88)
route_now.shape =  (1, 256, 64, 88)
x.shape =  (1, 384, 64, 88)
pred.shape =  (1, 255, 64, 88)
pred.shape =  (1, 255, 5632)
pred.shape =  (1, 5632, 3, 85)
raw_box_centers.shape =  (1, 5632, 3, 2)
raw_box_scales.shape =  (1, 5632, 3, 2)
objness.shape =  (1, 5632, 3, 1)
class_pred.shape =  (1, 5632, 3, 80)
offsets.shape =  (1, 1, 128, 128, 2)
slice_like offsets.shape =  (1, 1, 64, 88, 2)
reshape offsets.shape =  (1, 5632, 1, 2)
box_centers.shape =  (1, 5632, 3, 2)
anchors.shape =  (1, 1, 3, 2)
box_scales.shape =  (1, 5632, 3, 2)
confidence.shape =  (1, 5632, 3, 1)
class_score.shape =  (1, 5632, 3, 80)
bbox.shape =  (1, 5632, 3, 4)
bboxes.shape =  (80, 1, 5632, 3, 4)
scores.shape =  (80, 1, 5632, 3, 1)
ids.shape =  (80, 1, 5632, 3, 1)
detections =  (80, 1, 5632, 3, 6)
detections =  (1, 1351680, 6)
dets.shape =  (1, 1351680, 6)
all_detections.shape =  3
all_detections[0].shape =  (1, 84480, 6)
result.shape =  (1, 1774080, 6)
after result.shape =  (1, 1774080, 6)
slice result.shape =  (1, 100, 6)

自己总结的计算过程如下:

img ——> darknet53 ——> YOLODetectionBlockV3.body ——> YOLODetectionBlockV3.tip ——> YOLOOutputV3.prediction ——>
(reshape, transpose, slice_axis) ——> [raw_box_centers(2), raw_box_scales(2), objness(1), class_pred(80)] ——>
[box_centers(offsets, _stride), box_scales(anchors), confidence, class_score(class_pred * confidence)] ——> 
[bboxes(4), scores(1)] + ids(from arange, 1) ——> (reshape) ——> detections ——> concat(三个尺度的 detections) ——> box_nms ——> slice_axis ——>
(ids, scores, bboxes)

class YOLOV3(gluon.HybridBlock): 添加的打印如下:

    def hybrid_forward(self, F, x, *args):
        """YOLOV3 network hybrid forward (inference only, with debug prints).

        This variant contains only the inference path and prints the shape of
        every intermediate tensor so the data flow can be traced.

        NOTE(review): the debug prints read ``.shape``, which presumably only
        works in imperative (non-hybridized) mode -- confirm before hybridizing.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input data.
        *args : optional
            Accepted for interface compatibility; not used by this variant.

        Returns
        -------
        tuple of mxnet.nd.NDArray
            ``(ids, scores, bboxes)``: the concatenated detections split along
            the last axis into column 0 (class id), column 1 (score) and the
            remaining columns (box coordinates).
        """
        all_detections = []
        routes = []
        # run the backbone stage by stage and remember every stage output
        for stage in self.stages:
            x = stage(x)
            routes.append(x)
            print("x.shape = ", x.shape)

        num_routes = len(routes)

        # the YOLO output layers are used in reverse order, i.e., from very deep layers to shallow
        for i, block, output in zip(range(num_routes), self.yolo_blocks, self.yolo_outputs):
            x, tip = block(x)
            dets = output(tip)
            print("dets.shape = ", dets.shape)
            all_detections.append(dets)
            # the shallowest scale has no further route to merge with
            if i >= num_routes - 1:
                break
            # add transition layers
            print("self.transitions = ", type(self.transitions))
            x = self.transitions[i](x)
            print("x.shape = ", x.shape)
            # upsample feature map reverse to shallow layers
            upsample = _upsample(x, stride=2)
            print("upsample.shape = ", upsample.shape)
            # next shallower backbone output; same element as routes[::-1][i + 1]
            route_now = routes[-(i + 2)]
            print("route_now.shape = ", route_now.shape)
            x = F.concat(upsample, route_now, dim=1)
            print("x.shape = ", x.shape)

        print("all_detections.shape = ", len(all_detections))
        print("all_detections[0].shape = ", all_detections[0].shape)
        # concat all detection results from different stages
        result = F.concat(*all_detections, dim=1)
        print("result.shape = ", result.shape)
        # apply nms per class
        if 0 < self.nms_thresh < 1:
            result = F.contrib.box_nms(
                result, overlap_thresh=self.nms_thresh, valid_thresh=0.01,
                topk=self.nms_topk, id_index=0, score_index=1, coord_start=2, force_suppress=False)

            print("after result.shape = ", result.shape)

            # keep only the first `post_nms` entries along axis 1
            if self.post_nms > 0:
                result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
            print("slice result.shape = ", result.shape)
        # split the last axis into class id / score / box coordinates
        ids = result.slice_axis(axis=-1, begin=0, end=1)
        scores = result.slice_axis(axis=-1, begin=1, end=2)
        bboxes = result.slice_axis(axis=-1, begin=2, end=None)
        return ids, scores, bboxes

class YOLOOutputV3(gluon.HybridBlock):添加的打印如下:

class YOLOOutputV3(gluon.HybridBlock):
    """YOLO output layer V3, instrumented with debug prints.

    Applies a 1x1 convolution to a feature map and decodes the result into
    per-class detection rows ``[class_id, score, x1, y1, x2, y2]``.

    Parameters
    ----------
    index : int
        Index of the yolo output layer, to avoid naming conflicts only.
    num_class : int
        Number of foreground objects.
    anchors : iterable
        The anchor setting. Reference: https://arxiv.org/pdf/1804.02767.pdf.
    stride : int
        Stride of feature map.
    alloc_size : tuple of int, default is (128, 128)
        For advanced users. Define `alloc_size` to generate large enough anchor
        maps, which will later be saved in parameters. During inference, we support
        arbitrary input images by cropping the corresponding area of the anchor map.
        This allows us to export to symbol so we can run it in C++, Scala, etc.
    """
    def __init__(self, index, num_class, anchors, stride,
                 alloc_size=(128, 128), **kwargs):
        super(YOLOOutputV3, self).__init__(**kwargs)
        anchors = np.array(anchors).astype('float32')
        print("anchors = ", anchors)
        self._classes = num_class
        self._num_pred = 1 + 4 + num_class  # 1 objness + 4 box + num_class
        # anchors are (width, height) pairs, so two values per anchor
        self._num_anchors = anchors.size // 2
        self._stride = stride
        with self.name_scope():
            print("self._num_pred = ", self._num_pred)
            print("self._num_anchors = ", self._num_anchors)
            # one full prediction vector per anchor at every pixel
            all_pred = self._num_pred * self._num_anchors
            print("all_pred = %d" % all_pred)
            self.prediction = nn.Conv2D(all_pred, kernel_size=1, padding=0, strides=1)
            # anchors will be multiplied to predictions
            anchors = anchors.reshape(1, 1, -1, 2)
            print("anchors = ", anchors)
            self.anchors = self.params.get_constant('anchor_%d'%(index), anchors)
            print("self.anchors = ", self.anchors.value)
            # offsets will be added to predictions
            grid_x = np.arange(alloc_size[1])
            grid_y = np.arange(alloc_size[0])
            grid_x, grid_y = np.meshgrid(grid_x, grid_y)
            # stack to (alloc_size[0], alloc_size[1], 2), last axis is (x, y)
            offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1)
            # expand dims to (1, 1, alloc_size[0], alloc_size[1], 2) so it's easier for broadcasting
            offsets = np.expand_dims(np.expand_dims(offsets, axis=0), axis=0)
            self.offsets = self.params.get_constant('offset_%d'%(index), offsets)

    @staticmethod
    def _print_shape(label, array):
        """Print `label` followed by the shape of `array`.

        Symbols (seen when the block is hybridized or exported) expose no
        `shape` attribute, so a placeholder is printed instead of raising.
        """
        shape = getattr(array, 'shape', None)
        if shape is None:
            print(label, '<symbolic, shape unavailable>')
        else:
            print(label, shape)

    def hybrid_forward(self, F, x, anchors, offsets):
        """Hybrid Forward of YOLOV3Output layer.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input feature map.
        anchors : mxnet.nd.NDArray
            Anchors loaded from self, no need to supply.
        offsets : mxnet.nd.NDArray
            Offsets loaded from self, no need to supply.

        Returns
        -------
        mxnet.nd.NDArray
            Detections reshaped to (batch, -1, 6). The last axis is
            [class_id, score, x1, y1, x2, y2], where the box corners are
            ``center - size / 2`` and ``center + size / 2``.
            This instrumented version always returns detections; it has no
            separate training-mode output.
        """
        show = self._print_shape

        # prediction flat to (batch, pred per pixel, height * width)
        pred = self.prediction(x)
        show("pred.shape = ", pred)
        pred = pred.reshape((0, self._num_anchors * self._num_pred, -1))
        show("pred.shape = ", pred)
        # transpose to (batch, height * width, num_anchor, num_pred)
        pred = pred.transpose(axes=(0, 2, 1)).reshape((0, -1, self._num_anchors, self._num_pred))
        show("pred.shape = ", pred)
        # components: [0:2] box centers, [2:4] box scales, [4] objectness, [5:] classes
        raw_box_centers = pred.slice_axis(axis=-1, begin=0, end=2)
        show("raw_box_centers.shape = ", raw_box_centers)
        raw_box_scales = pred.slice_axis(axis=-1, begin=2, end=4)
        show("raw_box_scales.shape = ", raw_box_scales)
        objness = pred.slice_axis(axis=-1, begin=4, end=5)
        show("objness.shape = ", objness)
        class_pred = pred.slice_axis(axis=-1, begin=5, end=None)
        show("class_pred.shape = ", class_pred)

        # valid offsets, (1, 1, height, width, 2): crop the pre-allocated grid
        # to the spatial size of the incoming feature map
        show("offsets.shape = ", offsets)
        offsets = F.slice_like(offsets, x * 0, axes=(2, 3))
        show("slice_like offsets.shape = ", offsets)
        # reshape to (1, height*width, 1, 2)
        offsets = offsets.reshape((1, -1, 1, 2))
        show("reshape offsets.shape = ", offsets)

        # centers: sigmoid keeps the prediction inside its grid cell, the cell
        # offset is added, then the result is scaled by the stride
        box_centers = F.broadcast_add(F.sigmoid(raw_box_centers), offsets) * self._stride
        show("box_centers.shape = ", box_centers)
        show("anchors.shape = ", anchors)

        # sizes: exponentiated prediction scales the anchor
        box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
        show("box_scales.shape = ", box_scales)
        confidence = F.sigmoid(objness)
        show("confidence.shape = ", confidence)
        # per-class score = class probability * objectness
        class_score = F.broadcast_mul(F.sigmoid(class_pred), confidence)
        show("class_score.shape = ", class_score)
        wh = box_scales / 2.0
        # corner format: (center - half size, center + half size)
        bbox = F.concat(box_centers - wh, box_centers + wh, dim=-1)
        show("bbox.shape = ", bbox)

        # prediction per class: replicate every box once per class along a new leading axis
        bboxes = F.tile(bbox, reps=(self._classes, 1, 1, 1, 1))
        show("bboxes.shape = ", bboxes)
        # move the class axis to the front to line up with `bboxes`
        scores = F.transpose(class_score, axes=(3, 0, 1, 2)).expand_dims(axis=-1)
        show("scores.shape = ", scores)
        # class ids broadcast to the shape of `scores` (scores * 0 supplies the shape)
        ids = F.broadcast_add(scores * 0, F.arange(0, self._classes).reshape((0, 1, 1, 1, 1)))
        show("ids.shape = ", ids)
        detections = F.concat(ids, scores, bboxes, dim=-1)
        show("detections = ", detections)
        # bring batch back to the front, then reshape to (B, xx, 6)
        detections = F.reshape(detections.transpose(axes=(1, 0, 2, 3, 4)), (0, -1, 6))
        show("detections = ", detections)
        return detections

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值