以下网络结构通过调试 yolo3_darknet53_coco(gluoncv/model_zoo/yolo/yolo3.py)获得,供参考
--------------------------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================================
Input (1, 3, 512, 703) 0
Conv2D-1 (1, 32, 512, 703) 864
BatchNorm-2 (1, 32, 512, 703) 128
LeakyReLU-3 (1, 32, 512, 703) 0
Conv2D-4 (1, 64, 256, 352) 18432
BatchNorm-5 (1, 64, 256, 352) 256
LeakyReLU-6 (1, 64, 256, 352) 0
Conv2D-7 (1, 32, 256, 352) 2048
BatchNorm-8 (1, 32, 256, 352) 128
LeakyReLU-9 (1, 32, 256, 352) 0
Conv2D-10 (1, 64, 256, 352) 18432
BatchNorm-11 (1, 64, 256, 352) 256
LeakyReLU-12 (1, 64, 256, 352) 0
DarknetBasicBlockV3-13 (1, 64, 256, 352) 0
Conv2D-14 (1, 128, 128, 176) 73728
BatchNorm-15 (1, 128, 128, 176) 512
LeakyReLU-16 (1, 128, 128, 176) 0
Conv2D-17 (1, 64, 128, 176) 8192
BatchNorm-18 (1, 64, 128, 176) 256
LeakyReLU-19 (1, 64, 128, 176) 0
Conv2D-20 (1, 128, 128, 176) 73728
BatchNorm-21 (1, 128, 128, 176) 512
LeakyReLU-22 (1, 128, 128, 176) 0
DarknetBasicBlockV3-23 (1, 128, 128, 176) 0
Conv2D-24 (1, 64, 128, 176) 8192
BatchNorm-25 (1, 64, 128, 176) 256
LeakyReLU-26 (1, 64, 128, 176) 0
Conv2D-27 (1, 128, 128, 176) 73728
BatchNorm-28 (1, 128, 128, 176) 512
LeakyReLU-29 (1, 128, 128, 176) 0
DarknetBasicBlockV3-30 (1, 128, 128, 176) 0
Conv2D-31 (1, 256, 64, 88) 294912
BatchNorm-32 (1, 256, 64, 88) 1024
LeakyReLU-33 (1, 256, 64, 88) 0
Conv2D-34 (1, 128, 64, 88) 32768
BatchNorm-35 (1, 128, 64, 88) 512
LeakyReLU-36 (1, 128, 64, 88) 0
Conv2D-37 (1, 256, 64, 88) 294912
BatchNorm-38 (1, 256, 64, 88) 1024
LeakyReLU-39 (1, 256, 64, 88) 0
DarknetBasicBlockV3-40 (1, 256, 64, 88) 0
Conv2D-41 (1, 128, 64, 88) 32768
BatchNorm-42 (1, 128, 64, 88) 512
LeakyReLU-43 (1, 128, 64, 88) 0
Conv2D-44 (1, 256, 64, 88) 294912
BatchNorm-45 (1, 256, 64, 88) 1024
LeakyReLU-46 (1, 256, 64, 88) 0
DarknetBasicBlockV3-47 (1, 256, 64, 88) 0
Conv2D-48 (1, 128, 64, 88) 32768
BatchNorm-49 (1, 128, 64, 88) 512
LeakyReLU-50 (1, 128, 64, 88) 0
Conv2D-51 (1, 256, 64, 88) 294912
BatchNorm-52 (1, 256, 64, 88) 1024
LeakyReLU-53 (1, 256, 64, 88) 0
DarknetBasicBlockV3-54 (1, 256, 64, 88) 0
Conv2D-55 (1, 128, 64, 88) 32768
BatchNorm-56 (1, 128, 64, 88) 512
LeakyReLU-57 (1, 128, 64, 88) 0
Conv2D-58 (1, 256, 64, 88) 294912
BatchNorm-59 (1, 256, 64, 88) 1024
LeakyReLU-60 (1, 256, 64, 88) 0
DarknetBasicBlockV3-61 (1, 256, 64, 88) 0
Conv2D-62 (1, 128, 64, 88) 32768
BatchNorm-63 (1, 128, 64, 88) 512
LeakyReLU-64 (1, 128, 64, 88) 0
Conv2D-65 (1, 256, 64, 88) 294912
BatchNorm-66 (1, 256, 64, 88) 1024
LeakyReLU-67 (1, 256, 64, 88) 0
DarknetBasicBlockV3-68 (1, 256, 64, 88) 0
Conv2D-69 (1, 128, 64, 88) 32768
BatchNorm-70 (1, 128, 64, 88) 512
LeakyReLU-71 (1, 128, 64, 88) 0
Conv2D-72 (1, 256, 64, 88) 294912
BatchNorm-73 (1, 256, 64, 88) 1024
LeakyReLU-74 (1, 256, 64, 88) 0
DarknetBasicBlockV3-75 (1, 256, 64, 88) 0
Conv2D-76 (1, 128, 64, 88) 32768
BatchNorm-77 (1, 128, 64, 88) 512
LeakyReLU-78 (1, 128, 64, 88) 0
Conv2D-79 (1, 256, 64, 88) 294912
BatchNorm-80 (1, 256, 64, 88) 1024
LeakyReLU-81 (1, 256, 64, 88) 0
DarknetBasicBlockV3-82 (1, 256, 64, 88) 0
Conv2D-83 (1, 128, 64, 88) 32768
BatchNorm-84 (1, 128, 64, 88) 512
LeakyReLU-85 (1, 128, 64, 88) 0
Conv2D-86 (1, 256, 64, 88) 294912
BatchNorm-87 (1, 256, 64, 88) 1024
LeakyReLU-88 (1, 256, 64, 88) 0
DarknetBasicBlockV3-89 (1, 256, 64, 88) 0
darknet53.features[:15]
--------------------------------------------------------------------------------------------------
Conv2D-90 (1, 512, 32, 44) 1179648
BatchNorm-91 (1, 512, 32, 44) 2048
LeakyReLU-92 (1, 512, 32, 44) 0
Conv2D-93 (1, 256, 32, 44) 131072
BatchNorm-94 (1, 256, 32, 44) 1024
LeakyReLU-95 (1, 256, 32, 44) 0
Conv2D-96 (1, 512, 32, 44) 1179648
BatchNorm-97 (1, 512, 32, 44) 2048
LeakyReLU-98 (1, 512, 32, 44) 0
DarknetBasicBlockV3-99 (1, 512, 32, 44) 0
Conv2D-100 (1, 256, 32, 44) 131072
BatchNorm-101 (1, 256, 32, 44) 1024
LeakyReLU-102 (1, 256, 32, 44) 0
Conv2D-103 (1, 512, 32, 44) 1179648
BatchNorm-104 (1, 512, 32, 44) 2048
LeakyReLU-105 (1, 512, 32, 44) 0
DarknetBasicBlockV3-106 (1, 512, 32, 44) 0
Conv2D-107 (1, 256, 32, 44) 131072
BatchNorm-108 (1, 256, 32, 44) 1024
LeakyReLU-109 (1, 256, 32, 44) 0
Conv2D-110 (1, 512, 32, 44) 1179648
BatchNorm-111 (1, 512, 32, 44) 2048
LeakyReLU-112 (1, 512, 32, 44) 0
DarknetBasicBlockV3-113 (1, 512, 32, 44) 0
Conv2D-114 (1, 256, 32, 44) 131072
BatchNorm-115 (1, 256, 32, 44) 1024
LeakyReLU-116 (1, 256, 32, 44) 0
Conv2D-117 (1, 512, 32, 44) 1179648
BatchNorm-118 (1, 512, 32, 44) 2048
LeakyReLU-119 (1, 512, 32, 44) 0
DarknetBasicBlockV3-120 (1, 512, 32, 44) 0
Conv2D-121 (1, 256, 32, 44) 131072
BatchNorm-122 (1, 256, 32, 44) 1024
LeakyReLU-123 (1, 256, 32, 44) 0
Conv2D-124 (1, 512, 32, 44) 1179648
BatchNorm-125 (1, 512, 32, 44) 2048
LeakyReLU-126 (1, 512, 32, 44) 0
DarknetBasicBlockV3-127 (1, 512, 32, 44) 0
Conv2D-128 (1, 256, 32, 44) 131072
BatchNorm-129 (1, 256, 32, 44) 1024
LeakyReLU-130 (1, 256, 32, 44) 0
Conv2D-131 (1, 512, 32, 44) 1179648
BatchNorm-132 (1, 512, 32, 44) 2048
LeakyReLU-133 (1, 512, 32, 44) 0
DarknetBasicBlockV3-134 (1, 512, 32, 44) 0
Conv2D-135 (1, 256, 32, 44) 131072
BatchNorm-136 (1, 256, 32, 44) 1024
LeakyReLU-137 (1, 256, 32, 44) 0
Conv2D-138 (1, 512, 32, 44) 1179648
BatchNorm-139 (1, 512, 32, 44) 2048
LeakyReLU-140 (1, 512, 32, 44) 0
DarknetBasicBlockV3-141 (1, 512, 32, 44) 0
Conv2D-142 (1, 256, 32, 44) 131072
BatchNorm-143 (1, 256, 32, 44) 1024
LeakyReLU-144 (1, 256, 32, 44) 0
Conv2D-145 (1, 512, 32, 44) 1179648
BatchNorm-146 (1, 512, 32, 44) 2048
LeakyReLU-147 (1, 512, 32, 44) 0
DarknetBasicBlockV3-148 (1, 512, 32, 44) 0
darknet53.features[15:24]
--------------------------------------------------------------------------------------------------
Conv2D-149 (1, 1024, 16, 22) 4718592
BatchNorm-150 (1, 1024, 16, 22) 4096
LeakyReLU-151 (1, 1024, 16, 22) 0
Conv2D-152 (1, 512, 16, 22) 524288
BatchNorm-153 (1, 512, 16, 22) 2048
LeakyReLU-154 (1, 512, 16, 22) 0
Conv2D-155 (1, 1024, 16, 22) 4718592
BatchNorm-156 (1, 1024, 16, 22) 4096
LeakyReLU-157 (1, 1024, 16, 22) 0
DarknetBasicBlockV3-158 (1, 1024, 16, 22) 0
Conv2D-159 (1, 512, 16, 22) 524288
BatchNorm-160 (1, 512, 16, 22) 2048
LeakyReLU-161 (1, 512, 16, 22) 0
Conv2D-162 (1, 1024, 16, 22) 4718592
BatchNorm-163 (1, 1024, 16, 22) 4096
LeakyReLU-164 (1, 1024, 16, 22) 0
DarknetBasicBlockV3-165 (1, 1024, 16, 22) 0
Conv2D-166 (1, 512, 16, 22) 524288
BatchNorm-167 (1, 512, 16, 22) 2048
LeakyReLU-168 (1, 512, 16, 22) 0
Conv2D-169 (1, 1024, 16, 22) 4718592
BatchNorm-170 (1, 1024, 16, 22) 4096
LeakyReLU-171 (1, 1024, 16, 22) 0
DarknetBasicBlockV3-172 (1, 1024, 16, 22) 0
Conv2D-173 (1, 512, 16, 22) 524288
BatchNorm-174 (1, 512, 16, 22) 2048
LeakyReLU-175 (1, 512, 16, 22) 0
Conv2D-176 (1, 1024, 16, 22) 4718592
BatchNorm-177 (1, 1024, 16, 22) 4096
LeakyReLU-178 (1, 1024, 16, 22) 0
DarknetBasicBlockV3-179 (1, 1024, 16, 22) 0
darknet53.features[24:]
--------------------------------------------------------------------------------------------------
Conv2D-180 (1, 512, 16, 22) 524288
BatchNorm-181 (1, 512, 16, 22) 2048
LeakyReLU-182 (1, 512, 16, 22) 0
Conv2D-183 (1, 1024, 16, 22) 4718592
BatchNorm-184 (1, 1024, 16, 22) 4096
LeakyReLU-185 (1, 1024, 16, 22) 0
Conv2D-186 (1, 512, 16, 22) 524288
BatchNorm-187 (1, 512, 16, 22) 2048
LeakyReLU-188 (1, 512, 16, 22) 0
Conv2D-189 (1, 1024, 16, 22) 4718592
BatchNorm-190 (1, 1024, 16, 22) 4096
LeakyReLU-191 (1, 1024, 16, 22) 0
Conv2D-192 (1, 512, 16, 22) 524288
BatchNorm-193 (1, 512, 16, 22) 2048
LeakyReLU-194 (1, 512, 16, 22) 0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
Conv2D-195 (1, 1024, 16, 22) 4718592
BatchNorm-196 (1, 1024, 16, 22) 4096
LeakyReLU-197 (1, 1024, 16, 22) 0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-198 (1, 512, 16, 22), (1, 1024, 16, 22) 0
--------------------------------------------------------------------------------------------------
Conv2D-199 (1, 255, 16, 22) 261375
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
YOLOOutputV3-200 (1, 84480, 6) 32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
Conv2D-201 (1, 256, 16, 22) 131072
BatchNorm-202 (1, 256, 16, 22) 1024
LeakyReLU-203 (1, 256, 16, 22) 0
YOLOV3.transitions[0]----->_conv2d
--------------------------------------------------------------------------------------------------
Conv2D-204 (1, 256, 32, 44) 196608
BatchNorm-205 (1, 256, 32, 44) 1024
LeakyReLU-206 (1, 256, 32, 44) 0
Conv2D-207 (1, 512, 32, 44) 1179648
BatchNorm-208 (1, 512, 32, 44) 2048
LeakyReLU-209 (1, 512, 32, 44) 0
Conv2D-210 (1, 256, 32, 44) 131072
BatchNorm-211 (1, 256, 32, 44) 1024
LeakyReLU-212 (1, 256, 32, 44) 0
Conv2D-213 (1, 512, 32, 44) 1179648
BatchNorm-214 (1, 512, 32, 44) 2048
LeakyReLU-215 (1, 512, 32, 44) 0
Conv2D-216 (1, 256, 32, 44) 131072
BatchNorm-217 (1, 256, 32, 44) 1024
LeakyReLU-218 (1, 256, 32, 44) 0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
Conv2D-219 (1, 512, 32, 44) 1179648
BatchNorm-220 (1, 512, 32, 44) 2048
LeakyReLU-221 (1, 512, 32, 44) 0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-222 (1, 256, 32, 44), (1, 512, 32, 44) 0
--------------------------------------------------------------------------------------------------
Conv2D-223 (1, 255, 32, 44) 130815
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
YOLOOutputV3-224 (1, 337920, 6) 32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
Conv2D-225 (1, 128, 32, 44) 32768
BatchNorm-226 (1, 128, 32, 44) 512
LeakyReLU-227 (1, 128, 32, 44) 0
YOLOV3.transitions[1]----->_conv2d
--------------------------------------------------------------------------------------------------
Conv2D-228 (1, 128, 64, 88) 49152
BatchNorm-229 (1, 128, 64, 88) 512
LeakyReLU-230 (1, 128, 64, 88) 0
Conv2D-231 (1, 256, 64, 88) 294912
BatchNorm-232 (1, 256, 64, 88) 1024
LeakyReLU-233 (1, 256, 64, 88) 0
Conv2D-234 (1, 128, 64, 88) 32768
BatchNorm-235 (1, 128, 64, 88) 512
LeakyReLU-236 (1, 128, 64, 88) 0
Conv2D-237 (1, 256, 64, 88) 294912
BatchNorm-238 (1, 256, 64, 88) 1024
LeakyReLU-239 (1, 256, 64, 88) 0
Conv2D-240 (1, 128, 64, 88) 32768
BatchNorm-241 (1, 128, 64, 88) 512
LeakyReLU-242 (1, 128, 64, 88) 0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
Conv2D-243 (1, 256, 64, 88) 294912
BatchNorm-244 (1, 256, 64, 88) 1024
LeakyReLU-245 (1, 256, 64, 88) 0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-246 (1, 128, 64, 88), (1, 256, 64, 88) 0
--------------------------------------------------------------------------------------------------
Conv2D-247 (1, 255, 64, 88) 65535
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
YOLOOutputV3-248 (1, 1351680, 6) 32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
YOLOV3-249 (1, 100, 1), (1, 100, 1), (1, 100, 4) 0
================================================================================
打印日志如下,有用的没用的都放这里了
darknet_version = v3
num_layers = 53
darknet layers = [1, 2, 8, 8, 4]
darknet channels = [32, 64, 128, 256, 512, 1024]
anchors = [116. 90. 156. 198. 373. 326.]
self._num_pred = 85
self._num_anchors = 3
all_pred = 255
anchors = [[[[116. 90.]
[156. 198.]
[373. 326.]]]]
self.anchors =
[[[[116. 90.]
[156. 198.]
[373. 326.]]]]
<NDArray 1x1x3x2 @cpu(0)>
anchors = [ 30. 61. 62. 45. 59. 119.]
self._num_pred = 85
self._num_anchors = 3
all_pred = 255
anchors = [[[[ 30. 61.]
[ 62. 45.]
[ 59. 119.]]]]
self.anchors =
[[[[ 30. 61.]
[ 62. 45.]
[ 59. 119.]]]]
<NDArray 1x1x3x2 @cpu(0)>
anchors = [10. 13. 16. 30. 33. 23.]
self._num_pred = 85
self._num_anchors = 3
all_pred = 255
anchors = [[[[10. 13.]
[16. 30.]
[33. 23.]]]]
self.anchors =
[[[[10. 13.]
[16. 30.]
[33. 23.]]]]
<NDArray 1x1x3x2 @cpu(0)>
Shape of pre-processed image: (1, 3, 512, 703)
x.shape = (1, 256, 64, 88)
x.shape = (1, 512, 32, 44)
x.shape = (1, 1024, 16, 22)
pred.shape = (1, 255, 16, 22)
pred.shape = (1, 255, 352)
pred.shape = (1, 352, 3, 85)
raw_box_centers.shape = (1, 352, 3, 2)
raw_box_scales.shape = (1, 352, 3, 2)
objness.shape = (1, 352, 3, 1)
class_pred.shape = (1, 352, 3, 80)
offsets.shape = (1, 1, 128, 128, 2)
slice_like offsets.shape = (1, 1, 16, 22, 2)
reshape offsets.shape = (1, 352, 1, 2)
box_centers.shape = (1, 352, 3, 2)
anchors.shape = (1, 1, 3, 2)
box_scales.shape = (1, 352, 3, 2)
confidence.shape = (1, 352, 3, 1)
class_score.shape = (1, 352, 3, 80)
bbox.shape = (1, 352, 3, 4)
bboxes.shape = (80, 1, 352, 3, 4)
scores.shape = (80, 1, 352, 3, 1)
ids.shape = (80, 1, 352, 3, 1)
detections = (80, 1, 352, 3, 6)
detections = (1, 84480, 6)
dets.shape = (1, 84480, 6)
self.transitions = <class 'mxnet.gluon.nn.basic_layers.HybridSequential'>
x.shape = (1, 256, 16, 22)
upsample.shape = (1, 256, 32, 44)
route_now.shape = (1, 512, 32, 44)
x.shape = (1, 768, 32, 44)
pred.shape = (1, 255, 32, 44)
pred.shape = (1, 255, 1408)
pred.shape = (1, 1408, 3, 85)
raw_box_centers.shape = (1, 1408, 3, 2)
raw_box_scales.shape = (1, 1408, 3, 2)
objness.shape = (1, 1408, 3, 1)
class_pred.shape = (1, 1408, 3, 80)
offsets.shape = (1, 1, 128, 128, 2)
slice_like offsets.shape = (1, 1, 32, 44, 2)
reshape offsets.shape = (1, 1408, 1, 2)
box_centers.shape = (1, 1408, 3, 2)
anchors.shape = (1, 1, 3, 2)
box_scales.shape = (1, 1408, 3, 2)
confidence.shape = (1, 1408, 3, 1)
class_score.shape = (1, 1408, 3, 80)
bbox.shape = (1, 1408, 3, 4)
bboxes.shape = (80, 1, 1408, 3, 4)
scores.shape = (80, 1, 1408, 3, 1)
ids.shape = (80, 1, 1408, 3, 1)
detections = (80, 1, 1408, 3, 6)
detections = (1, 337920, 6)
dets.shape = (1, 337920, 6)
self.transitions = <class 'mxnet.gluon.nn.basic_layers.HybridSequential'>
x.shape = (1, 128, 32, 44)
upsample.shape = (1, 128, 64, 88)
route_now.shape = (1, 256, 64, 88)
x.shape = (1, 384, 64, 88)
pred.shape = (1, 255, 64, 88)
pred.shape = (1, 255, 5632)
pred.shape = (1, 5632, 3, 85)
raw_box_centers.shape = (1, 5632, 3, 2)
raw_box_scales.shape = (1, 5632, 3, 2)
objness.shape = (1, 5632, 3, 1)
class_pred.shape = (1, 5632, 3, 80)
offsets.shape = (1, 1, 128, 128, 2)
slice_like offsets.shape = (1, 1, 64, 88, 2)
reshape offsets.shape = (1, 5632, 1, 2)
box_centers.shape = (1, 5632, 3, 2)
anchors.shape = (1, 1, 3, 2)
box_scales.shape = (1, 5632, 3, 2)
confidence.shape = (1, 5632, 3, 1)
class_score.shape = (1, 5632, 3, 80)
bbox.shape = (1, 5632, 3, 4)
bboxes.shape = (80, 1, 5632, 3, 4)
scores.shape = (80, 1, 5632, 3, 1)
ids.shape = (80, 1, 5632, 3, 1)
detections = (80, 1, 5632, 3, 6)
detections = (1, 1351680, 6)
dets.shape = (1, 1351680, 6)
all_detections.shape = 3
all_detections[0].shape = (1, 84480, 6)
result.shape = (1, 1774080, 6)
after result.shape = (1, 1774080, 6)
slice result.shape = (1, 100, 6)
自己总结的计算过程如下:
img ——> darknet53 ——> YOLODetectionBlockV3.body ——> YOLODetectionBlockV3.tip ——> YOLOOutputV3.prediction ——>
(reshape, transpose, slice_axis) ——> [raw_box_centers(2), raw_box_scales(2), objness(1), class_pred(80)] ——>
[box_centers(offsets, _stride), box_scales(anchors), confidence, class_score(class_pred * confidence)] ——>
[bboxes(4), scores(1)] + ids(from arange, 1) ——> (reshape) ——> detections ——> box_nms ——> slice_axis ——>
(ids, scores, bboxes)
class YOLOV3(gluon.HybridBlock): 添加的打印如下:
def hybrid_forward(self, F, x, *args):
    """YOLOV3 network forward pass (inference only), with debug prints.

    The input is run through every block of ``self.stages`` and each stage
    output is kept as a route. The detection blocks/outputs are then applied
    from the deepest route to the shallowest one; between two scales the
    feature map goes through a transition layer, is upsampled and is
    concatenated with the next shallower route. All per-scale detections are
    concatenated, optionally filtered by ``box_nms`` and split into ids,
    scores and boxes.

    The ``print`` calls read ``.shape`` on intermediate results.
    NOTE(review): this presumably only works in imperative (non-hybridized)
    mode -- confirm before hybridizing.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        `F` is mxnet.sym if hybridized or mxnet.nd if not.
    x : mxnet.nd.NDArray
        Input data.
    *args : optional
        Accepted for interface compatibility; this version never reads them
        (there is no training branch here).

    Returns
    -------
    tuple of mxnet.nd.NDArray
        ``(ids, scores, bboxes)``: slices of the detection tensor along its
        last axis -- column 0, column 1 and columns 2 onward.
    """
    all_detections = []
    routes = []
    for stage in self.stages:
        x = stage(x)
        routes.append(x)
        print("x.shape = ", x.shape)

    # Deepest-first view of the routes, built once instead of per iteration.
    routes_deep_first = routes[::-1]
    last_scale = len(routes) - 1

    # the YOLO output layers are used in reverse order, i.e., from very deep
    # layers to shallow
    for i, block, output in zip(range(len(routes)), self.yolo_blocks, self.yolo_outputs):
        x, tip = block(x)
        dets = output(tip)
        print("dets.shape = ", dets.shape)
        all_detections.append(dets)
        if i >= last_scale:
            # the shallowest scale has nothing left to merge with
            break
        # add transition layers
        print("self.transitions = ", type(self.transitions))
        x = self.transitions[i](x)
        print("x.shape = ", x.shape)
        # upsample feature map reverse to shallow layers
        upsample = _upsample(x, stride=2)
        print("upsample.shape = ", upsample.shape)
        route_now = routes_deep_first[i + 1]
        print("route_now.shape = ", route_now.shape)
        x = F.concat(upsample, route_now, dim=1)
        print("x.shape = ", x.shape)

    print("all_detections.shape = ", len(all_detections))
    print("all_detections[0].shape = ", all_detections[0].shape)
    # concat all detection results from different stages
    result = F.concat(*all_detections, dim=1)
    print("result.shape = ", result.shape)
    # apply nms per class
    # NOTE(review): the nesting below was reconstructed from a paste without
    # indentation and follows the upstream GluonCV layout -- confirm.
    if 0 < self.nms_thresh < 1:
        result = F.contrib.box_nms(
            result, overlap_thresh=self.nms_thresh, valid_thresh=0.01,
            topk=self.nms_topk, id_index=0, score_index=1, coord_start=2,
            force_suppress=False)
        print("after result.shape = ", result.shape)
        if self.post_nms > 0:
            # keep only the first `post_nms` entries along axis 1
            result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
            print("slice result.shape = ", result.shape)
    ids = result.slice_axis(axis=-1, begin=0, end=1)
    scores = result.slice_axis(axis=-1, begin=1, end=2)
    bboxes = result.slice_axis(axis=-1, begin=2, end=None)
    return ids, scores, bboxes
class YOLOOutputV3(gluon.HybridBlock):添加的打印如下:
class YOLOOutputV3(gluon.HybridBlock):
    """YOLO V3 output layer for one feature-map scale (with debug prints).

    Parameters
    ----------
    index : int
        Index of the yolo output layer; only used to give the stored
        constants unique names.
    num_class : int
        Number of foreground objects.
    anchors : iterable
        The anchor setting. Reference: https://arxiv.org/pdf/1804.02767.pdf.
    stride : int
        Stride of feature map.
    alloc_size : tuple of int, default is (128, 128)
        For advanced users. Size of the offset grid that is generated once
        and stored as a constant. At forward time the grid is cropped to the
        height/width of the incoming feature map, so arbitrary input sizes
        up to this allocation are supported and the block can be exported
        to symbol for use from C++, Scala, etc.
    """

    def __init__(self, index, num_class, anchors, stride,
                 alloc_size=(128, 128), **kwargs):
        super(YOLOOutputV3, self).__init__(**kwargs)
        anchors = np.array(anchors).astype('float32')
        print("anchors = ", anchors)
        self._classes = num_class
        # per anchor: 1 objness + 4 box + num_class
        self._num_pred = 1 + 4 + num_class
        # anchors come as a flat list of value pairs
        self._num_anchors = anchors.size // 2
        self._stride = stride
        with self.name_scope():
            print("self._num_pred = ", self._num_pred)
            print("self._num_anchors = ", self._num_anchors)
            all_pred = self._num_pred * self._num_anchors
            print("all_pred = %d" % all_pred)
            self.prediction = nn.Conv2D(all_pred, kernel_size=1, padding=0, strides=1)

            # anchors are multiplied onto the scale predictions later on
            anchors = anchors.reshape(1, 1, -1, 2)
            print("anchors = ", anchors)
            self.anchors = self.params.get_constant('anchor_%d' % index, anchors)
            print("self.anchors = ", self.anchors.value)

            # offsets are added onto the center predictions later on
            columns = np.arange(alloc_size[1])
            rows = np.arange(alloc_size[0])
            grid_x, grid_y = np.meshgrid(columns, rows)
            # pair up (x, y) per cell -> (n, n, 2)
            offsets = np.stack((grid_x, grid_y), axis=-1)
            # two leading unit axes -> (1, 1, n, n, 2), easier for broadcasting
            offsets = offsets[np.newaxis, np.newaxis]
            self.offsets = self.params.get_constant('offset_%d' % index, offsets)

    def hybrid_forward(self, F, x, anchors, offsets):
        """Decode a feature map into per-class detections.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input feature map.
        anchors : mxnet.nd.NDArray
            Anchors loaded from self, no need to supply.
        offsets : mxnet.nd.NDArray
            Offsets loaded from self, no need to supply.

        Returns
        -------
        mxnet.nd.NDArray
            Detections reshaped to (batch, N, 6); the last axis is the
            concatenation of class id, score and the four box values.
        """
        pred = self.prediction(x)
        print("pred.shape = ", pred.shape)
        # flatten the spatial axes: (batch, pred per pixel, height * width)
        channels_per_pixel = self._num_anchors * self._num_pred
        pred = pred.reshape((0, channels_per_pixel, -1))
        print("pred.shape = ", pred.shape)
        # move pixels in front, then split the channels:
        # (batch, height * width, num_anchor, num_pred)
        pred = pred.transpose(axes=(0, 2, 1))
        pred = pred.reshape((0, -1, self._num_anchors, self._num_pred))
        print("pred.shape = ", pred.shape)

        # split the prediction vector into its components
        raw_box_centers = F.slice_axis(pred, axis=-1, begin=0, end=2)
        print("raw_box_centers.shape = ", raw_box_centers.shape)
        raw_box_scales = F.slice_axis(pred, axis=-1, begin=2, end=4)
        print("raw_box_scales.shape = ", raw_box_scales.shape)
        objness = F.slice_axis(pred, axis=-1, begin=4, end=5)
        print("objness.shape = ", objness.shape)
        class_pred = F.slice_axis(pred, axis=-1, begin=5, end=None)
        print("class_pred.shape = ", class_pred.shape)

        # crop the stored grid to the feature map: (1, 1, height, width, 2)
        print("offsets.shape = ", offsets.shape)
        offsets = F.slice_like(offsets, x * 0, axes=(2, 3))
        print("slice_like offsets.shape = ", offsets.shape)
        # one offset per pixel: (1, height * width, 1, 2)
        offsets = offsets.reshape((1, -1, 1, 2))
        print("reshape offsets.shape = ", offsets.shape)

        # centers: sigmoid output shifted by the cell offset, scaled by stride
        centers_in_cells = F.broadcast_add(F.sigmoid(raw_box_centers), offsets)
        box_centers = centers_in_cells * self._stride
        print("box_centers.shape = ", box_centers.shape)
        print("anchors.shape = ", anchors.shape)
        # scales: exponentiated output multiplied by the anchors
        box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
        print("box_scales.shape = ", box_scales.shape)
        confidence = F.sigmoid(objness)
        print("confidence.shape = ", confidence.shape)
        # class score = class probability * objectness confidence
        class_score = F.broadcast_mul(F.sigmoid(class_pred), confidence)
        print("class_score.shape = ", class_score.shape)

        # boxes as (center - half size, center + half size)
        half_size = box_scales / 2.0
        bbox = F.concat(box_centers - half_size, box_centers + half_size, dim=-1)
        print("bbox.shape = ", bbox.shape)

        # replicate the boxes once per class and put the class axis first
        bboxes = F.tile(bbox, reps=(self._classes, 1, 1, 1, 1))
        print("bboxes.shape = ", bboxes.shape)
        scores = F.transpose(class_score, axes=(3, 0, 1, 2)).expand_dims(axis=-1)
        print("scores.shape = ", scores.shape)
        # class ids broadcast to the shape of the scores
        class_index = F.arange(0, self._classes).reshape((0, 1, 1, 1, 1))
        ids = F.broadcast_add(scores * 0, class_index)
        print("ids.shape = ", ids.shape)
        detections = F.concat(ids, scores, bboxes, dim=-1)
        print("detections = ", detections.shape)

        # batch axis back in front, then flatten to (B, xx, 6)
        batch_first = detections.transpose(axes=(1, 0, 2, 3, 4))
        detections = F.reshape(batch_first, (0, -1, 6))
        print("detections = ", detections.shape)
        return detections