# x: input_shape=(batches_None, size_None, size_None, 3 anchors, 80 classes+5) # 5: tx,ty,tw,th, p0
def yolo_boxes(pred, anchors, classes):
    """Decode one raw YOLO head output into corner-format boxes and scores.

    Args:
        pred: Raw head output laid out as
            (batch, grid_rows, grid_cols, anchors, 5 + classes); the last
            axis holds (tx, ty, tw, th, objectness, class scores...).
        anchors: Anchor (width, height) priors for this head. Must broadcast
            against the (..., anchors, 2) width/height slice. Presumably
            already divided by the network input size -- confirm at the
            call site.
        classes: Number of class scores per anchor.

    Returns:
        Tuple ``(bbox, objectness, class_probs, pred_box)`` where
        ``bbox`` is (..., 4) as (xmin, ymin, xmax, ymax),
        ``objectness`` is (..., 1) and ``class_probs`` is (..., classes),
        both sigmoid-activated, and ``pred_box`` is (..., 4) holding
        (sigmoid(tx), sigmoid(ty), tw, th) for use by the loss.
    """
    # tf.shape(pred)[1:3] is (rows, cols) == (height, width) of the grid.
    grid_size = tf.shape(pred)[1:3]

    # NOTE: this variant deliberately has no tf.name_scope around meshgrid;
    # it is the version that produces the "Duplicate node name in graph:
    # 'ones'" error shown in the traceback that follows.
    # meshgrid(x_range, y_range) yields per-cell x offsets and y offsets,
    # so the stacked last axis is ordered (x, y).
    grid = tf.meshgrid(tf.range(grid_size[1]), tf.range(grid_size[0]))
    # (rows, cols, 2) -> (rows, cols, 1, 2) so it broadcasts over anchors.
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)

    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1
    )

    box_xy = tf.sigmoid(box_xy)            # offset inside the cell
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    # Keep the un-decoded (sigmoid xy, raw wh) tensor for the loss.
    pred_box = tf.concat((box_xy, box_wh), axis=-1)

    # bx = (sigmoid(tx) + cx) / grid_w, by = (sigmoid(ty) + cy) / grid_h.
    # grid_size is (rows, cols) while the last axis here is (x, y), so the
    # divisor must be reversed; dividing by grid_size directly is only
    # correct for square grids.
    cells_xy = tf.cast(grid_size[::-1], tf.float32)
    box_xy = (box_xy + tf.cast(grid, tf.float32)) / cells_xy
    # bw = pw * e^tw, bh = ph * e^th
    box_wh = tf.exp(box_wh) * anchors

    box_xmin_ymin = box_xy - box_wh / 2    # top-left corner
    box_xmax_ymax = box_xy + box_wh / 2    # bottom-right corner
    bbox = tf.concat([box_xmin_ymin, box_xmax_ymax], axis=-1)

    return bbox, objectness, class_probs, pred_box
#bounding box priors # normalization
# yolo_anchors = np.array([(10, 13), (16, 30), (33, 23),
# (30, 61), (62, 45), (59, 119),
# (116, 90), (156, 198), (373, 326)], np.float32) / 416
# shape=(None, None, None, 3, 85)
# Decode the head output with the anchor priors at indices 6, 7 and 8
# and 80 classes, then show the resulting tensors.
box_0 = yolo_boxes(
    pred=output_0_82,
    anchors=yolo_anchors[[6, 7, 8]],
    classes=80,
)
print(box_0)
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1618 try:
-> 1619 c_op = c_api.TF_FinishOperation(op_desc)
1620 except errors.InvalidArgumentError as e:
InvalidArgumentError: Duplicate node name in graph: 'ones'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-440-8002bb14c10c> in <module>
42
43 # shape=(None, None, None, 3, 85)
---> 44 box_0=yolo_boxes(output_0_82, yolo_anchors[[6,7,8]], 80)
45 # ValueError: Duplicate node name in graph: 'ones'
46
<ipython-input-440-8002bb14c10c> in yolo_boxes(pred, anchors, classes)
9 # (grid_0, grid_1) == (height,width) ==Transpose==> (grid_1, grid_0)=(width, height)==(gx,gy)
10 #divide the input image into grid_size[1] x grid_size[0] cells
---> 11 grid = tf.meshgrid( tf.range(grid_size[1]), tf.range(grid_size[0]) ) #grid
12 # axis=-1, stack the data along axis=1(column)==>[[ [0,0],[1,0],...],...]==>[[ [[0, 0]],[[1, 0]],...],...]
13 grid = tf.expand_dims( tf.stack(grid, axis=-1), axis=2 ) #(gx, gy, 2) ==> [gx, gy, 1,2], ]]
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\ops\array_ops.py in meshgrid(*args, **kwargs)
3063
3064 # TODO(nolivia): improve performance with a broadcast
-> 3065 mult_fact = ones(shapes, output_dtype)
3066 return [x * mult_fact for x in output]
3067
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\ops\array_ops.py in ones(shape, dtype, name)
2669 if not shape._shape_tuple():
2670 shape = reshape(shape, [-1]) # Ensure it's a vector
-> 2671 output = fill(shape, constant(one, dtype=dtype), name=name)
2672 assert output.dtype.base_dtype == dtype
2673 return output
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\ops\array_ops.py in fill(dims, value, name)
231 A `Tensor`. Has the same type as `value`.
232 """
--> 233 result = gen_array_ops.fill(dims, value, name=name)
234 tensor_util.maybe_set_static_shape(result, dims)
235 return result
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py in fill(dims, value, name)
3244 # Add nodes to the TensorFlow graph.
3245 _, _, _op, _outputs = _op_def_library._apply_op_helper(
-> 3246 "Fill", dims=dims, value=value, name=name)
3247 _result = _outputs[:]
3248 if _execute.must_record_gradient():
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\framework\op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
740 op = g._create_op_internal(op_type_name, inputs, dtypes=None,
741 name=scope, input_types=input_types,
--> 742 attrs=attr_protos, op_def=op_def)
743
744 # `outputs` is returned as a separate return value so that the output
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\framework\func_graph.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
593 return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
594 op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 595 compute_device)
596
597 def capture(self, tensor, name=None, shape=None):
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
3320 input_types=input_types,
3321 original_op=self._default_original_op,
-> 3322 op_def=op_def)
3323 self._create_op_helper(ret, compute_device=compute_device)
3324 return ret
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1784 op_def, inputs, node_def.attr)
1785 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1786 control_input_ops)
1787 name = compat.as_str(node_def.name)
1788 # pylint: enable=protected-access
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1620 except errors.InvalidArgumentError as e:
1621 # Convert to ValueError for backwards compatibility.
-> 1622 raise ValueError(str(e))
1623
1624 return c_op
ValueError: Duplicate node name in graph: 'ones'
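Issue: this is a node-naming conflict in the graph. The `ones` op that `tf.meshgrid` creates internally is given a node name ('ones') that already exists in the graph.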
https://blog.youkuaiyun.com/Linli522362242/article/details/106325257
Solution 1: wrap the grid construction in `with tf.name_scope("yolo_boxes_10"):`
# x: input_shape=(batches_None, size_None, size_None, 3 anchors, 80 classes+5) # 5: tx,ty,tw,th, p0
def yolo_boxes(pred, anchors, classes):
    """Decode one raw YOLO head output into corner-format boxes and scores.

    Args:
        pred: Raw head output laid out as
            (batch, grid_rows, grid_cols, anchors, 5 + classes); the last
            axis holds (tx, ty, tw, th, objectness, class scores...).
        anchors: Anchor (width, height) priors for this head. Must broadcast
            against the (..., anchors, 2) width/height slice. Presumably
            already divided by the network input size -- confirm at the
            call site.
        classes: Number of class scores per anchor.

    Returns:
        Tuple ``(bbox, objectness, class_probs, pred_box)`` where
        ``bbox`` is (..., 4) as (xmin, ymin, xmax, ymax),
        ``objectness`` is (..., 1) and ``class_probs`` is (..., classes),
        both sigmoid-activated, and ``pred_box`` is (..., 4) holding
        (sigmoid(tx), sigmoid(ty), tw, th) for use by the loss.
    """
    # tf.shape(pred)[1:3] is (rows, cols) == (height, width) of the grid.
    grid_size = tf.shape(pred)[1:3]

    # Building the grid inside a dedicated name scope gives the ops that
    # tf.meshgrid creates internally a scoped name, which avoids the
    # "Duplicate node name in graph: 'ones'" error.
    with tf.name_scope("yolo_boxes_10"):
        # meshgrid(x_range, y_range) yields per-cell x offsets and
        # y offsets, so the stacked last axis is ordered (x, y).
        grid = tf.meshgrid(tf.range(grid_size[1]), tf.range(grid_size[0]))
        # (rows, cols, 2) -> (rows, cols, 1, 2) to broadcast over anchors.
        grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)

    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1
    )

    box_xy = tf.sigmoid(box_xy)            # offset inside the cell
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    # Keep the un-decoded (sigmoid xy, raw wh) tensor for the loss.
    pred_box = tf.concat((box_xy, box_wh), axis=-1)

    # bx = (sigmoid(tx) + cx) / grid_w, by = (sigmoid(ty) + cy) / grid_h.
    # grid_size is (rows, cols) while the last axis here is (x, y), so the
    # divisor must be reversed; dividing by grid_size directly is only
    # correct for square grids.
    cells_xy = tf.cast(grid_size[::-1], tf.float32)
    box_xy = (box_xy + tf.cast(grid, tf.float32)) / cells_xy
    # bw = pw * e^tw, bh = ph * e^th
    box_wh = tf.exp(box_wh) * anchors

    box_xmin_ymin = box_xy - box_wh / 2    # top-left corner
    box_xmax_ymax = box_xy + box_wh / 2    # bottom-right corner
    bbox = tf.concat([box_xmin_ymin, box_xmax_ymax], axis=-1)

    return bbox, objectness, class_probs, pred_box
#bounding box priors # normalization
# yolo_anchors = np.array([(10, 13), (16, 30), (33, 23),
# (30, 61), (62, 45), (59, 119),
# (116, 90), (156, 198), (373, 326)], np.float32) / 416
# shape=(None, None, None, 3, 85)
# Decode the head output with the anchor priors at indices 6, 7 and 8
# and 80 classes, then show the resulting tensors.
box_0 = yolo_boxes(
    pred=output_0_82,
    anchors=yolo_anchors[[6, 7, 8]],
    classes=80,
)
print(box_0)
(<tf.Tensor 'concat_19:0' shape=(None, None, None, 3, 4) dtype=float32>, <tf.Tensor 'Sigmoid_43:0' shape=(None, None, None, 3, 1) dtype=float32>, <tf.Tensor 'Sigmoid_44:0' shape=(None, None, None, 3, 80) dtype=float32>, <tf.Tensor 'concat_18:0' shape=(None, None, None, 3, 4) dtype=float32>)
Solution 2 (the approach I recommend): box_0=Lambda( lambda x: yolo_boxes(x, yolo_anchors[[6,7,8]], 80),
name="yolo_boxes_0")(output_0_82) # a Lambda layer can infer the grid size
from tensorflow.keras.layers import Lambda
# x: input_shape=(batches_None, size_None, size_None, 3 anchors, 80 classes+5) # 5: tx,ty,tw,th, p0
def yolo_boxes(pred, anchors, classes):
    """Decode one raw YOLO head output into corner-format boxes and scores.

    Args:
        pred: Raw head output laid out as
            (batch, grid_rows, grid_cols, anchors, 5 + classes); the last
            axis holds (tx, ty, tw, th, objectness, class scores...).
        anchors: Anchor (width, height) priors for this head. Must broadcast
            against the (..., anchors, 2) width/height slice. Presumably
            already divided by the network input size -- confirm at the
            call site.
        classes: Number of class scores per anchor.

    Returns:
        Tuple ``(bbox, objectness, class_probs, pred_box)`` where
        ``bbox`` is (..., 4) as (xmin, ymin, xmax, ymax),
        ``objectness`` is (..., 1) and ``class_probs`` is (..., classes),
        both sigmoid-activated, and ``pred_box`` is (..., 4) holding
        (sigmoid(tx), sigmoid(ty), tw, th) for use by the loss.
    """
    # tf.shape(pred)[1:3] is (rows, cols) == (height, width) of the grid.
    grid_size = tf.shape(pred)[1:3]

    # No explicit tf.name_scope here: this variant is meant to be invoked
    # through a Keras Lambda layer, whose layer name prefixes the ops
    # (see the printed tensor names after the call).
    # meshgrid(x_range, y_range) yields per-cell x offsets and y offsets,
    # so the stacked last axis is ordered (x, y).
    grid = tf.meshgrid(tf.range(grid_size[1]), tf.range(grid_size[0]))
    # (rows, cols, 2) -> (rows, cols, 1, 2) so it broadcasts over anchors.
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)

    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1
    )

    box_xy = tf.sigmoid(box_xy)            # offset inside the cell
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    # Keep the un-decoded (sigmoid xy, raw wh) tensor for the loss.
    pred_box = tf.concat((box_xy, box_wh), axis=-1)

    # bx = (sigmoid(tx) + cx) / grid_w, by = (sigmoid(ty) + cy) / grid_h.
    # grid_size is (rows, cols) while the last axis here is (x, y), so the
    # divisor must be reversed; dividing by grid_size directly is only
    # correct for square grids.
    cells_xy = tf.cast(grid_size[::-1], tf.float32)
    box_xy = (box_xy + tf.cast(grid, tf.float32)) / cells_xy
    # bw = pw * e^tw, bh = ph * e^th
    box_wh = tf.exp(box_wh) * anchors

    box_xmin_ymin = box_xy - box_wh / 2    # top-left corner
    box_xmax_ymax = box_xy + box_wh / 2    # bottom-right corner
    bbox = tf.concat([box_xmin_ymin, box_xmax_ymax], axis=-1)

    return bbox, objectness, class_probs, pred_box
#bounding box priors # normalization
# yolo_anchors = np.array([(10, 13), (16, 30), (33, 23),
# (30, 61), (62, 45), (59, 119),
# (116, 90), (156, 198), (373, 326)], np.float32) / 416
# shape=(None, None, None, 3, 85)
# box_0=yolo_boxes(output_0_82, yolo_anchors[[6,7,8]], 80)
# ValueError: Duplicate node name in graph: 'ones' # solution: with tf.name_scope("yolo_boxes_10"):
# (<tf.Tensor 'concat_19:0' shape=(None, None, None, 3, 4) dtype=float32>,
# <tf.Tensor 'Sigmoid_43:0' shape=(None, None, None, 3, 1) dtype=float32>,
# <tf.Tensor 'Sigmoid_44:0' shape=(None, None, None, 3, 80) dtype=float32>,
# <tf.Tensor 'concat_18:0' shape=(None, None, None, 3, 4) dtype=float32>)
# Run the decoder as a named Keras Lambda layer so its ops are created
# under the layer's own name; the layer can infer the grid size.
box_0 = Lambda(
    lambda head: yolo_boxes(head, yolo_anchors[[6, 7, 8]], 80),
    name="yolo_boxes_0",
)(output_0_82)
print(box_0)
# Tensor("yolo_boxes_0_57/range:0", shape=(None,), dtype=int32)
# Tensor("yolo_boxes_0_57/range_1:0", shape=(None,), dtype=int32)
# (<tf.Tensor 'yolo_boxes_0_57/Identity:0' shape=(None, None, None, 3, 4) dtype=float32>,
# <tf.Tensor 'yolo_boxes_0_57/Identity_1:0' shape=(None, None, None, 3, 1) dtype=float32>,
# <tf.Tensor 'yolo_boxes_0_57/Identity_2:0' shape=(None, None, None, 3, 80) dtype=float32>,
# <tf.Tensor 'yolo_boxes_0_57/Identity_3:0' shape=(None, None, None, 3, 4) dtype=float32>)
(<tf.Tensor 'yolo_boxes_0_57/Identity:0' shape=(None, None, None, 3, 4) dtype=float32>,
<tf.Tensor 'yolo_boxes_0_57/Identity_1:0' shape=(None, None, None, 3, 1) dtype=float32>,
<tf.Tensor 'yolo_boxes_0_57/Identity_2:0' shape=(None, None, None, 3, 80) dtype=float32>,
<tf.Tensor 'yolo_boxes_0_57/Identity_3:0' shape=(None, None, None, 3, 4) dtype=float32>)
YOLOv3目标检测框预测解析
本文详细解析了YOLOv3算法中如何从网络输出预测目标检测框的位置及类别概率,包括网格单元格的划分、边界框坐标的计算、置信度得分和类别概率的计算等关键步骤。
3184





