边界光滑损失
def compute_edge_aware_smooth_loss(self, pred_disp, img):
    """Return the edge-aware smoothness loss of a predicted disparity map.

    Neighbour-to-neighbour differences of ``pred_disp`` are penalised, but
    each difference is scaled by ``exp(-mean(|image difference|))`` taken at
    the same location, so a disparity change costs less where the image
    itself changes.

    Args:
        pred_disp: 4-D tensor of predicted disparities. Assumed to be laid
            out as [batch, height, width, channels] -- TODO confirm against
            the caller.
        img: 4-D tensor with the same layout. Its differences are averaged
            over axis 3, and the result must broadcast against the
            differences of ``pred_disp``.

    Returns:
        Scalar tensor: mean weighted |difference| along axis 2 plus mean
        weighted |difference| along axis 1.
    """
    def gradient(pred):
        # First-order differences between neighbours along axis 2 ("x") and
        # axis 1 ("y"). The sign is irrelevant because only the absolute
        # value is used below.
        d_dx = pred[:, :, :-1, :] - pred[:, :, 1:, :]
        d_dy = pred[:, :-1, :, :] - pred[:, 1:, :, :]
        return d_dx, d_dy

    img_dx, img_dy = gradient(img)
    disp_dx, disp_dy = gradient(pred_disp)

    # `keepdims` replaces the deprecated `keep_dims` argument. Keeping the
    # reduced axis (size 1) lets the weights broadcast over the last axis of
    # the disparity differences.
    weight_x = tf.exp(-tf.reduce_mean(tf.abs(img_dx), axis=3, keepdims=True))
    weight_y = tf.exp(-tf.reduce_mean(tf.abs(img_dy), axis=3, keepdims=True))

    loss_x = tf.reduce_mean(weight_x * tf.abs(disp_dx))
    loss_y = tf.reduce_mean(weight_y * tf.abs(disp_dy))
    return loss_x + loss_y
对于维度为(3, 2, 3, 3)的z1:
z1=np.array([[[[0.9,0.1,0.4],[0.7,0.4,0.2],[0.9,0.1,0.3]],
[[0.4,0.5,0.0],[0.4,0.0,0.9],[0.6,0.7,0.1]]],
[[[0.3,0.1,0.6],[0.2,0.4,0.2],[0.1,0.6,0.8]],
[[0.7,0.1,0.3],[0.4,0.3,0.7],[0.6,0.1,0.2]]],
[[[0.3,0.5,0.7],[0.9,0.0,0.1],[0.4,0.2,0.9]],
[[0.6,0.9,0.6],[0.3,0.7,0.4],[0.9,0.8,0.5]]]])
z1代表batch size=3的一批图像(布局为[batch, height, width, channel]),其中一个样本是:
[[[0.9,0.1,0.4],[0.7,0.4,0.2],[0.9,0.1,0.3]],
[[0.4,0.5,0.0],[0.4,0.0,0.9],[0.6,0.7,0.1]]]
这个样本表示一个高2、宽3(2x3)的三通道图像,各通道值为:
通道1:
[[0.9 0.7 0.9]
[0.4 0.4 0.6]]
通道2:
[[0.1 0.4 0.1]
[0.5 0. 0.7]]
通道3:
[[0.4 0.2 0.3]
[0. 0.9 0.1]]
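那么,对该batch(即这一个样本)沿高度方向(axis=1,第0行减第1行)做差分,结果就是下面的数组。注意:按上面gradient的定义,沿axis=1的差分在代码中对应的是img_dy;真正沿宽度方向(axis=2)的img_dx形状应为2x2x3。下文仍沿用img_dx这个名字来演示权重的计算过程: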
[[[ 0.5 -0.4 0.4]
[ 0.3 0.4 -0.7]
[ 0.3 -0.6 0.2]]]
其中,每列是一个通道。
对一个batch,如上面的img_dx,把三个通道对应元素的绝对值相加求平均值,取平均值的相反数:
weight_x = tf.exp(-tf.reduce_mean(tf.abs(img_dx), 3, keepdims=True))
-tf.reduce_mean(tf.abs(img_dx), 3, keepdims=True)的结果是:
[[[-0.43333333]
[-0.46666667]
[-0.36666667]]]
计算过程:
-(0.5+0.4+0.4)/3=-0.43333333
-(0.3+0.4+0.7)/3=-0.46666667
-(0.3+0.6+0.2)/3=-0.36666667
weight_x是:
[[[0.64834434]
[0.62708909]
[0.69304062]]]
计算过程是:
对每个元素x取e^x:
e^(-0.43333333)=0.64834434
e^(-0.46666667)=0.62708909
e^(-0.36666667)=0.69304062
meshgrid
import tensorflow as tf
import numpy as np
def meshgrid(batch, height, width, is_homogeneous=True):
    """Build a grid of pixel coordinates, repeated for every batch item.

    Args:
        batch: number of copies of the grid to tile along axis 0.
        height: number of rows in the grid.
        width: number of columns in the grid.
        is_homogeneous: when true, append a plane of ones after the x and y
            planes.

    Returns:
        Tensor of shape [batch, 2 (3 if homogeneous), height, width]. The
        planes are, in order, x coordinates (0 .. width - 1), y coordinates
        (0 .. height - 1) and, optionally, ones.
    """
    # Normalised ramps in [-1, 1]: one as a (1, width) row, one as a
    # (height, 1) column.
    x_ramp = tf.expand_dims(tf.linspace(-1.0, 1.0, width), 0)
    y_ramp = tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1)

    # Multiplying by a vector of ones replicates each ramp across the other
    # dimension, giving two (height, width) planes.
    x_norm = tf.matmul(tf.ones(shape=tf.stack([height, 1])), x_ramp)
    y_norm = tf.matmul(y_ramp, tf.ones(shape=tf.stack([1, width])))

    # Map [-1, 1] onto pixel indices [0, size - 1].
    x_pix = (x_norm + 1.0) * 0.5 * tf.cast(width - 1, tf.float32)
    y_pix = (y_norm + 1.0) * 0.5 * tf.cast(height - 1, tf.float32)

    planes = [x_pix, y_pix]
    if is_homogeneous:
        planes.append(tf.ones_like(x_pix))

    grid = tf.stack(planes, axis=0)
    return tf.tile(tf.expand_dims(grid, 0), [batch, 1, 1, 1])
# Demo: build the homogeneous pixel grid for a single 4x5 image and print it.
y = meshgrid(1, 4, 5)
# tf.global_variables_initializer() replaces the deprecated
# tf.initialize_all_variables(). The code shown here creates no variables,
# so running it is presumably a no-op kept for parity with the original.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # print("") emits the same blank line under Python 2 and Python 3; the
    # original bare `print` statements are a syntax error on Python 3.
    print("")
    print(sess.run(y))
batch size=1、高4宽5(4x5)的图像产生的坐标如下,输出形状为[1, 3, 4, 5],三个平面依次是x坐标(列号)、y坐标(行号)和齐次坐标的全1平面:
[[[[0. 1. 2. 3. 4.]
[0. 1. 2. 3. 4.]
[0. 1. 2. 3. 4.]
[0. 1. 2. 3. 4.]]
[[0. 0. 0. 0. 0.]
[1. 1. 1. 1. 1.]
[2. 2. 2. 2. 2.]
[3. 3. 3. 3. 3.]]
[[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]]]]