SampleNet解决分类问题
首先,我们进入源码文件夹中找到分类问题的文件夹,其架构如下图所示:
我们首先看models文件夹,找到samplenet_model文件,点击打开。
这个文件存放的是SampleNet的网络骨干模型以及一些关键方法,下面我们进一步看其中的代码。
SampleNet的骨干结构:
def get_model(
    point_cloud, is_training, num_output_points, bottleneck_size, bn_decay=None
):
    """Build the SampleNet backbone that generates a smaller point cloud.

    Five per-point convolutions are followed by a max-pool over the point
    axis and four fully connected layers.  The last layer has no activation
    and emits ``3 * num_output_points`` values, which are reshaped into
    points.

    Args:
        point_cloud: Tensor with a static shape; its first two dimensions
            are read as batch size and number of points.  The first
            convolution uses a [1, 3] kernel, so presumably the last
            dimension is 3 (xyz) -- confirm against the caller.
        is_training: Forwarded to every layer (all are built with bn=True).
        num_output_points: Number of points in the generated cloud.
        bottleneck_size: Output channels of the last convolution, i.e. the
            width of the pooled feature vector.
        bn_decay: Forwarded unchanged to every layer.

    Returns:
        Tensor reshaped to (batch_size, -1, 3), i.e. one row per output point.
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value

    # Add a trailing axis so the cloud can be fed to conv2d.
    net = tf.expand_dims(point_cloud, -1)

    # Point functions (MLP implemented as conv2d): (scope, channels, kernel).
    # Only the first layer uses a [1, 3] kernel; the rest are 1x1.
    conv_layers = (
        ("conv1", 64, [1, 3]),
        ("conv2", 64, [1, 1]),
        ("conv3", 64, [1, 1]),
        ("conv4", 128, [1, 1]),
        ("conv5", bottleneck_size, [1, 1]),
    )
    for scope, channels, kernel in conv_layers:
        net = tf_util.conv2d(
            net,
            channels,
            kernel,
            padding="VALID",
            stride=[1, 1],
            bn=True,
            is_training=is_training,
            scope=scope,
            bn_decay=bn_decay,
        )

    # Pool over all points, then flatten to one feature vector per cloud.
    net = tf_util.max_pool2d(net, [num_point, 1], padding="VALID", scope="maxpool")
    net = tf.reshape(net, [batch_size, -1])

    for scope in ("fc11b", "fc12b", "fc13b"):
        net = tf_util.fully_connected(
            net, 256, bn=True, is_training=is_training, scope=scope, bn_decay=bn_decay
        )

    # Final layer: no activation, three values per generated point.
    net = tf_util.fully_connected(
        net,
        3 * num_output_points,
        bn=True,
        is_training=is_training,
        scope="fc14b",
        bn_decay=bn_decay,
        activation_fn=None,
    )
    return tf.reshape(net, [batch_size, -1, 3])
下面的代码是距离计算的方法:
# Distance computation helper.
def calc_distances(p0, points):
    """Return the squared Euclidean distance from ``p0`` to each row of ``points``.

    Args:
        p0: A single point (array-like of coordinates).
        points: Array-like of shape (N, D) holding one point per row.

    Returns:
        1-D array of length N with the squared distances (no square root
        is taken).
    """
    # Coerce so that plain lists/tuples are accepted as well as ndarrays.
    diff = np.asarray(p0) - np.asarray(points)
    return (diff ** 2).sum(axis=1)
最远点采样FPS实现:
def fps_from_given_pc(pts, k, given_pc):
    """Extend a given set of points to ``k`` points by farthest point sampling.

    The given points are copied to the front of the result.  Each remaining
    slot is filled with the candidate from ``pts`` that is farthest from
    all points selected so far.

    Args:
        pts: (N, 3) array of candidate points.
        k: Total number of points to return.
        given_pc: Points that are already selected; its size must be a
            multiple of 3 and it may be empty.

    Returns:
        (k, 3) float array: the given points followed by the sampled ones.

    Raises:
        ValueError: If ``given_pc`` holds more than ``k`` points.
    """
    given = np.reshape(given_pc, (-1, 3))
    t = given.shape[0]
    if t > k:
        raise ValueError(
            "given_pc holds %d points but only k=%d were requested" % (t, k)
        )

    farthest_pts = np.zeros((k, 3))
    farthest_pts[:t] = given

    # Squared distance from every candidate to its nearest selected point.
    # Starting from +inf (rather than from the zero placeholder row) keeps
    # the origin from being treated as a selected point when nothing is
    # given; in that case the first sampled point is pts[0].
    distances = np.full(np.shape(pts)[0], np.inf)
    for i in range(t):
        distances = np.minimum(distances, calc_distances(farthest_pts[i], pts))

    for i in range(t, k):
        farthest_pts[i] = pts[np.argmax(distances)]
        distances = np.minimum(distances, calc_distances(farthest_pts[i], pts))
    return farthest_pts
最近邻匹配法(nearest neighbor matching)代码:
# Nearest neighbour matching.
def nn_matching(full_pc, idx, k, complete_fps=True):
    """Select ``k`` points per cloud from ``full_pc`` using index lists.

    Args:
        full_pc: Array-like of shape (batch, N, 3) with the full clouds.
        idx: Per-cloud indices into the corresponding cloud.
        k: Number of points in each output cloud.
        complete_fps: If True, the indices are first passed through the
            module-level ``unique`` helper (defined elsewhere; presumably
            it removes duplicates -- confirm) and the selection is then
            filled up to ``k`` points by ``fps_from_given_pc``.  If False,
            the indexed points are used directly, so each index list must
            select exactly ``k`` points.

    Returns:
        Float array of shape (batch, k, 3).
    """
    full_pc = np.asarray(full_pc)
    out_pc = np.zeros((full_pc.shape[0], k, 3))
    for ii in range(full_pc.shape[0]):
        cloud = full_pc[ii]
        best_idx = idx[ii]
        if complete_fps:
            best_idx = unique(best_idx)
            out_pc[ii] = fps_from_given_pc(cloud, k, cloud[best_idx])
        else:
            out_pc[ii] = cloud[best_idx]
    # out_pc already has exactly k points per cloud; no slicing is needed.
    return out_pc
EMD算法代码:
# EMD algorithm.
def emd_matching(full_pc, gen_pc, sess):
    """Match generated points to input points via ``approx_match``.

    For each generated point, the index with the largest entry along axis 2
    of the match matrix is taken.  Those indices go through the module-level
    ``unique`` helper, and the selection is filled up to the original number
    of generated points with ``fps_from_given_pc``.

    Args:
        full_pc: Array of full point clouds, indexed per batch element.
        gen_pc: Array of generated clouds; its axis-1 size is the number of
            points returned per cloud.
        sess: TensorFlow session used to evaluate the match indices.

    Returns:
        Array with the same shape and dtype as ``gen_pc``.

    NOTE(review): new tensors are built on every call
    (``tf.convert_to_tensor`` / ``approx_match``); in graph mode this
    presumably grows the graph on repeated calls -- confirm.
    """
    num_clouds = np.size(full_pc, 0)
    num_samples = np.size(gen_pc, 1)
    matched_pc = np.zeros_like(gen_pc)

    full_tensor = tf.convert_to_tensor(full_pc)
    gen_tensor = tf.convert_to_tensor(gen_pc)
    match_matrix = approx_match(full_tensor, gen_tensor)

    match_idx_tensor = tf.cast(tf.argmax(match_matrix, axis=2), dtype=tf.int32)
    match_idx = match_idx_tensor.eval(session=sess)

    for b in range(num_clouds):
        cloud = full_pc[b]
        given_idx = unique(match_idx[b])
        matched_pc[b] = fps_from_given_pc(cloud, num_samples, cloud[given_idx])
    return matched_pc
get_nn_indices(获取最近邻索引):
def get_nn_indices(ref_pc, samp_pc):
    """Return the index output of ``nn_distance(samp_pc, ref_pc)``.

    ``nn_distance`` is defined elsewhere and yields four values; only the
    second one is kept.  Presumably it holds, for each point of ``samp_pc``,
    the index of its nearest neighbour in ``ref_pc`` -- confirm against
    ``nn_distance``.
    """
    _dist_fwd, nn_idx, _dist_bwd, _idx_bwd = nn_distance(samp_pc, ref_pc)
    return nn_idx
计算简化损失(simplification loss)
def get_simplification_loss(ref_pc, samp_pc, pc_size, gamma=1, delta=0):
    """Compute the simplification loss between a sampled and a reference cloud.

    The loss is::

        mean(cost_fwd) + mean(max(cost_fwd, axis=1))
            + (gamma + delta * pc_size) * mean(cost_bwd)

    where ``cost_fwd`` and ``cost_bwd`` are the first and third outputs of
    ``nn_distance(samp_pc, ref_pc)``.  Presumably these are the
    sampled-to-reference and reference-to-sampled nearest-neighbour
    distances -- confirm against ``nn_distance``.

    The three terms are also logged as the scalar summaries
    ``cost_p1_p2``, ``cost_p2_p1`` and ``max_cost``.

    Args:
        ref_pc: Reference point cloud tensor.
        samp_pc: Sampled point cloud tensor.
        pc_size: Multiplied by ``delta`` in the backward-term weight.
        gamma: Constant part of the backward-term weight.
        delta: Size-dependent part of the backward-term weight.

    Returns:
        Scalar loss tensor.
    """
    cost_fwd, _, cost_bwd, _ = nn_distance(samp_pc, ref_pc)

    # Worst-case forward cost per cloud, averaged over the batch.
    worst_fwd = tf.reduce_mean(tf.reduce_max(cost_fwd, axis=1))
    mean_fwd = tf.reduce_mean(cost_fwd)
    mean_bwd = tf.reduce_mean(cost_bwd)

    bwd_weight = gamma + delta * pc_size
    total = mean_fwd + worst_fwd + bwd_weight * mean_bwd

    tf.summary.scalar("cost_p1_p2", mean_fwd)
    tf.summary.scalar("cost_p2_p1", mean_bwd)
    tf.summary.scalar("max_cost", worst_fwd)
    return total
下面,我们将主要看看本论文的创新点之一——软投影操作
下方的代码展示了软投影算法的类(class)以及运行示例;通过对比硬投影与软投影各自的均方误差,我们可以直观地看到软投影的优势所在。
class SoftProjection(object):
    """Computes a soft nearest neighbor point cloud.

    Calling the object (or ``project``) takes:
        point_cloud: A `Tensor` of shape (batch_size, num_in_points, 3),
            original point cloud.
        query_cloud: A `Tensor` of shape (batch_size, num_out_points, 3),
            generated point cloud.

    and returns:
        projected_point_cloud: A `Tensor` of shape
            (batch_size, num_out_points, 3), the query_cloud projected onto
            its group_size nearest neighborhood, controlled by the learnable
            temperature parameter.
        weights: A `Tensor` of shape
            (batch_size, num_out_points, group_size, 1), the projection
            weights of the query_cloud onto its group_size nearest
            neighborhood.
        dist: A `Tensor` of shape
            (batch_size, num_out_points, group_size, 1), the square distance
            of each query point from its neighbors divided by the squared
            temperature parameter.
    """

    def __init__(
        self, group_size, initial_temperature=1.0, is_temperature_trainable=True
    ):
        """Create the projector and its temperature variable.

        Arguments:
            group_size: An integer, number of neighbors in nearest
                neighborhood.
            initial_temperature: A positive real number, initialization
                constant for temperature parameter.
            is_temperature_trainable: bool.
        """
        self._group_size = group_size

        # create temperature variable
        self._temperature = tf.get_variable(
            "temperature",
            initializer=tf.constant(initial_temperature, dtype=tf.float32),
            trainable=is_temperature_trainable,
            dtype=tf.float32,
        )

        # sigma is exposed for loss calculation
        self.sigma = self._temperature ** 2

    def __call__(self, point_cloud, query_cloud, hard=False):
        """Shorthand for ``project(point_cloud, query_cloud, hard)``."""
        return self.project(point_cloud, query_cloud, hard)

    def _group_points(self, point_cloud, query_cloud):
        """Gather the ``group_size`` nearest points of each query point."""
        # find nearest group_size neighbours in point_cloud
        _, idx = knn_point(self._group_size, point_cloud, query_cloud)
        return group_point(point_cloud, idx)

    def _get_distances(self, grouped_points, query_cloud):
        """Return squared neighbor distances divided by ``sigma``."""
        # Subtract each query point from its neighbors; the inserted axis
        # broadcasts over the group dimension.
        deltas = grouped_points - tf.expand_dims(query_cloud, 2)
        return tf.reduce_sum(deltas ** 2, axis=3, keepdims=True) / self.sigma

    def project(self, point_cloud, query_cloud, hard=False):
        """Project ``query_cloud`` onto its nearest neighbors in ``point_cloud``.

        With ``hard=False`` the result is a softmax-weighted average of the
        neighbors.  With ``hard=True`` the weights are replaced by a one-hot
        selection of the largest softmax weight.

        Returns:
            Tuple ``(projected_point_cloud, weights, dist)``.
        """
        grouped_points = self._group_points(
            point_cloud, query_cloud
        )  # (batch_size, num_out_points, group_size, 3)
        dist = self._get_distances(grouped_points, query_cloud)

        # pass through softmax to get weights
        weights = tf.nn.softmax(-dist, axis=2)
        if hard:
            # convert softmax weights to one_hot encoding; the transpose
            # moves the one-hot axis back to the group axis (axis 2)
            weights = tf.one_hot(tf.argmax(weights, axis=2), depth=self._group_size)
            weights = tf.transpose(weights, perm=[0, 1, 3, 2])

        # get weighted average of grouped_points
        projected_point_cloud = tf.reduce_sum(
            grouped_points * weights, axis=2
        )  # (batch_size, num_out_points, 3)
        return projected_point_cloud, weights, dist
"""SoftProjection test"""
if __name__ == "__main__":
    # Self-test: project a small query cloud onto a known point cloud and
    # compare the soft and hard projections with hand-computed expectations.
    tf.enable_eager_execution()
    projector = SoftProjection(3, initial_temperature=0.01)

    point_cloud = np.array(
        [
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [5, 4, 4],
            [4, 5, 4],
            [4, 4, 5],
            [8, 7, 7],
            [7, 8, 7],
            [7, 7, 8],
        ]
    )
    query_cloud = np.array(
        [[0, 0, 0], [1, 0, 0], [2, 0, 0], [5, 5, 5], [7, 7, 8], [7, 7, 8.5]]
    )
    expected_cloud_soft = np.array(
        [
            [0.333, 0.333, 0.333],
            [1, 0, 0],
            [1, 0, 0],
            [4.333, 4.333, 4.333],
            [7, 7, 8],
            [7, 7, 8],
        ]
    )
    expected_cloud_hard = np.array(
        [[1, 0, 0], [1, 0, 0], [1, 0, 0], [5, 4, 4], [7, 7, 8], [7, 7, 8]]
    )

    # expand to batch_size = 2
    point_cloud = np.stack([point_cloud, point_cloud * 3], axis=0)
    query_cloud = np.stack([query_cloud, query_cloud * 3], axis=0)
    expected_cloud_soft = np.stack(
        [expected_cloud_soft, expected_cloud_soft * 3], axis=0
    )
    expected_cloud_hard = np.stack(
        [expected_cloud_hard, expected_cloud_hard * 3], axis=0
    )

    point_cloud_pl = tf.convert_to_tensor(point_cloud, dtype=tf.float32)
    query_cloud_pl = tf.convert_to_tensor(query_cloud, dtype=tf.float32)

    soft_projected_points, soft_projection_weights, _ = projector(
        point_cloud_pl, query_cloud_pl
    )
    hard_projected_points, hard_projection_weights, _ = projector(
        point_cloud_pl, query_cloud_pl, hard=True
    )

    soft_projected_points = soft_projected_points.numpy()
    soft_projection_weights = soft_projection_weights.numpy()
    hard_projected_points = hard_projected_points.numpy()
    hard_projection_weights = hard_projection_weights.numpy()

    expected_cloud_soft = expected_cloud_soft.squeeze()
    soft_projected_points = soft_projected_points.squeeze()
    soft_projection_weights = soft_projection_weights.squeeze()
    hard_projected_points = hard_projected_points.squeeze()
    hard_projection_weights = hard_projection_weights.squeeze()

    print("soft_projection_weights:")
    print(soft_projection_weights)

    # The clouds are (batch, points, 3) here, so the squared error of one
    # point is the sum over the last axis (coordinates), not over axis 1.
    mse = np.mean(np.sum((soft_projected_points - expected_cloud_soft) ** 2, axis=-1))
    print("mean soft error:")
    print(mse)

    mse = np.mean(np.sum((hard_projected_points - expected_cloud_hard) ** 2, axis=-1))
    print("mean hard error:")
    print(mse)
总结
本次代码解析的主要内容包括:对主干网络结构的学习、对两种匹配方式(EMD算法和最近邻匹配法)的学习、对损失计算的学习,以及对软投影示例的学习。