SampleNet论文源码解读【1】

SampleNet解决分类问题

首先,我们进入源码文件夹中找到分类问题的文件夹,其架构如下图所示:
架构如上
我们首先看models文件夹,找到samplenet_model文件,点击打开。
这个文件存放的是samplenet的网络骨架模型以及一些关键方法,我们进一步看其中的codes

SampleNet的骨干结构:


def get_model(
    point_cloud, is_training, num_output_points, bottleneck_size, bn_decay=None
):
    """Build the SampleNet backbone that generates a smaller point cloud.

    The input is expanded with a trailing axis and passed through five
    batch-normalised conv2d layers (a per-point MLP implemented as conv2d),
    max-pooled over the point axis, flattened, and fed to four fully
    connected layers. The last layer is built with ``activation_fn=None`` and
    asked for ``3 * num_output_points`` outputs, which are reshaped to
    ``[batch_size, -1, 3]``.

    Args:
        point_cloud: Input tensor; its first two static dimensions are read
            as the batch size and the number of points.
        is_training: Forwarded to every tf_util layer.
        num_output_points: Number of points the network should generate.
        bottleneck_size: Output channel count of the last conv layer.
        bn_decay: Forwarded to every tf_util layer.

    Returns:
        The generated point cloud tensor reshaped to ``[batch_size, -1, 3]``.
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value
    net = tf.expand_dims(point_cloud, -1)

    # Point functions (MLP implemented as conv2d).
    # Each entry: (output channels, kernel size, variable scope).
    conv_layers = (
        (64, [1, 3], "conv1"),
        (64, [1, 1], "conv2"),
        (64, [1, 1], "conv3"),
        (128, [1, 1], "conv4"),
        (bottleneck_size, [1, 1], "conv5"),
    )
    for out_channels, kernel_size, scope in conv_layers:
        net = tf_util.conv2d(
            net,
            out_channels,
            kernel_size,
            padding="VALID",
            stride=[1, 1],
            bn=True,
            is_training=is_training,
            scope=scope,
            bn_decay=bn_decay,
        )

    # Pool over the point axis, then flatten to one feature vector per example.
    net = tf_util.max_pool2d(net, [num_point, 1], padding="VALID", scope="maxpool")
    net = tf.reshape(net, [batch_size, -1])

    for scope in ("fc11b", "fc12b", "fc13b"):
        net = tf_util.fully_connected(
            net, 256, bn=True, is_training=is_training, scope=scope, bn_decay=bn_decay
        )

    # Final layer: no activation, three coordinates per generated point.
    net = tf_util.fully_connected(
        net,
        3 * num_output_points,
        bn=True,
        is_training=is_training,
        scope="fc14b",
        bn_decay=bn_decay,
        activation_fn=None,
    )

    return tf.reshape(net, [batch_size, -1, 3])

下面的代码是距离计算的方法:

# Distance computation helper.
def calc_distances(p0, points):
    """Return, per row of ``points``, the sum over axis 1 of ``(p0 - points) ** 2``."""
    offsets = p0 - points
    return (offsets * offsets).sum(axis=1)

最远点采样FPS实现:

def fps_from_given_pc(pts, k, given_pc):
    """Complete a set of already chosen points to ``k`` points by farthest point sampling.

    The points of ``given_pc`` fill the first rows of the result. Every
    remaining row is taken greedily from ``pts``: the point whose squared
    distance to the closest already selected point is largest.

    Args:
        pts: Candidate points; ``pts[i]`` is read as one 3-D point
            (assumed shape ``(n, 3)`` -- TODO confirm against callers).
        k: Total number of points to return.
        given_pc: Points that are already selected. Their count is taken as
            ``np.size(given_pc) // 3``.

    Returns:
        A ``(k, 3)`` float array holding ``given_pc`` followed by the sampled
        points.

    Raises:
        ValueError: If ``given_pc`` holds more than ``k`` points.
    """
    farthest_pts = np.zeros((k, 3))
    t = np.size(given_pc) // 3
    if t > k:
        raise ValueError(
            "given_pc holds {} points but only k={} were requested".format(t, k)
        )
    farthest_pts[0:t] = given_pc

    # Start from +inf rather than from the distance to farthest_pts[0]: when
    # given_pc is empty that row is still the all-zero placeholder, and using
    # it would make every later pick avoid a point (the origin) that was never
    # selected. With no given points the first pick is pts[0], because
    # np.argmax returns the first index on ties.
    distances = np.full(np.shape(pts)[0], np.inf)
    for i in range(t):
        distances = np.minimum(distances, calc_distances(farthest_pts[i], pts))

    for i in range(t, k):
        farthest_pts[i] = pts[np.argmax(distances)]
        distances = np.minimum(distances, calc_distances(farthest_pts[i], pts))
    return farthest_pts

最近邻匹配(nearest neighbor matching)代码:

# Nearest-neighbour matching
def nn_matching(full_pc, idx, k, complete_fps=True):
    """Pick points of ``full_pc`` by index for every batch element.

    Args:
        full_pc: Array indexed as ``full_pc[b][point_index]``; its first axis
            is the batch axis.
        idx: Per-batch index collections; ``idx[b]`` selects points of
            ``full_pc[b]``.
        k: Number of output points per batch element.
        complete_fps: When true, ``idx[b]`` is passed through ``unique`` and
            the selected points are completed to ``k`` points with
            ``fps_from_given_pc``. When false, the indexed points are copied
            as they are.

    Returns:
        An array of shape ``(batch, k, 3)`` with the selected points.
    """
    num_clouds = full_pc.shape[0]
    matched = np.zeros((num_clouds, k, 3))
    for b in range(num_clouds):
        chosen = idx[b]
        cloud = full_pc[b]
        if not complete_fps:
            matched[b] = cloud[chosen]
            continue
        chosen = unique(chosen)
        matched[b] = fps_from_given_pc(cloud, k, cloud[chosen])
    return matched[:, :k, :]

EMD算法代码:

# EMD matching
def emd_matching(full_pc, gen_pc, sess):
    """Match generated points to input points via ``approx_match``.

    For every batch element the argmax over axis 2 of the match matrix is
    evaluated in ``sess``, passed through ``unique``, and the selected points
    of ``full_pc`` are completed with ``fps_from_given_pc`` to as many points
    as ``gen_pc`` has along axis 1.

    Args:
        full_pc: Array of input point clouds; the first axis is the batch axis.
        gen_pc: Array of generated point clouds; the result has its shape
            and dtype.
        sess: TensorFlow session used to evaluate the match indices.

    Returns:
        An array shaped like ``gen_pc`` holding points taken from ``full_pc``.
    """
    num_clouds = np.size(full_pc, 0)
    num_out_points = np.size(gen_pc, 1)
    matched = np.zeros_like(gen_pc)

    full_tensor = tf.convert_to_tensor(full_pc)
    gen_tensor = tf.convert_to_tensor(gen_pc)
    match_matrix = approx_match(full_tensor, gen_tensor)
    match_idx_op = tf.cast(tf.argmax(match_matrix, axis=2), dtype=tf.int32)
    match_idx = match_idx_op.eval(session=sess)

    for b in range(num_clouds):
        seed_idx = unique(match_idx[b])
        cloud = full_pc[b]
        matched[b] = fps_from_given_pc(cloud, num_out_points, cloud[seed_idx])

    return matched

get_nn_indices

def get_nn_indices(ref_pc, samp_pc):
    """Return the second of the four outputs of ``nn_distance(samp_pc, ref_pc)``.

    NOTE(review): presumably this is, for each point of ``samp_pc``, the index
    of its nearest neighbour in ``ref_pc`` -- confirm against ``nn_distance``.
    """
    _cost_a, nearest_idx, _cost_b, _idx_b = nn_distance(samp_pc, ref_pc)
    return nearest_idx

计算简化损失(simplification loss)

def get_simplification_loss(ref_pc, samp_pc, pc_size, gamma=1, delta=0):
    """Compute the simplification loss between a sampled and a reference cloud.

    The loss is the sum of three terms built from the first and third outputs
    of ``nn_distance(samp_pc, ref_pc)``:

    * the overall mean of the first output,
    * the mean over the batch of its maximum along axis 1,
    * the overall mean of the third output, scaled by
      ``gamma + delta * pc_size``.

    The three means are also logged as the scalar summaries ``cost_p1_p2``,
    ``cost_p2_p1`` and ``max_cost``.

    Args:
        ref_pc: Reference point cloud tensor.
        samp_pc: Sampled point cloud tensor.
        pc_size: Size factor multiplied by ``delta`` in the weight.
        gamma: Constant part of the weight of the third-output term.
        delta: Size-dependent part of the weight of the third-output term.

    Returns:
        The scalar loss tensor.
    """
    p1_to_p2, _, p2_to_p1, _ = nn_distance(samp_pc, ref_pc)

    # Worst-case term first, so the ops are created in the original order.
    worst_cost = tf.reduce_mean(tf.reduce_max(p1_to_p2, axis=1))
    mean_p1_to_p2 = tf.reduce_mean(p1_to_p2)
    mean_p2_to_p1 = tf.reduce_mean(p2_to_p1)
    loss = mean_p1_to_p2 + worst_cost + (gamma + delta * pc_size) * mean_p2_to_p1

    summaries = (
        ("cost_p1_p2", mean_p1_to_p2),
        ("cost_p2_p1", mean_p2_to_p1),
        ("max_cost", worst_cost),
    )
    for tag, value in summaries:
        tf.summary.scalar(tag, value)

    return loss

下面,我们将主要看看本论文的创新点之一——软投影操作

下方的代码展示了软投影算法的class以及运行实例,通过硬软投影后的均方误差比对,我们可以明显发现软投影的优势所在。

class SoftProjection(object):
    """Projects query points onto their nearest neighbours in a point cloud.

    Each query point is replaced by a weighted average of its ``group_size``
    nearest neighbours. The weights are a softmax over the negative squared
    distances divided by ``sigma`` (the squared temperature), or a one-hot
    selection of the largest weight when ``hard`` is requested.
    """

    def __init__(
        self, group_size, initial_temperature=1.0, is_temperature_trainable=True
    ):
        """Computes a soft nearest neighbor point cloud.
        Arguments:
            group_size: An integer, number of neighbors in nearest neighborhood.
            initial_temperature: A positive real number, initialization constant for temperature parameter.
            is_temperature_trainable: bool.
        Inputs:
            point_cloud: A `Tensor` of shape (batch_size, num_in_points, 3), original point cloud.
            query_cloud: A `Tensor` of shape (batch_size, num_out_points, 3), generated point cloud
        Outputs:
            projected_point_cloud: A `Tensor` of shape (batch_size, num_out_points, 3),
                the query_cloud projected onto its group_size nearest neighborhood,
                controlled by the learnable temperature parameter.
            weights: A `Tensor` of shape (batch_size, num_out_points, group_size, 1),
                the projection weights of the query_cloud onto its group_size nearest neighborhood
            dist: A `Tensor` of shape (batch_size, num_out_points, group_size, 1),
                the square distance of each query point from its neighbors divided by squared temperature parameter
        """

        self._group_size = group_size

        # create temperature variable
        self._temperature = tf.get_variable(
            "temperature",
            initializer=tf.constant(initial_temperature, dtype=tf.float32),
            trainable=is_temperature_trainable,
            dtype=tf.float32,
        )

        # sigma is exposed for loss calculation
        self.sigma = self._temperature ** 2

    def __call__(self, point_cloud, query_cloud, hard=False):
        """Project ``query_cloud`` onto ``point_cloud``; see ``project``.

        Arguments:
            point_cloud: original point cloud.
            query_cloud: generated point cloud to project.
            hard: bool, use one-hot weights instead of softmax weights.
        """
        return self.project(point_cloud, query_cloud, hard)

    def _group_points(self, point_cloud, query_cloud):
        """Gather, for every query point, its group_size nearest points of point_cloud."""
        # find nearest group_size neighbours in point_cloud
        _, idx = knn_point(self._group_size, point_cloud, query_cloud)
        grouped_points = group_point(point_cloud, idx)
        return grouped_points

    def _get_distances(self, grouped_points, query_cloud):
        """Return squared distances from each query point to its neighbours, divided by sigma."""
        group_size = self._group_size

        # remove centers to get absolute distances
        deltas = grouped_points - tf.tile(
            tf.expand_dims(query_cloud, 2), [1, 1, group_size, 1]
        )
        # keepdims leaves a trailing axis of size 1 so the result broadcasts
        # against grouped_points in project()
        dist = tf.reduce_sum(deltas ** 2, axis=3, keepdims=True) / self.sigma
        return dist

    def project(self, point_cloud, query_cloud, hard):
        """Compute the projected cloud, the projection weights and the scaled distances.

        Arguments:
            point_cloud: original point cloud.
            query_cloud: generated point cloud to project.
            hard: bool. When true the softmax weights are replaced by a one-hot
                encoding of their argmax over the neighbour axis.
        Returns:
            (projected_point_cloud, weights, dist) as described in ``__init__``.
        """
        grouped_points = self._group_points(
            point_cloud, query_cloud
        )  # (batch_size, num_out_points, group_size, 3)
        dist = self._get_distances(grouped_points, query_cloud)

        # pass through softmax to get weights
        weights = tf.nn.softmax(-dist, axis=2)
        if hard:
            # convert softmax weights to one_hot encoding
            weights = tf.one_hot(tf.argmax(weights, axis=2), depth=self._group_size)
            # one_hot appends the group axis last; swap the last two axes so the
            # weights line up with the neighbour axis (axis 2) again
            weights = tf.transpose(weights, perm=[0, 1, 3, 2])

        # get weighted average of grouped_points
        projected_point_cloud = tf.reduce_sum(
            grouped_points * weights, axis=2
        )  # (batch_size, num_out_points, 3)
        return projected_point_cloud, weights, dist


"""SoftProjection test"""
if __name__ == "__main__":
    # Demo: project a small query cloud onto a point cloud with soft and hard
    # weights, and print how far each result is from the expected points.
    tf.enable_eager_execution()
    projector = SoftProjection(3, initial_temperature=0.01)
    point_cloud = np.array(
        [
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [5, 4, 4],
            [4, 5, 4],
            [4, 4, 5],
            [8, 7, 7],
            [7, 8, 7],
            [7, 7, 8],
        ]
    )
    query_cloud = np.array(
        [[0, 0, 0], [1, 0, 0], [2, 0, 0], [5, 5, 5], [7, 7, 8], [7, 7, 8.5]]
    )
    expected_cloud_soft = np.array(
        [
            [0.333, 0.333, 0.333],
            [1, 0, 0],
            [1, 0, 0],
            [4.333, 4.333, 4.333],
            [7, 7, 8],
            [7, 7, 8],
        ]
    )

    expected_cloud_hard = np.array(
        [[1, 0, 0], [1, 0, 0], [1, 0, 0], [5, 4, 4], [7, 7, 8], [7, 7, 8]]
    )

    # expand to batch_size = 2
    point_cloud = np.stack([point_cloud, point_cloud * 3], axis=0)
    query_cloud = np.stack([query_cloud, query_cloud * 3], axis=0)
    expected_cloud_soft = np.stack(
        [expected_cloud_soft, expected_cloud_soft * 3], axis=0
    )
    expected_cloud_hard = np.stack(
        [expected_cloud_hard, expected_cloud_hard * 3], axis=0
    )

    point_cloud_pl = tf.convert_to_tensor(point_cloud, dtype=tf.float32)
    query_cloud_pl = tf.convert_to_tensor(query_cloud, dtype=tf.float32)

    soft_projected_points, soft_projection_weights, _ = projector(
        point_cloud_pl, query_cloud_pl
    )
    hard_projected_points, _, _ = projector(
        point_cloud_pl, query_cloud_pl, hard=True
    )

    soft_projected_points = soft_projected_points.numpy()
    hard_projected_points = hard_projected_points.numpy()
    # drop the trailing singleton axis of the weights for a compact printout
    soft_projection_weights = soft_projection_weights.numpy().squeeze()

    print("soft_projection_weights:")
    print(soft_projection_weights)

    # The clouds are (batch, points, 3): sum the squared differences over the
    # coordinate axis (the last one) to get one squared error per point, then
    # average over all points. Summing over axis=1 would add up the points of
    # a batch element instead.
    mse = np.mean(np.sum((soft_projected_points - expected_cloud_soft) ** 2, axis=-1))
    print("mean soft error:")
    print(mse)

    mse = np.mean(np.sum((hard_projected_points - expected_cloud_hard) ** 2, axis=-1))
    print("mean hard error:")
    print(mse)

总结

本次代码解析主要内容包括对主干网络结构的学习、对两种匹配模式的学习(EMD匹配和最近邻匹配)、对简化损失计算的学习以及对软投影的示例学习。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值