1. References
3D Fully Convolutional Network for Vehicle Detection in Point Cloud
2. Model Implementation
'''
Baidu Inc.
Ref:
3D Fully Convolutional Network for Vehicle Detection in Point Cloud
Author: HSW
Date: 2018-05-02
'''
import sys
import numpy as np
import tensorflow as tf
from prepare_data2 import *
from baidu_cnn_3d import *
KITTI_TRAIN_DATA_CNT = 7481
KITTI_TEST_DATA_CNT = 7518
# create 3D-CNN Model
def create_graph(sess, modelType = 0, voxel_shape = (400, 400, 20), activation=tf.nn.relu, is_train = True):
'''
Inputs:
sess: tensorflow Session object
modelType: which network structure to build (0 or 1)
voxel_shape: voxel grid shape for the network input layer
activation: activation function for the conv layers
is_train: whether to build the graph for training
Outputs:
voxel placeholder, model, phase_train placeholder
'''
voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])
phase_train = tf.placeholder(tf.bool, name="phase_train") if is_train else None
with tf.variable_scope("3D_CNN_Model") as scope:
model = Full_CNN_3D_Model()
model.cnn3d_graph(voxel, modelType = modelType, activation = activation, phase_train = phase_train if is_train else False)  # pass the phase_train placeholder so the batch-norm mode can be fed at run time
if is_train:
initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_Model")
sess.run(tf.variables_initializer(initialized_var))
return voxel, model, phase_train
# read batch data
def read_batch_data(batch_size, data_set_dir,objectType = "Car", split = "training", resolution=(0.2, 0.2, 0.2), scale=0.25, limitX = (0,80), limitY=(-40,40), limitZ=(-2.5,1.5)):
'''
Inputs:
batch_size: number of frames per yielded batch
data_set_dir: KITTI dataset root directory
objectType: default is "Car"
split: "training" or "testing"
resolution: voxel size in meters along x, y, z
scale: outputSize / inputSize of the network
limitX, limitY, limitZ: point cloud crop range in meters
Outputs:
generator yielding (voxel, g_obj, g_cord) batches for training, or voxel batches for testing
'''
kitti_3DVoxel = kitti_3DVoxel_interface(data_set_dir, objectType = objectType, split=split, scale = scale, resolution = resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
TRAIN_PROCESSED_IDX = 0
TEST_PROCESSED_IDX = 0
if split == "training":
while TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT:
batch_voxel = []
batch_g_obj = []
batch_g_cord = []
idx = 0
while idx < batch_size and TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT:
print(TRAIN_PROCESSED_IDX)
voxel, g_obj, g_cord = kitti_3DVoxel.read_kitti_data(TRAIN_PROCESSED_IDX)
TRAIN_PROCESSED_IDX += 1
if voxel is None:
continue
idx += 1
# print(voxel.shape)
batch_voxel.append(voxel)
batch_g_obj.append(g_obj)
batch_g_cord.append(g_cord)
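# Each yielded voxel batch gets a trailing channel axis, i.e. shape (batch, sizeX, sizeY, sizeZ, 1);
# batch_g_obj and batch_g_cord are the ground-truth objectness and corner maps on the scaled output
# grid (their exact shapes come from prepare_data2).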
yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_obj, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32)
elif split == "testing":
while TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT:
batch_voxel = []
idx = 0
while idx < batch_size and TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT:
voxel = kitti_3DVoxel.read_kitti_data(TEST_PROCESSED_IDX)
TEST_PROCESSED_IDX += 1
if voxel is None:
continue
idx += 1
batch_voxel.append(voxel)
yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis]
# train 3D-CNN Model
def train(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2,0.2,0.2), scale = 0.25, lr=0.01, limitX=(0,80), limitY=(-40,40), limitZ=(-2.5,1.5), epoch=101):
'''
Inputs:
batch_num: batch size
data_set_dir: KITTI dataset root directory
modelType: which network structure to build (0 or 1)
objectType: default is "Car"
resolution: voxel size in meters along x, y, z
scale: outputSize / inputSize of the network
lr: learning rate
limitX, limitY, limitZ: point cloud crop range in meters
epoch: number of training epochs
Outputs:
None
'''
batch_size = batch_num
training_epochs = epoch
sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
voxel_shape = (sizeX, sizeY, sizeZ)
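# With the default limits and 0.2 m resolution this gives voxel_shape = (400, 400, 20);
# the network output grid is scale (0.25) times this, i.e. (100, 100, 5).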
with tf.Session() as sess:
voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = True)
saver = tf.train.Saver()
total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y_pred = model.loss_Fun(lossType = 0, cord_loss_weight = 0.02)
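# loss_Fun presumably combines the objectness terms (is_obj_loss on object cells, non_obj_loss on
# background) with the corner-regression term cord_loss, the latter weighted by cord_loss_weight (0.02 here).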
optimizer = model.create_optimizer(total_loss, optType = "Adam", learnRate = lr)
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(training_epochs):
batchCnt = 0
for (batch_voxel, batch_g_obj, batch_g_cord) in read_batch_data(batch_size, data_set_dir, objectType = objectType, split = "training", resolution = resolution, scale = scale, limitX = limitX, limitY = limitY, limitZ = limitZ):
# print("batch_g_obj")
# print(batch_g_obj.shape)
# Run the optimizer and fetch the individual losses in a single pass instead of four separate forward passes
feed = {voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True}
_, cord_cost, obj_cost, non_obj_cost = sess.run([optimizer, cord_loss, is_obj_loss, non_obj_loss], feed_dict=feed)
print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "cord_cost = ", "{:.9f}".format(cord_cost))
print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "obj_cost = ", "{:.9f}".format(obj_cost))
print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "non_obj_cost = ", "{:.9f}".format(non_obj_cost))
batchCnt += 1
if (epoch > 0) and (epoch % 10 == 0):
saver.save(sess, "velodyne_kitti_train_" + str(epoch) + ".ckpt")
print("Training Finishied !")
# test 3D-CNN Model
def test(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2, 0.2, 0.2), scale = 0.25, limitX = (0, 80), limitY = (-40, 40), limitZ=(-2.5, 1.5)):
'''
Inputs:
batch_num: batch size
data_set_dir: KITTI dataset root directory
modelType: which network structure to build (0 or 1)
objectType: default is "Car"
resolution: voxel size in meters along x, y, z
scale: outputSize / inputSize of the network
limitX, limitY, limitZ: point cloud crop range in meters
Outputs:
None
'''
sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
voxel_shape = (sizeX, sizeY, sizeZ)
batch_size = batch_num
# read_batch_data is a generator; take the first testing batch, which already has shape (batch, X, Y, Z, 1)
batch_voxel_x = next(read_batch_data(batch_size, data_set_dir, objectType = objectType, split="testing", resolution=resolution, scale=scale, limitX=limitX, limitY=limitY, limitZ=limitZ))
with tf.Session() as sess:
is_train = False
voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = False)
new_saver = tf.train.import_meta_graph("velodyne_kitti_train_40.ckpt.meta")
last_model = "./velodyne_kitti_train_40.ckpt"
new_saver.restore(sess, last_model)
objectness = model.objectness
cordinate = model.cordinate
y_pred = model.y
objectness = sess.run(objectness, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
cordinate = sess.run(cordinate, feed_dict={voxel:batch_voxel_x})[0]
y_pred = sess.run(y_pred, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
idx = np.where(y_pred >= 0.995)
spheres = np.vstack((idx[0], np.vstack((idx[1], idx[2])))).transpose()
centers = spheres_to_centers(spheres, scale = scale, resolution=resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
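# cordinate stores, per output cell, the offsets of the 8 bounding-box corners (8 x 3 = 24 values)
# relative to that cell's center, so adding the metric centers recovers absolute corner coordinates.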
corners = cordinate[idx].reshape(-1, 8, 3) + centers[:, np.newaxis]
print(centers)
print(corners)
if __name__ == "__main__":
batch_num = 3
data_set_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
modelType = 1
objectType = "Car"
resolution = (0.2, 0.2, 0.2)
scale = 0.25
lr = 0.001
limitX = (0, 80)
limitY = (-40, 40)
limitZ = (-2.5, 1.5)
epoch = 101
train(batch_num, data_set_dir = data_set_dir, modelType = modelType, objectType = objectType, resolution=resolution, scale=scale, lr =lr, limitX = limitX, limitY = limitY, limitZ = limitZ)
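The test routine above depends on spheres_to_centers from prepare_data2 to map indices on the scaled output grid back to metric velodyne coordinates. That helper is not shown in this post, so the following is only a minimal sketch of the assumed mapping (the function name, half-cell offset, and axis conventions are assumptions, not the actual implementation):

import numpy as np

def spheres_to_centers_sketch(indices, scale=0.25, resolution=(0.2, 0.2, 0.2), limitX=(0, 80), limitY=(-40, 40), limitZ=(-2.5, 1.5)):
    # indices: (N, 3) array of (x, y, z) cell indices on the network output grid (assumed layout).
    # Each output cell spans resolution / scale meters per axis (0.8 m with the defaults),
    # since the output grid is scale (0.25) times the input voxel grid.
    cell = np.asarray(resolution, dtype=np.float32) / scale
    origin = np.array([limitX[0], limitY[0], limitZ[0]], dtype=np.float32)
    # Metric (velodyne-frame) center of each output cell.
    return origin + (np.asarray(indices, dtype=np.float32) + 0.5) * cell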
2.1 Network Model
'''
Baidu Inc.
Ref:
3D Fully Convolutional Network for Vehicle Detection in Point Cloud
Author: HSW
Date: 2018-05-02
'''
import numpy as np
import tensorflow as tf
class Full_CNN_3D_Model(object):
'''
Define Full CNN Model
'''
def __init__(self):
pass
def cnn3d_graph(self, voxel, modelType = 0, activation = tf.nn.relu, phase_train = True):
if modelType == 0:
# Modified 3D-CNN: this structure should not be used, because the downsampling is too aggressive (1/8), which leads to large errors at prediction time
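# Assuming SAME padding, the three stride-2 convolutions below shrink a (400, 400, 20) input to
# roughly (50, 50, 3), which is why this 1/8-downsampling variant is considered too coarse.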
self.layer1 = self.conv3d_layer(voxel , 1, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
self.layer2 = self.conv3d_layer(self.layer1, 16, 32, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
self.layer3 = self.conv3d_layer(self.layer2, 32, 64, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
self.layer4 = self.conv3d_layer(self.layer3, 64, 64, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, phase_train=phase_train)
self.objectness = self.conv3D_to_output(self.layer4, 64, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None)
self.cordinate = self.conv3D_to_output(self.layer4, 64, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None)
self.y = tf.nn.softmax(self.objectness, dim=-1)
elif modelType == 1:
# 3D-CNN (network structure from the paper: 1/4 downsampling, i.e. outputSize / inputSize = 0.25)
self.layer1 = self.conv3d_layer(voxel , 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
self.layer2 = self.conv3d_layer(self.layer1, 10, 20, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
self.layer3 = self.conv3d_layer(self.layer2, 20, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
base_shape = self.layer2.get_shape().as_list()
obj_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2]
cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24]
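# The stride-2 deconvolutions upsample layer3 back to layer2's spatial size, so the output maps
# are 1/4 of the input resolution, matching scale = 0.25.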
self.objectness = self.deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)
self.cordinate = self.deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_sha