1. References
3D Fully Convolutional Network for Vehicle Detection in Point Cloud
2. Model Implementation
'''
Baidu Inc.
Ref:
3D Fully Convolutional Network for Vehicle Detection in Point Cloud
Author: HSW
Date: 2018-05-02
'''
import sys
import numpy as np
import tensorflow as tf
from prepare_data2 import *
from baidu_cnn_3d import *
KITTI_TRAIN_DATA_CNT = 7481
KITTI_TEST_DATA_CNT = 7518
# create 3D-CNN Model
def create_graph(sess, modelType = 0, voxel_shape = (400, 400, 20), activation=tf.nn.relu, is_train = True):
'''
Inputs:
sess: tensorflow Session object
modelType: which network structure to build (0 or 1)
voxel_shape: voxel grid shape for the network input layer
activation: activation function for the conv layers
is_train: whether to build the graph for training
Outputs:
voxel placeholder, model, phase_train placeholder
'''
voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])
phase_train = tf.placeholder(tf.bool, name="phase_train") if is_train else None
with tf.variable_scope("3D_CNN_Model") as scope:
model = Full_CNN_3D_Model()
model.cnn3d_graph(voxel, modelType = modelType, activation = activation, phase_train = phase_train if is_train else False)  # pass the phase_train placeholder so the batch-norm mode can be fed at run time
if is_train:
initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_Model")
sess.run(tf.variables_initializer(initialized_var))
return voxel, model, phase_train
# read batch data
def read_batch_data(batch_size, data_set_dir,objectType = "Car", split = "training", resolution=(0.2, 0.2, 0.2), scale=0.25, limitX = (0,80), limitY=(-40,40), limitZ=(-2.5,1.5)):
'''
Inputs:
batch_size: number of frames per yielded batch
data_set_dir: KITTI dataset root directory
objectType: default is "Car"
split: "training" or "testing"
resolution: voxel size in meters along x, y, z
scale: outputSize / inputSize of the network
limitX, limitY, limitZ: point cloud crop range in meters
Outputs:
generator yielding (voxel, g_obj, g_cord) batches for training, or voxel batches for testing
'''
kitti_3DVoxel = kitti_3DVoxel_interface(data_set_dir, objectType = objectType, split=split, scale = scale, resolution = resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
TRAIN_PROCESSED_IDX = 0
TEST_PROCESSED_IDX = 0
if split == "training":
while TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT:
batch_voxel = []
batch_g_obj = []
batch_g_cord = []
idx = 0
while idx < batch_size and TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT:
print(TRAIN_PROCESSED_IDX)
voxel, g_obj, g_cord = kitti_3DVoxel.read_kitti_data(TRAIN_PROCESSED_IDX)
TRAIN_PROCESSED_IDX += 1
if voxel is None:
continue
idx += 1
# print(voxel.shape)
batch_voxel.append(voxel)
batch_g_obj.append(g_obj)
batch_g_cord.append(g_cord)
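# Each yielded voxel batch gets a trailing channel axis, i.e. shape (batch, sizeX, sizeY, sizeZ, 1);
# batch_g_obj and batch_g_cord are the ground-truth objectness and corner maps on the scaled output
# grid (their exact shapes come from prepare_data2).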
yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_obj, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32)
elif split == "testing":
while TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT:
batch_voxel = []
idx = 0
while idx < batch_size and TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT:
voxel = kitti_3DVoxel.read_kitti_data(TEST_PROCESSED_IDX)
TEST_PROCESSED_IDX += 1
if voxel is None:
continue
idx += 1
batch_voxel.append(voxel)
yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis]
# train 3D-CNN Model
def train(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2,0.2,0.2), scale = 0.25, lr=0.01, limitX=(0,80), limitY=(-40,40), limitZ=(-2.5,1.5), epoch=101):
'''
Inputs:
batch_num: batch size
data_set_dir: KITTI dataset root directory
modelType: which network structure to build (0 or 1)
objectType: default is "Car"
resolution: voxel size in meters along x, y, z
scale: outputSize / inputSize of the network
lr: learning rate
limitX, limitY, limitZ: point cloud crop range in meters
epoch: number of training epochs
Outputs:
None
'''
batch_size = batch_num
training_epochs = epoch
sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
voxel_shape = (sizeX, sizeY, sizeZ)
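# With the default limits and 0.2 m resolution this gives voxel_shape = (400, 400, 20);
# the network output grid is scale (0.25) times this, i.e. (100, 100, 5).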
with tf.Session() as sess:
voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = True)
saver = tf.train.Saver()
total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y_pred = model.loss_Fun(lossType = 0, cord_loss_weight = 0.02)
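# loss_Fun presumably combines the objectness terms (is_obj_loss on object cells, non_obj_loss on
# background) with the corner-regression term cord_loss, the latter weighted by cord_loss_weight (0.02 here).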
optimizer = model.create_optimizer(total_loss, optType = "Adam", learnRate = lr)
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(training_epochs):
batchCnt = 0
for (batch_voxel, batch_g_obj, batch_g_cord) in read_batch_data(batch_size, data_set_dir, objectType = objectType, split = "training", resolution = resolution, scale = scale, limitX = limitX, limitY = limitY, limitZ = limitZ):
# print("batch_g_obj")
# print(batch_g_obj.shape)
# Run the optimizer and fetch the individual losses in a single pass instead of four separate forward passes
feed = {voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True}
_, cord_cost, obj_cost, non_obj_cost = sess.run([optimizer, cord_loss, is_obj_loss, non_obj_loss], feed_dict=feed)
print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "cord_cost = ", "{:.9f}".format(cord_cost))
print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "obj_cost = ", "{:.9f}".format(obj_cost))
print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "non_obj_cost = ", "{:.9f}".format(non_obj_cost))
batchCnt += 1
if (epoch > 0) and (epoch % 10 == 0):
saver.save(sess, "velodyne_kitti_train_" + str(epoch) + ".ckpt")
print("Training Finishied !")
# test 3D-CNN Model
def test(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2, 0.2, 0.2), scale = 0.25, limitX = (0, 80), limitY = (-40, 40), limitZ=(-2.5, 1.5)):
'''
Inputs:
batch_num: batch size
data_set_dir: KITTI dataset root directory
modelType: which network structure to build (0 or 1)
objectType: default is "Car"
resolution: voxel size in meters along x, y, z
scale: outputSize / inputSize of the network
limitX, limitY, limitZ: point cloud crop range in meters
Outputs:
None
'''
sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
voxel_shape = (sizeX, sizeY, sizeZ)
batch_size = batch_num
# read_batch_data is a generator; take the first testing batch, which already has shape (batch, X, Y, Z, 1)
batch_voxel_x = next(read_batch_data(batch_size, data_set_dir, objectType = objectType, split="testing", resolution=resolution, scale=scale, limitX=limitX, limitY=limitY, limitZ=limitZ))
with tf.Session() as sess:
is_train = False
voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = False)
new_saver = tf.train.import_meta_graph("velodyne_kitti_train_40.ckpt.meta")
last_model = "./velodyne_kitti_train_40.ckpt"
new_saver.restore(sess, last_model)
objectness = model.objectness
cordinate = model.cordinate
y_pred = model.y
objectness = sess.run(objectness, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
cordinate = sess.run(cordinate, feed_dict={voxel:batch_voxel_x})[0]
y_pred = sess.run(y_pred, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
idx = np.where(y_pred >= 0.995)
spheres = np.vstack((idx[0], np.vstack((idx[1], idx[2])))).transpose()
centers = spheres_to_centers(spheres, scale = scale, resolution=resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
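# cordinate stores, per output cell, the offsets of the 8 bounding-box corners (8 x 3 = 24 values)
# relative to that cell's center, so adding the metric centers recovers absolute corner coordinates.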
corners = cordinate[idx].reshape(-1, 8, 3) + centers[:, np.newaxis]
print(centers)
print(corners)
if __name__ == "__main__":
batch_num = 3
data_set_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
modelType = 1
objectType = "Car"
resolution = (0.2, 0.2, 0.2)
scale = 0.25
lr = 0.001
limitX = (0, 80)
limitY = (-40, 40)
limitZ = (-2.5, 1.5)
epoch = 101
train(batch_num, data_set_dir = data_set_dir, modelType = modelType, objectType = objectType, resolution=resolution, scale=scale, lr =lr, limitX = limitX, limitY = limitY, limitZ = limitZ)
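The test routine above depends on spheres_to_centers from prepare_data2 to map indices on the scaled output grid back to metric velodyne coordinates. That helper is not shown in this post, so the following is only a minimal sketch of the assumed mapping (the function name, half-cell offset, and axis conventions are assumptions, not the actual implementation):

import numpy as np

def spheres_to_centers_sketch(indices, scale=0.25, resolution=(0.2, 0.2, 0.2), limitX=(0, 80), limitY=(-40, 40), limitZ=(-2.5, 1.5)):
    # indices: (N, 3) array of (x, y, z) cell indices on the network output grid (assumed layout).
    # Each output cell spans resolution / scale meters per axis (0.8 m with the defaults),
    # since the output grid is scale (0.25) times the input voxel grid.
    cell = np.asarray(resolution, dtype=np.float32) / scale
    origin = np.array([limitX[0], limitY[0], limitZ[0]], dtype=np.float32)
    # Metric (velodyne-frame) center of each output cell.
    return origin + (np.asarray(indices, dtype=np.float32) + 0.5) * cell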
2.1 Network Model
'''
Baidu Inc.
Ref:
3D Fully Convolutional Network for Vehicle Detection in Point Cloud
Author: HSW
Date: 2018-05-02
'''
import numpy as np
import tensorflow as tf
class Full_CNN_3D_Model(object):
'''
Define Full CNN Model
'''
def __init__(self):
pass
def cnn3d_graph(self, voxel, modelType = 0, activation = tf.nn.relu, phase_train = True):
if modelType == 0:
# Modified 3D-CNN: this structure should not be used, because the downsampling is too aggressive (1/8), which leads to large errors at prediction time
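# Assuming SAME padding, the three stride-2 convolutions below shrink a (400, 400, 20) input to
# roughly (50, 50, 3), which is why this 1/8-downsampling variant is considered too coarse.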
self.layer1 = self.conv3d_layer(voxel , 1, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
self.layer2 = self.conv3d_layer(self.layer1, 16, 32, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
self.layer3 = self.conv3d_layer(self.layer2, 32, 64, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
self.layer4 = self.conv3d_layer(self.layer3, 64, 64, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, phase_train=phase_train)
self.objectness = self.conv3D_to_output(self.layer4, 64, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None)
self.cordinate = self.conv3D_to_output(self.layer4, 64, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None)
self.y = tf.nn.softmax(self.objectness, dim=-1)
elif modelType == 1:
# 3D-CNN (network structure from the paper: 1/4 downsampling, i.e. outputSize / inputSize = 0.25)
self.layer1 = self.conv3d_layer(voxel , 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
self.layer2 = self.conv3d_layer(self.layer1, 10, 20, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
self.layer3 = self.conv3d_layer(self.layer2, 20, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
base_shape = self.layer2.get_shape().as_list()
obj_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2]
cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24]
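# The stride-2 deconvolutions upsample layer3 back to layer2's spatial size, so the output maps
# are 1/4 of the input resolution, matching scale = 0.25.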
self.objectness = self.deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)
self.cordinate = self.deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_sha