import config as cfg
from data_utils import FlowerDataLoader
from data_utils import nms, check_directory, fetch_selective_search_images, calc_ground_truth_predict_bounding_box
from networks import AlexNet, SVMModel, RegressionNet
import collections
import os
import pickle
import numpy as np
import cv2 as cv
import tensorflow as tf
import time
class SolverType(object):
TRAIN_FINE_TUNE_MODEL = 0
GENERATE_TRAIN_SVM_FEATURES = 1
TRAIN_SVM_MODEL = 2
GENERATE_TRAIN_REGRESSION_FEATURES = 3
TRAIN_REGRESSION_MODEL = 4
PREDICT_BOUNDING_BOX = 5
PREDICT_BOUNDING_BOX_STEP1 = 6
PREDICT_BOUNDING_BOX_STEP2 = 7
PREDICT_BOUNDING_BOX_STEP3 = 8
PREDICT_BOUNDING_BOX_STEP4 = 9
class Solver(object):
def __init__(self, solver_type):
self.is_training = False
self.is_svm = False
self.is_regression = False
if SolverType.TRAIN_FINE_TUNE_MODEL == solver_type:
with tf.Graph().as_default():
print("进行Fine Tune模型训练操作....")
self.is_training = True
self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
is_training=self.is_training)
self.data_loader = FlowerDataLoader()
self.__set_fine_tune_config()
check_directory(self.summary_writer_log_dir)
check_directory(self.checkpoint_dir)
self.__get_or_create_global_step()
self.__create_tf_train_op()
self.__create_tf_saver()
self.__create_tf_summary()
self.__create_tf_session_and_initial()
self.run = self.__fine_tune_train
elif SolverType.GENERATE_TRAIN_SVM_FEATURES == solver_type:
with tf.Graph().as_default():
print("生成SVM分类训练用高阶特征属性,并持久化磁盘文件....")
self.is_training = False
self.is_svm = True
self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
is_training=self.is_training, is_svm=self.is_svm)
self.data_loader = FlowerDataLoader()
self.__set_fine_tune_config()
check_directory(self.summary_writer_log_dir)
check_directory(self.checkpoint_dir, created=False, error=True)
self.__get_or_create_global_step()
self.__create_tf_saver()
self.__create_tf_summary()
self.__create_tf_session_and_initial()
self.run = self.__persistent_svm_higher_features
elif SolverType.TRAIN_SVM_MODEL == solver_type:
print("进行SVM模型训练操作....")
self.is_svm = True
self.is_training = True
self.net = SVMModel(is_training=self.is_training)
self.run = self.__svm_train
elif SolverType.GENERATE_TRAIN_REGRESSION_FEATURES == solver_type:
with tf.Graph().as_default():
print("生成回归模型训练用高阶特征属性,并持久化磁盘文件....")
self.is_training = False
self.is_svm = False
self.is_regression = True
self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
is_training=self.is_training, is_svm=self.is_svm, is_regression=self.is_regression)
self.data_loader = FlowerDataLoader()
self.__set_fine_tune_config()
check_directory(self.summary_writer_log_dir)
check_directory(self.checkpoint_dir, created=False, error=True)
self.__get_or_create_global_step()
self.__create_tf_saver()
self.__create_tf_summary()
self.__create_tf_session_and_initial()
self.run = self.__persistent_regression_higher_features_new
elif SolverType.TRAIN_REGRESSION_MODEL == solver_type:
with tf.Graph().as_default():
print("进行回归模型训练..")
self.is_regression = True
self.is_training = True
self.net = RegressionNet(is_training=self.is_training)
self.data_loader = FlowerDataLoader()
self.__set_regression_net_config()
check_directory(self.summary_writer_log_dir)
check_directory(self.checkpoint_dir)
self.__get_or_create_global_step()
self.__create_tf_train_op()
self.__create_tf_saver()
self.__create_tf_summary()
self.__create_tf_session_and_initial()
self.run = self.__regression_train
elif SolverType.PREDICT_BOUNDING_BOX_STEP1 == solver_type:
print("产生Selective Search候选框对应的图像信息....")
self.run = self.__fetch_selective_search_box
elif SolverType.PREDICT_BOUNDING_BOX_STEP2 == solver_type:
with tf.Graph().as_default():
print("进行获取SVM分类以及Regression Box高阶特征操作....")
self.is_training = False
self.is_svm = True
self.is_regression = True
self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
is_training=self.is_training, is_svm=self.is_svm, is_regression=self.is_regression)
self.__set_fine_tune_config()
check_directory(self.summary_writer_log_dir)
check_directory(self.checkpoint_dir, created=False, error=True)
self.__get_or_create_global_step()
self.__create_tf_saver()
self.__create_tf_summary()
self.__create_tf_session_and_initial()
self.run = self.__fetch_higher_features_by_images
elif SolverType.PREDICT_BOUNDING_BOX_STEP3 == solver_type:
print("进行SVM模型预测操作,得到各个边框属于某个类别的预测值!!!")
self.net = SVMModel(is_training=False)
self.run = self.__fetch_svm_predict
elif SolverType.PREDICT_BOUNDING_BOX_STEP4 == solver_type:
with tf.Graph().as_default():
print("进行Bounding Box回归预测操作,得到各个边框的偏移量信息....")
self.is_training = False
self.is_svm = False
self.is_regression = True
self.net = RegressionNet(is_training=self.is_training)
self.__set_regression_net_config()
check_directory(self.summary_writer_log_dir)
check_directory(self.checkpoint_dir, created=False, error=True)
self.__get_or_create_global_step()
self.__create_tf_saver()
self.__create_tf_summary()
self.__create_tf_session_and_initial()
self.run = self.__fetch_regression_bounding_box
else:
raise Exception("不支持当前solver_stype:{}".format(solver_type))
def __fetch_regression_bounding_box(self, x):
"""
获取回归模型返回的偏移值
:param x:
:return:
"""
return self.session.run(self.net.logits, feed_dict={self.net.input_data: x})
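    # The returned offsets are assumed to have shape [N, 4], i.e. one
    # (tx, ty, tw, th) transform per input feature row (see the offset_box
    # comment in run_solver below).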
def __fetch_svm_predict(self, x):
"""
对高阶的特征属性数据x进行svm模型预测,并返回各个边框属于类别的概率信息
x表示一张图形的所有候选框的高阶特征数据
:param x:
:return:
"""
result = []
labels = self.net.fetch_labels()
for label in labels:
predict = self.net.predict_proba(x, label)
"""
实现方式:
方法2. 简单(定位任务的思路):获取概率最大的那个作为最终的预测边框
方法1. 使用NMS非极大值抑制的策略来获取确定边框
"""
if cfg.SVM_PREDICT_PROBABILITY_WITH_NMS:
index = np.where(predict >= cfg.SVM_PREDICT_PROBABILITY_THRESHOLD)[0]
if np.size(index) > 0:
for idx in index:
result.append([idx, label, predict[idx]])
else:
max_predict_index = np.argmax(predict)
max_predict = predict[max_predict_index]
if max_predict > cfg.SVM_PREDICT_PROBABILITY_THRESHOLD:
result.append([max_predict_index, label, max_predict])
result = np.asarray(result)
return result
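    # Toy illustration of the two strategies in __fetch_svm_predict (hypothetical
    # numbers): with predict = [0.2, 0.9, 0.7] for one class and a threshold of 0.5,
    # the NMS branch keeps boxes 1 and 2 (both above the threshold; duplicates are
    # removed later by nms() in run_solver), while the argmax branch keeps only box 1.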
def __fetch_higher_features_by_images(self, images):
"""
获取对应图像的高阶特征信息,也就是同时返回图像在Conv5和FC7层输出作为高阶特征
:param images:
:return:
"""
svm_higher_features, regression_higher_features = self.session.run(
[self.net.svm_logits, self.net.regression_logits],
feed_dict={self.net.input_data: images}
)
return svm_higher_features, regression_higher_features
def __fetch_selective_search_box(self, file_path):
"""
对于给定的文件路径的图形获取对应的边框信息,主要返回两个值:边框图像以及边框的坐标
:param file_path:
:return:
"""
result = fetch_selective_search_images(file_path=file_path,
image_width=cfg.IMAGE_WIDTH,
image_height=cfg.IMAGE_HEIGHT)
images = []
bounding_boxs = []
if result is not None:
for image, box in result:
images.append(image)
bounding_boxs.append(box)
images = np.asarray(images)
bounding_boxs = np.asarray(bounding_boxs)
return images, bounding_boxs
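    # Per the prediction pipeline in run_solver below, images has shape
    # [total_roi_samples, IMAGE_HEIGHT, IMAGE_WIDTH, C] and bounding_boxs has shape
    # [total_roi_samples, 4] with coordinates ordered [x1, y1, x2, y2].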
def __set_regression_net_config(self):
"""
配置RegressionNet相关的属性信息
:return:
"""
self.initial_learning_rate = cfg.REGRESSION_INITIAL_LEARNING_RATE
self.decay_steps = cfg.REGRESSION_DECAY_STEPS
self.decay_rate = cfg.REGRESSION_DECAY_RATE
self.summary_writer_log_dir = cfg.REGRESSION_SUMMARY_WRITER_LOG_DIR
self.checkpoint_dir = cfg.REGRESSION_CHECKPOINT_DIR
self.checkpoint_path = os.path.join(self.checkpoint_dir, cfg.REGRESSION_CHECKPOINT_FILENAME)
self.max_steps = cfg.REGRESSION_MAX_STEP
self.summary_step = cfg.REGRESSION_SUMMARY_STEP
self.checkpoint_step = cfg.REGRESSION_CHECKPOINT_STEP
def __set_fine_tune_config(self):
"""
配置Fine Tune的AlexNet相关的属性信息
:return:
"""
self.initial_learning_rate = cfg.FINE_TUNE_INITIAL_LEARNING_RATE
self.decay_steps = cfg.FINE_TUNE_DECAY_STEPS
self.decay_rate = cfg.FINE_TUNE_DECAY_RATE
self.summary_writer_log_dir = cfg.FINE_TUNE_SUMMARY_WRITER_LOG_DIR
self.checkpoint_dir = cfg.FINE_TUNE_CHECKPOINT_DIR
self.checkpoint_path = os.path.join(self.checkpoint_dir, cfg.FINE_TUNE_CHECKPOINT_FILENAME)
self.max_steps = cfg.FINE_TUNE_MAX_STEP
self.summary_step = cfg.FINE_TUNE_SUMMARY_STEP
self.checkpoint_step = cfg.FINE_TUNE_CHECKPOINT_STEP
def __get_or_create_global_step(self):
self.global_step = tf.train.get_or_create_global_step()
def __create_tf_train_op(self):
if self.is_training:
with tf.variable_scope("train"):
self.learning_rate = tf.train.exponential_decay(
learning_rate=self.initial_learning_rate,
global_step=self.global_step,
decay_steps=self.decay_steps,
decay_rate=self.decay_rate,
name='learning_rate')
tf.summary.scalar('learning_rate', self.learning_rate)
self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate) \
.minimize(self.net.total_loss, global_step=self.global_step)
if self.is_regression:
self.train_op = self.optimizer
else:
self.ema = tf.train.ExponentialMovingAverage(0.99)
with tf.control_dependencies([self.optimizer]):
self.train_op = self.ema.apply(tf.trainable_variables())
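    # With the default staircase=False, tf.train.exponential_decay computes
    #   learning_rate = initial_learning_rate * decay_rate ** (global_step / decay_steps)
    # e.g. with hypothetical values initial=1e-3, decay_rate=0.9, decay_steps=1000,
    # the rate after 2000 steps is 1e-3 * 0.9**2 = 8.1e-4.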
def __create_tf_saver(self):
self.saver = tf.train.Saver(max_to_keep=2)
def __create_tf_summary(self):
self.summary = tf.summary.merge_all()
self.writer = tf.summary.FileWriter(self.summary_writer_log_dir, graph=tf.get_default_graph())
def __create_tf_session_and_initial(self):
self.session = tf.Session()
ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
print("进行模型恢复操作...")
self.saver.restore(self.session, ckpt.model_checkpoint_path)
self.saver.recover_last_checkpoints(ckpt.all_model_checkpoint_paths)
else:
self.session.run(tf.global_variables_initializer())
            print('No checkpoint file found, training from scratch!')
def __svm_train(self):
"""
进行SVM模型训练
:return:
"""
self.net.train()
def __fine_tune_train(self):
if not self.is_training:
raise Exception("Train method request set 'is_training' parameter is True.")
start_step = self.session.run(self.global_step)
end_step = start_step + self.max_steps
for step in range(start_step, end_step):
images, labels = self.data_loader.get_fine_tune_batch()
feed_dict = {self.net.input_data: images, self.net.label: labels}
if step % self.summary_step == 0:
summary_, loss_, accuracy_, _ = self.session.run(
[self.summary, self.net.total_loss, self.net.accuracy, self.train_op],
feed_dict=feed_dict)
self.writer.add_summary(summary_, global_step=step)
print("Training Step:{}, Loss:{}, Accuracy:{}".format(step, loss_, accuracy_))
else:
self.session.run(self.train_op, feed_dict=feed_dict)
if (step+1) % self.checkpoint_step == 0:
print("Saving model to {}".format(self.checkpoint_dir))
self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
print("Saving model to {}".format(self.checkpoint_dir))
self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
def __regression_train(self):
if not self.is_training:
raise Exception("Train method request set 'is_training' parameter is True.")
start_step = self.session.run(self.global_step)
end_step = start_step + self.max_steps
for step in range(start_step, end_step):
images, labels = self.data_loader.get_regression_box_train_batch()
feed_dict = {self.net.input_data: images, self.net.label: labels}
if step % self.summary_step == 0:
summary_, loss_, _ = self.session.run(
[self.summary, self.net.total_loss, self.train_op],
feed_dict=feed_dict)
self.writer.add_summary(summary_, global_step=step)
print("Training Step:{}, Loss:{}".format(step, loss_))
else:
self.session.run(self.train_op, feed_dict=feed_dict)
if (step+1) % self.checkpoint_step == 0:
print("Saving model to {}".format(self.checkpoint_dir))
self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
print("Saving model to {}".format(self.checkpoint_dir))
self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
def __fine_tune_predict(self, images):
"""
运行,得到Fine Tune模型的返回值(logits 作为svm模型的输入)
:param images:
"""
return self.session.run(self.net.logits, feed_dict={self.net.input_data: images})
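    # Note: what net.logits holds depends on how the AlexNet instance was built;
    # per the comments in run_solver it is assumed to be the FC7 output
    # ([N, 4096]) when is_svm=True and the flattened Conv5 output ([N, 9216])
    # when is_regression=True.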
def __persistent_svm_higher_features(self):
"""
持久化用于svm模型训练的高阶特征数据
注意:在svm模型训练中,是针对每个类别训练一个svm模型,所有在这里需要对于每个类别产生一个训练数据文件
:return:
"""
check_directory(cfg.TRAIN_LABEL_DICT_FILE_PATH, created=False, error=True)
class_name_2_index_dict = pickle.load(open(cfg.TRAIN_LABEL_DICT_FILE_PATH, 'rb'))
for class_name, index in class_name_2_index_dict.items():
print("Start process type '{}/{}' datas...".format(index, class_name))
X = None
Y = None
while self.data_loader.has_next_structure_higher_features_batch(index):
images, labels = self.data_loader.get_structure_higher_features(label=index)
if images is None or labels is None:
continue
print(np.shape(images), np.shape(labels))
higher_features = self.__fine_tune_predict(images)
if X is None:
X = higher_features
Y = labels
else:
X = np.append(X, higher_features, axis=0)
Y = np.append(Y, labels)
print("Final Feature Attribute Structure:{} - {}".format(np.shape(X), np.shape(Y)))
print("Number of occurrences of each category:{}".format(collections.Counter(Y)))
data = np.concatenate((X, np.reshape(Y, (-1, 1))), axis=1)
svm_higher_features_save_path = cfg.TRAIN_SVM_HIGHER_FEATURES_DATA_FILE_PATH.format(index)
check_directory(os.path.dirname(svm_higher_features_save_path))
np.save(svm_higher_features_save_path, data)
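    # Each persisted array has one row per sample: the first columns are the
    # high-level features (assumed 4096-dim, from FC7) and the last column is the
    # label, so it can be reloaded with something like
    #   data = np.load(path); X, Y = data[:, :-1], data[:, -1]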
def __persistent_regression_higher_features(self):
"""
获取训练回归模型用的特征属性数据,并持久化磁盘文件
获取所有和GT的IoU值在0.6以上的候选框全部作为训练数据
:return:
"""
X = None
Y = None
print("开始生成回归用的高阶特征!")
while self.data_loader.has_next_regression_box_batch():
images, labels = self.data_loader.get_regression_box_batch()
if images is None or labels is None:
continue
print(np.shape(images), np.shape(labels))
higher_features = self.__fine_tune_predict(images)
if X is None:
X = higher_features
Y = labels
else:
X = np.append(X, higher_features, axis=0)
Y = np.append(Y, labels, axis=0)
print("Final Feature Attribute Structure:{} - {}".format(np.shape(X), np.shape(Y)))
data = np.concatenate((X, Y), axis=1)
regression_higher_features_save_path = cfg.TRAIN_REGRESSION_HIGHER_FEATURES_DATA_FILE_PATH
check_directory(os.path.dirname(regression_higher_features_save_path))
np.save(regression_higher_features_save_path, data)
def __persistent_regression_higher_features_new(self):
"""
获取训练回归模型用的特征属性数据,并持久化磁盘文件
获取所有和GT的IoU值在0.6以上的候选框全部作为训练数据
:return:
"""
X = None
Y = None
print("开始生成回归用的高阶特征!")
for batch_start_id in range(0, len(self.data_loader.regression_box_higher_features_samples_index),
self.data_loader.regression_box_batch_size):
batch_index = self.data_loader.regression_box_higher_features_samples_index[
batch_start_id: batch_start_id + self.data_loader.regression_box_batch_size]
images = self.data_loader.fine_tune_x[batch_index]
labels = self.data_loader.regression_box_higher_features_Y[batch_index]
if images is None or labels is None:
continue
print(np.shape(images), np.shape(labels))
higher_features = self.__fine_tune_predict(images)
if X is None:
X = higher_features
Y = labels
else:
X = np.append(X, higher_features, axis=0)
Y = np.append(Y, labels, axis=0)
print("Final Feature Attribute Structure:{} - {}".format(np.shape(X), np.shape(Y)))
data = np.concatenate((X, Y), axis=1)
regression_higher_features_save_path = cfg.TRAIN_REGRESSION_HIGHER_FEATURES_DATA_FILE_PATH
check_directory(os.path.dirname(regression_higher_features_save_path))
np.save(regression_higher_features_save_path, data)
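# --- Illustrative sketch (not used by the pipeline above) ---
# A minimal NumPy version of the standard R-CNN bounding-box transform that
# data_utils.calc_ground_truth_predict_bounding_box is assumed to perform:
# given a proposal box P and regression offsets (tx, ty, tw, th), the refined
# box has center Gx = Pw*tx + Px, Gy = Ph*ty + Py and size Gw = Pw*exp(tw),
# Gh = Ph*exp(th). The function name below is hypothetical and the real helper
# may differ in details (e.g. clipping to the image borders).
def _apply_box_deltas_sketch(boxes, deltas):
    """boxes: [K, 4] as [x1, y1, x2, y2]; deltas: [K, 4] as [tx, ty, tw, th]."""
    boxes = np.asarray(boxes, dtype=np.float32)
    deltas = np.asarray(deltas, dtype=np.float32)
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    # x/y offsets are scaled by the proposal size, w/h offsets act in log-space
    pred_ctr_x = widths * deltas[:, 0] + ctr_x
    pred_ctr_y = heights * deltas[:, 1] + ctr_y
    pred_w = widths * np.exp(deltas[:, 2])
    pred_h = heights * np.exp(deltas[:, 3])
    return np.stack([pred_ctr_x - 0.5 * pred_w,
                     pred_ctr_y - 0.5 * pred_h,
                     pred_ctr_x + 0.5 * pred_w,
                     pred_ctr_y + 0.5 * pred_h], axis=1)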
def run_solver():
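    # flag selects which stage of the pipeline to run; the prediction branch
    # (flag == 5) assumes the training stages (flags 0-4) have already been run,
    # so that the required checkpoints and SVM models exist on disk.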
flag = 5
if flag == 0:
solver = Solver(SolverType.TRAIN_FINE_TUNE_MODEL)
solver.run()
elif flag == 1:
solver = Solver(SolverType.GENERATE_TRAIN_SVM_FEATURES)
solver.run()
elif flag == 2:
solver = Solver(SolverType.TRAIN_SVM_MODEL)
solver.run()
elif flag == 3:
solver = Solver(SolverType.GENERATE_TRAIN_REGRESSION_FEATURES)
solver.run()
elif flag == 4:
solver = Solver(SolverType.TRAIN_REGRESSION_MODEL)
solver.run()
elif flag == 5:
file_path = '../rcnn_datas/2flowers/jpg/t1.jpg'
print("获取候选框.....")
t1 = time.time()
solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP1)
"""
images: path这个图像所对应的所有候选框图像数据,形状为: [total_roi_samples, H, W, C]
boxs: path这个对象对应的候选框所对应的坐标值,形状为: [total_roi_samples, 4], 坐标的顺序为: [左上角x,左上角y,右下角x, 右下角y]
eg: images[0]和boxs[0]分别表示第一个ROI区域图像以及这个图像在原始图像中的坐标点信息
"""
images, bounding_boxs = solver.run(file_path)
print("候选框获取完成.....")
print(time.time() - t1)
print(np.shape(images))
print(np.shape(bounding_boxs))
print("开始获取高阶特征....")
solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP2)
"""
svm_higher_features: 特征提取网络的第FC7层的输出,形状为: [total_roi_samples, 4096], 用于svm模型的预测
regression_higher_features: 特征提取网络的第Conv5层的输出,形状为: [total_roi_samples, 9216], 用于回归模型的预测
"""
svm_higher_features, regression_higher_features = solver.run(images)
        print('Prediction step 2, high-level feature results:')
print(np.shape(svm_higher_features))
print(np.shape(regression_higher_features))
solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP3)
"""
predict_svm: 预测结果,保留的是所有可能属于物品的的边框信息,形状:[K, 3], K表示总共有K个边框可以确定属于物体
第一列为边框id(下标),第二列为边框所属类别,第三列为边框所属类别的可能性概率值
"""
predict_svm = solver.run(svm_higher_features)
        print('Step 3, SVM classification result:', predict_svm)
        box_indexs = predict_svm[:, 0].astype(int)
boxs = np.reshape(bounding_boxs[box_indexs], (-1, np.shape(bounding_boxs)[1]))
rhf = regression_higher_features[box_indexs]
rhf = np.reshape(rhf, (-1, np.shape(regression_higher_features)[1]))
solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP4)
"""
# offset_box: 针对确定属于物体的边框,进行回归模型后,所得到的转换系数,形状为: [K, 4]
# 也就是每个边框产生4个转换系数: tx\ty\tw\th
"""
offset_box = solver.run(rhf)
print(offset_box)
final_box = calc_ground_truth_predict_bounding_box(boxs, offset_box)
print("SS产生的边框(SVM确定属于物体):\n{}".format(boxs))
print("边框值(微调):\n{}".format(final_box))
if cfg.SVM_PREDICT_PROBABILITY_WITH_NMS:
boxes_probs = predict_svm[:, 2]
            boxes_labels = predict_svm[:, 1].astype(int)
final_boxes_index, final_boxes_prob = nms(
boxes=final_box,
probs=boxes_probs,
boxes_labels=boxes_labels,
prob_threshold=cfg.SVM_PREDICT_PROBABILITY_THRESHOLD
)
print(final_boxes_index)
print("NMS微调之后的边框值:\n{}".format(final_box[final_boxes_index]))
print(final_boxes_prob)
nms_final_boxs = final_box[final_boxes_index]
print("最终的实际边框信息")
print(nms_final_boxs)
print(boxes_labels[final_boxes_index])
print(boxes_probs[final_boxes_index])
show_image = cv.imread(file_path)
for roi, bb_roi in zip(boxs, final_box):
                # cast to int: OpenCV drawing functions expect integer pixel coordinates
                cv.rectangle(show_image, pt1=(int(roi[0]), int(roi[1])),
                             pt2=(int(roi[2]), int(roi[3])), color=(255, 0, 0), thickness=2)
                cv.rectangle(show_image, pt1=(int(bb_roi[0]), int(bb_roi[1])),
                             pt2=(int(bb_roi[2]), int(bb_roi[3])), color=(0, 0, 255), thickness=2)
label_2_name = {
1: "DIR0",
2: "DIR1"
}
label_2_color = {
1: (0, 255, 255),
2: (200, 0, 100)
}
for idx in final_boxes_index:
roi = final_box[idx]
label = boxes_labels[idx]
prob = boxes_probs[idx]
color = label_2_color[label]
                cv.rectangle(show_image, pt1=(int(roi[0]), int(roi[1])), pt2=(int(roi[2]), int(roi[3])),
                             color=color, thickness=2)
                cv.putText(show_image, text='%s:%.3f' % (label_2_name[label], prob),
                           org=(int(roi[0]) + 10, int(roi[1]) + 10), fontFace=cv.FONT_HERSHEY_SIMPLEX,
                           fontScale=1, color=color, thickness=2, lineType=cv.LINE_AA)
cv.imwrite('./result.png', show_image)
cv.imshow('show_image', show_image)
cv.waitKey(0)
cv.destroyAllWindows()
else:
show_image = cv.imread(file_path)
for roi, bb_roi in zip(boxs, final_box):
                cv.rectangle(show_image, pt1=(int(roi[0]), int(roi[1])),
                             pt2=(int(roi[2]), int(roi[3])), color=(255, 0, 0), thickness=2)
                cv.rectangle(show_image, pt1=(int(bb_roi[0]), int(bb_roi[1])),
                             pt2=(int(bb_roi[2]), int(bb_roi[3])), color=(0, 0, 255), thickness=2)
cv.imwrite('./result.png', show_image)
cv.imshow('show_image', show_image)
cv.waitKey(0)
cv.destroyAllWindows()
if __name__ == '__main__':
run_solver()