What this example demonstrates:
1. Building a classification network with tf.contrib.layers (you could also build it with tf.nn).
2. Reading batches of images directly from their file paths, or from a txt file that lists image paths and labels (a common setup when working with Caffe; see the sketch below), with no file-format conversion needed.
3. Learning-rate decay.
4. Using TensorBoard to inspect the model graph and to watch loss, accuracy and learning rate during training, which helps with tuning.
5. Saving the model.
6. Loading the trained model and running predictions on images.
This example uses the flowers dataset: http://download.tensorflow.org/example_images/flower_photos.tgz
The data is split into 5 classes.
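Regarding item 2 above: if you would rather drive training from a Caffe-style list file, a minimal sketch of such a loader could look like the one below. It assumes a hypothetical text file with one "image_path label" pair per line and is only an illustration; the scripts in this post read the paths from the directory tree instead.
def get_files_from_txt(txt_path):
    # Hypothetical helper: parse "image_path label" pairs from a list file.
    image_list, label_list = [], []
    with open(txt_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            path, label = line.rsplit(' ', 1)
            image_list.append(path)
            label_list.append(int(label))
    return image_list, label_list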
Main script: train.py
Here get_batch loads batches of data, and get_files returns the image paths and labels.
#coding:utf-8
import os
import numpy as np
import tensorflow as tf
import glob
import model
init_lr = 0.001
decay_steps = 10000
MAX_STEP = 200000
N_CLASSES = 5
IMG_W = 224
IMG_H = 224
BATCH_SIZE = 32
CAPACITY = 2000
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # GPU id
label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4} # manual mapping from class name to label; labels must start at 0
train_dir = 'flowers/flower_photos' # directory holding one sub-folder of images per class (5 classes here)
logs_train_dir = './model_save'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True # grow GPU memory usage as needed instead of grabbing it all up front
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    # make an input queue
    input_queue = tf.train.slice_input_producer([image, label], shuffle=False)
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # data augmentation
    #image = tf.image.resize_image_with_pad(image, target_height=image_H, target_width=image_W)
    image = tf.image.resize_images(image, (image_H, image_W))
    # random horizontal flip
    image = tf.image.random_flip_left_right(image)
    # random vertical flip
    image = tf.image.random_flip_up_down(image)
    # random brightness
    image = tf.image.random_brightness(image, max_delta=32/255.0)
    # random contrast
    image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
    # random hue
    image = tf.image.random_hue(image, max_delta=0.3)
    # random saturation
    image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
    # standardize each image to zero mean and unit variance
    image = tf.image.per_image_standardization(image)
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
def get_files(file_dir):
    image_list, label_list = [], []
    for label in os.listdir(file_dir):
        for img in glob.glob(os.path.join(file_dir, label, "*.jpg")):
            image_list.append(img)
            label_list.append(label_dict[label])
    print('There are %d data' % (len(image_list)))
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(i) for i in label_list]
    return image_list, label_list
def main():
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # dataset
    train, train_label = get_files(train_dir)
    # labels are plain integers (no one-hot encoding)
    batch_train, batch_labels = get_batch(train,
                                          train_label,
                                          IMG_W,
                                          IMG_H,
                                          BATCH_SIZE,
                                          CAPACITY)
    # network
    #logits = model.model2(batch_train, BATCH_SIZE, N_CLASSES)
    logits = model.model4(batch_train, N_CLASSES, is_training=True)
    # loss
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=batch_labels)
    loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.summary.scalar('train_loss', loss)
    # optimizer with exponentially decaying learning rate
    lr = tf.train.exponential_decay(learning_rate=init_lr, global_step=global_step, decay_steps=decay_steps, decay_rate=0.1)
    tf.summary.scalar('learning_rate', lr)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss, global_step=global_step)
    # accuracy
    correct = tf.nn.in_top_k(logits, batch_labels, 1)
    correct = tf.cast(correct, tf.float16)
    accuracy = tf.reduce_mean(correct)
    tf.summary.scalar('train_acc', accuracy)
    summary_op = tf.summary.merge_all()
    sess = tf.Session(config=config)
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    #saver.restore(sess, logs_train_dir+'/model.ckpt-174000')
    try:
        for step in range(MAX_STEP):
            if coord.should_stop():
                break
            _, learning_rate, tra_loss, tra_acc = sess.run([optimizer, lr, loss, accuracy])
            if step % 50 == 0:
                print('Step %4d, lr %f, train loss = %.2f, train accuracy = %.2f%%' % (step, learning_rate, tra_loss, tra_acc*100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
    coord.join(threads)
    sess.close()

if __name__ == '__main__':
    main()
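To look at the graph and the train_loss, train_acc and learning_rate curves written by the FileWriter above, launch TensorBoard on the log directory, e.g. tensorboard --logdir=./model_save, and open the address it prints in a browser.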
Model definition: model.py
#coding:utf-8
from tensorflow.contrib.layers.python.layers import batch_norm
import tensorflow as tf
import inspect
import os
import numpy as np
import time
def model4(x, N_CLASSES, is_training=False):
    x = tf.contrib.layers.conv2d(x, 64, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu)
    x = batch_norm(x, decay=0.9, updates_collections=None, is_training=is_training) # set is_training=True while training, False when using the trained model
    x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
    x1_1 = tf.contrib.layers.conv2d(x, 64, [1, 1], 1, 'SAME', activation_fn=tf.nn.relu) # 1x1 kernel
    x1_1 = batch_norm(x1_1, decay=0.9, updates_collections=None, is_training=is_training)
    x3_3 = tf.contrib.layers.conv2d(x, 64, [3, 3], 1, 'SAME', activation_fn=tf.nn.relu) # 3x3 kernel
    x3_3 = batch_norm(x3_3, decay=0.9, updates_collections=None, is_training=is_training)
    x5_5 = tf.contrib.layers.conv2d(x, 64, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu) # 5x5 kernel
    x5_5 = batch_norm(x5_5, decay=0.9, updates_collections=None, is_training=is_training)
    x = tf.concat([x1_1, x3_3, x5_5], axis=-1) # concatenate the branches: 64*3 = 192 channels
    x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
    x1_1 = tf.contrib.layers.conv2d(x, 128, [1, 1], 1, 'SAME', activation_fn=tf.nn.relu)
    x1_1 = batch_norm(x1_1, decay=0.9, updates_collections=None, is_training=is_training)
    x3_3 = tf.contrib.layers.conv2d(x, 128, [3, 3], 1, 'SAME', activation_fn=tf.nn.relu)
    x3_3 = batch_norm(x3_3, decay=0.9, updates_collections=None, is_training=is_training)
    x5_5 = tf.contrib.layers.conv2d(x, 128, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu)
    x5_5 = batch_norm(x5_5, decay=0.9, updates_collections=None, is_training=is_training)
    x = tf.concat([x1_1, x3_3, x5_5], axis=-1)
    x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
    shp = x.get_shape()
    x = tf.reshape(x, [-1, shp[1]*shp[2]*shp[3]]) # flatten
    x = tf.contrib.layers.fully_connected(x, N_CLASSES, activation_fn=None) # output logits without softmax
    return x
def model2(images, batch_size, n_classes):
    '''Build the model
    Args:
        images: image batch, 4D tensor, tf.float32, [batch_size, width, height, channels]
    Returns:
        output tensor with the computed logits, float, [batch_size, n_classes]
    '''
    # conv1, shape = [kernel size, kernel size, channels, kernel numbers]
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 3, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
    # pool1 and norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75, name='norm1')
    # conv2
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 16, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')
    # pool2 and norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')
    # local3
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = tf.get_variable('weights',
                                  shape=[dim, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    # local4
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[128, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')
    # fully connected output layer
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('softmax_linear',
                                  shape=[128, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        logits = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')
    return logits
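Note that model2 flattens with a fixed batch size (tf.reshape(pool2, shape=[batch_size, -1])), so the graph must be built with the batch size it will actually be fed, whereas model4 flattens with -1 for the batch dimension and accepts any batch size; predict_batch.py below relies on that. A throwaway sanity check of model4's output shape, assuming model.py is importable from the current directory, could look like this:
import tensorflow as tf
import model

# Build model4 on a dummy placeholder and check that the logits have N_CLASSES columns.
x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
logits = model.model4(x, 5, is_training=False)
print(logits.get_shape())  # expected: (?, 5)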
Loading the trained model and running predictions on single images, predict.py:
#coding:utf-8
import os, cv2
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # use cpu
import numpy as np
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import glob
import model
N_CLASSES = 5
IMG_W = 224
IMG_H = IMG_W
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # use gpu 0
label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4}
label_dict_res = {v:k for k,v in label_dict.items()}
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
def init_tf(logs_train_dir='./model_save/model.ckpt-174000'):
    global sess, pred, x
    # process image
    x = tf.placeholder(tf.float32, shape=[IMG_H, IMG_W, 3])
    x_norm = tf.image.per_image_standardization(x)
    x_4d = tf.reshape(x_norm, [1, IMG_H, IMG_W, 3])
    # predict
    logit = model.model4(x_4d, N_CLASSES, is_training=False)
    #logit = model.model2(x_4d, batch_size=1, n_classes=N_CLASSES)
    pred = tf.nn.softmax(logit)
    saver = tf.train.Saver()
    sess = tf.Session(config=config)
    saver.restore(sess, logs_train_dir)
    print('load model done...')

def evaluate_image(img_dir):
    # read image
    im = cv2.imread(img_dir)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (IMG_W, IMG_H))
    image_array = np.array(im)
    prediction = sess.run(pred, feed_dict={x: image_array})
    max_index = np.argmax(prediction)
    print("%s, predict: %s, prob: %f" % (os.path.basename(img_dir), label_dict_res[max_index], prediction[0][max_index]))

if __name__ == '__main__':
    init_tf()
    # data_path = 'flowers/flower_photos'
    # label = os.listdir(data_path)
    # for l in label:
    #     if os.path.isfile(os.path.join(data_path, l)):
    #         continue
    #     for img in glob.glob(os.path.join(data_path, l, "*.jpg")):
    #         print(img)
    #         evaluate_image(img_dir=img)
    for img in glob.glob("./*.jpg"):
        evaluate_image(img)
    sess.close()
Batch prediction: predict several images at once to save time, predict_batch.py. Because the placeholder now takes a whole batch, the per-image standardization is done in NumPy (mirroring tf.image.per_image_standardization) before feeding:
#coding:utf-8
import os, cv2, time
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # use cpu
import numpy as np
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import glob
import model
N_CLASSES = 5
IMG_W = 224
IMG_H = IMG_W
batch_size = 32
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # use gpu 0
label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4}
label_dict_res = {v:k for k,v in label_dict.items()}
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
def get_imgpath(path):
    img_list = []
    for fpath, dirs, fs in os.walk(path):
        for f in fs:
            img_path = os.path.join(fpath, f)
            if os.path.dirname(img_path) == os.getcwd():
                continue
            if not os.path.isfile(img_path):
                continue
            if os.path.basename(img_path)[-3:] == "jpg":
                img_list.append(img_path)
    return img_list
def init_tf(logs_train_dir='./model_save/model.ckpt-174000'):
    global sess, pred, x
    # the placeholder takes a whole batch of images
    x = tf.placeholder(tf.float32, shape=[None, IMG_H, IMG_W, 3])
    # predict
    logit = model.model4(x, N_CLASSES, is_training=False)
    #logit = model.model2(x_4d, batch_size=1, n_classes=N_CLASSES)
    pred = tf.nn.softmax(logit)
    saver = tf.train.Saver()
    sess = tf.Session(config=config)
    saver.restore(sess, logs_train_dir)
    print('load model done...')

def evaluate_image(img_dir):
    # read and preprocess images
    batch_img = []
    for img in img_dir:
        im = cv2.imread(img)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = cv2.resize(im, (IMG_W, IMG_H))
        # standardize in NumPy, matching tf.image.per_image_standardization
        im_mean = np.mean(im)
        stddev = max(np.std(im), 1.0/np.sqrt(IMG_W*IMG_H*3))
        im = (im - im_mean) / stddev
        image_array = np.array(im)
        batch_img.append(image_array)
    # output softmax probabilities
    prediction = sess.run(pred, feed_dict={x: batch_img})
    for i in range(len(img_dir)):
        img = img_dir[i]
        max_index = np.argmax(prediction[i])
        print("img:%s, predict: %s, prob: %f" % (img, label_dict_res[max_index], prediction[i][max_index]))
if __name__ == '__main__':
    init_tf()
    data_path = 'flowers/flower_photos'
    img_list = get_imgpath(data_path)
    # use integer arithmetic (rounding up) so the last, possibly smaller, batch is not dropped
    total_batch = int(np.ceil(len(img_list) / float(batch_size)))
    start = time.time()
    for i in range(total_batch):
        print(str(i) + "-"*50)
        batch_img = img_list[i*batch_size: (i+1)*batch_size]
        evaluate_image(batch_img)
    print("time cost:", time.time()-start)
    sess.close()