TensorFlow实现inception-v3迁移学习
前言
本来打算跑通《TensorFlow实战Google深度学习框架》第二版中的迁移学习程序,但没有成功。那个程序的思路是:固定inception-v3中除全连接层以外的所有层,只替换全连接层;前向传播与正常训练inception-v3相同,但反向传播时只更新全连接层的参数,最后保存的模型仍然包含inception-v3(去掉其原本的全连接层)。它对五种花数据的处理方式是把所有图片直接存入一个NumPy数组并保存下来,导致保存的数组文件非常庞大(3.66 GB)。可能是我的显卡太差,在训练时总是出现内存不足的问题,即使把batch调得很小也一样,大概是因为这个数组文件太大了。没办法,正好之前跑通过网上一个vgg16迁移学习的程序,它的思路是:先让五种花的原始图片通过vgg16,取出卷积网络输出的特征值(一个4096维的特征向量)并保存为训练数据;然后自己构建一个只含全连接层的网络,用这些特征向量来训练它。最后得到的模型只包含这个全连接层,体积很小,训练也很快,也算是迁移学习的一种思路。于是我把inception-v3也改成了后一种做法,顺便再熟悉一下。
准备
- flower_photos花朵数据集,解压后会有5个文件夹,每个文件夹内是一种花的全部图片。下载地址:flower_photos
- inception-v3训练好的模型。下载地址:百度网盘 提取码:u3ni
代码
首先是数据的处理 data_process.py
import os
import numpy as np
import tensorflow as tf
import utils
import tensorflow.contrib.slim as slim
# 加载通过TensorFlow-Slim定义好的inception_v3模型。
import tensorflow.contrib.slim.python.slim.nets.inception_v3 as inception_v3 # 这就是models—master文件中的.py文件
import csv
# Feature-extraction script: runs every flower image through a pre-trained
# Inception-v3 network and stores the resulting 2048-dimensional feature
# vectors in codes.npy, with the matching class names in labels.csv.

# The pre-trained inception_v3.ckpt must be downloaded beforehand.
CKPT_FILE = 'datasets/inception_v3.ckpt'
data_dir = 'datasets/flower_photos/'
contents = os.listdir(data_dir)
# Every sub-directory of data_dir is treated as one class.
classes = [each for each in contents
           if os.path.isdir(os.path.join(data_dir, each))]
# Number of images per forward pass; raise it if more memory is available.
batch_size = 10
# Per-batch feature arrays; concatenated once at the end instead of
# re-copying an ever-growing array after every batch.
codes_list = []
# Class name of every processed image, in the same order as the features.
labels = []
# Temporary buffer holding the images of the current batch.
batch = []
codes = None
# Running count of feature vectors computed so far (for progress output).
n_codes = 0

with tf.Session() as sess:
    # Placeholder for a batch of 299x299 RGB images.
    input_ = tf.placeholder(tf.float32, [None, 299, 299, 3],
                            name='input_images')
    # is_training=False is passed so the network runs in inference mode.
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits, feature = inception_v3.inception_v3(
            input_, num_classes=1001, is_training=False)
    # Only this end point is used below; fetching the whole end-point dict
    # would copy every intermediate activation back from the device.
    pre_logits = feature['PreLogits']

    # Initialise every variable first; the checkpoint values loaded below
    # then overwrite the ones that exist in the checkpoint.
    init = tf.global_variables_initializer()
    sess.run(init)
    load_fn = slim.assign_from_checkpoint_fn(
        CKPT_FILE,
        slim.get_model_variables(),
        ignore_missing_vars=True)
    load_fn(sess)

    # Compute the features class by class.
    for each in classes:
        print("Starting {} images".format(each))
        class_path = os.path.join(data_dir, each)
        files = os.listdir(class_path)
        for ii, file in enumerate(files, 1):
            # Load the image and queue it in the current batch.
            img = utils.load_image(os.path.join(class_path, file))
            batch.append(img.reshape((1, 299, 299, 3)))
            labels.append(each)

            # Run the network once the batch is full or the class is done.
            if ii % batch_size == 0 or ii == len(files):
                images = np.concatenate(batch)
                codes_batch = sess.run(pre_logits,
                                       feed_dict={input_: images})
                codes_batch = codes_batch.reshape(codes_batch.shape[0], 2048)
                print(codes_batch)

                codes_list.append(codes_batch)
                n_codes += codes_batch.shape[0]
                print((n_codes, codes_batch.shape[1]))

                # Reset the buffer for the next batch.
                batch = []
                print('{} images processed'.format(ii))

# A single concatenation at the end; an empty data set yields a (0, 2048)
# array rather than saving None.
if codes_list:
    codes = np.concatenate(codes_list)
else:
    codes = np.empty((0, 2048), dtype=np.float32)
np.save('codes.npy', codes)

# One label per CSV row. newline='' lets the csv module control line endings,
# which avoids the blank rows text-mode translation can add on Windows.
with open('labels.csv', 'w', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerows([label] for label in labels)
这样就让全部五种花的图片依次通过inception-v3,得到每张图片对应的2048维特征向量。这里要注意把is_training设为False(否则dropout会生效,batch normalization也会使用当前batch的统计量),不然跑出来的特征根本不能用。
执行以上数据处理还需要这个功能模块 utils.py
import skimage
import skimage.io
import skimage.transform
import numpy as np
# synset = [l.strip() for l in open('synset.txt').readlines()]
def load_image(path):
    """Load an image, center-crop it to a square and resize it to 299x299.

    Args:
        path: Location of the image file, passed to ``skimage.io.imread``.

    Returns:
        The resized image with pixel values scaled by 1/255. For 2-D
        (grayscale) and 3-D (height, width, channels) inputs the result has
        shape (299, 299, 3): grayscale data is replicated across three
        channels and any channel beyond the third (e.g. alpha) is dropped.

    Raises:
        ValueError: If the scaled pixel values fall outside [0, 1], i.e. the
            file does not hold 8-bit data.
    """
    img = skimage.io.imread(path)
    img = img / 255.0
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not ((0 <= img).all() and (img <= 1.0).all()):
        raise ValueError(
            "pixel values of {!r} are outside [0, 1] after scaling".format(path))

    # Callers reshape the result to (1, 299, 299, 3), so normalise the
    # channel layout to exactly three channels first.
    if img.ndim == 3 and img.shape[2] < 3:
        # Grayscale stored with an explicit channel axis (optionally + alpha).
        img = img[:, :, 0]
    if img.ndim == 2:
        img = np.stack((img,) * 3, axis=-1)
    elif img.ndim == 3 and img.shape[2] > 3:
        img = img[:, :, :3]

    # Crop the largest centered square.
    short_edge = min(img.shape[:2])
    yy = (img.shape[0] - short_edge) // 2
    xx = (img.shape[1] - short_edge) // 2
    crop_img = img[yy: yy + short_edge, xx: xx + short_edge]

    # Resize to 299x299.
    return skimage.transform.resize(crop_img, (299, 299))
def print_prob(prob, file_path):
    """Print the top-1 and top-5 predictions and return the top-1 label.

    Args:
        prob: 1-D sequence of scores, one per class.
        file_path: Text file with one class label per line; line ``i`` names
            the class scored by ``prob[i]``.

    Returns:
        The label with the highest score.
    """
    # Read the labels with a context manager so the file is always closed.
    with open(file_path) as synset_file:
        synset = [line.strip() for line in synset_file]

    # Class indices ordered from highest to lowest score.
    pred = np.argsort(prob)[::-1]

    top1 = synset[pred[0]]
    print(("Top1: ", top1, prob[pred[0]]))

    # Report at most five entries so fewer than five classes does not raise.
    top5 = [(synset[pred[i]], prob[pred[i]])
            for i in range(min(5, len(pred)))]
    print(("Top5: ", top5))
    return top1
def load_image2(path, height=None, width=None):
    """Load an image and resize it, keeping the aspect ratio if one side is given.

    Args:
        path: Location of the image file, passed to ``skimage.io.imread``.
        height: Target height in pixels, or None.
        width: Target width in pixels, or None.

    Returns:
        The image with pixel values scaled by 1/255 and resized to
        (height, width). When only one of the two is given, the other is
        derived from the original aspect ratio; when neither is given the
        original size is kept.
    """
    img = skimage.io.imread(path)
    img = img / 255.0
    if height is not None and width is not None:
        ny = height
        nx = width
    elif height is not None:
        ny = height
        # The output shape must be integral; `/` yields a float on Python 3.
        nx = max(1, int(round(img.shape[1] * ny / img.shape[0])))
    elif width is not None:
        nx = width
        ny = max(1, int(round(img.shape[0] * nx / img.shape[1])))
    else:
        ny = img.shape[0]
        nx = img.shape[1]
    return skimage.transform.resize(img, (ny, nx))
接下来就是训练了 train.py
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import StratifiedShuffleSplit
import csv
import tensorflow as tf
def get_batches(x, y, n_batches=10):
    """Yield ``(x, y)`` slices that split the data into ``n_batches`` batches.

    Every batch except the last holds ``len(x) // n_batches`` items; the last
    batch also receives the remainder. When there are fewer items than
    ``n_batches``, one single-item batch per item is produced instead, and
    empty input yields nothing.

    Args:
        x: Sliceable sequence of inputs.
        y: Sliceable sequence of targets, aligned with ``x``.
        n_batches: Requested number of batches; must be at least 1.

    Yields:
        Tuples ``(X, Y)`` of matching slices of ``x`` and ``y``.

    Raises:
        ValueError: If ``n_batches`` is smaller than 1.
    """
    if n_batches < 1:
        raise ValueError("n_batches must be at least 1")

    total = len(x)
    # Never ask for more batches than there are items; otherwise the batch
    # size would be 0 and range() would reject the zero step.
    n_batches = min(n_batches, total)
    if n_batches == 0:
        return

    batch_size = total // n_batches
    for index in range(n_batches):
        start = index * batch_size
        # The last batch absorbs whatever does not divide evenly.
        stop = total if index == n_batches - 1 else start + batch_size
        yield x[start:stop], y[start:stop]
# Training script: fits a small fully connected classifier on the saved
# Inception-v3 feature vectors, reports validation accuracy while training,
# saves the model and finally evaluates it on a held-out test split.

# Feature vectors, one row per image.
codes = np.load('codes.npy')
# newline='' is the mode the csv module expects for reading.
with open('labels.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    # Skip blank rows; each remaining row is a list of fields for one image.
    labels = [row for row in reader if row]

# Encode each row of labels as an indicator vector.
lb = MultiLabelBinarizer()
lb.fit(labels)
labels_vecs = lb.transform(labels)

# Stratified 80/20 split, then halve the 20% into validation and test sets.
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
train_idx, val_idx = next(ss.split(codes, labels))
half_val_len = len(val_idx) // 2
val_idx, test_idx = val_idx[:half_val_len], val_idx[half_val_len:]

train_x, train_y = codes[train_idx], labels_vecs[train_idx]
val_x, val_y = codes[val_idx], labels_vecs[val_idx]
test_x, test_y = codes[test_idx], labels_vecs[test_idx]
print("Train shapes (x, y):", train_x.shape, train_y.shape)
print("Validation shapes (x, y):", val_x.shape, val_y.shape)
print("Test shapes (x, y):", test_x.shape, test_y.shape)

# Input placeholder: one feature vector per row. It is also stored in a
# collection so it can be looked up after the graph is re-imported.
inputs_ = tf.placeholder(tf.float32, shape=[None, codes.shape[1]], name="input")
tf.add_to_collection('input', inputs_)
# Label placeholder: one indicator vector per row.
labels_ = tf.placeholder(tf.int64, shape=[None, labels_vecs.shape[1]])

# Hidden fully connected layer with 256 units.
fc = tf.contrib.layers.fully_connected(inputs_, 256)
# Output layer with one unit per class and no activation (raw logits).
logits = tf.contrib.layers.fully_connected(
    fc, labels_vecs.shape[1], activation_fn=None)

# Mean softmax cross-entropy loss.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=labels_, logits=logits)
cost = tf.reduce_mean(cross_entropy)

optimizer = tf.train.AdamOptimizer().minimize(cost)

# Predicted class distribution, named and stored for later lookup.
predicted = tf.nn.softmax(logits, name="predicted")
tf.add_to_collection('predicted', predicted)

# Accuracy: fraction of rows whose arg-max prediction matches the label.
correct_pred = tf.equal(tf.argmax(predicted, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Number of passes over the training data.
epochs = 10
# Counts training steps; validation runs every 10th step.
iteration = 0
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        for x, y in get_batches(train_x, train_y):
            feed = {inputs_: x,
                    labels_: y}
            # One optimisation step.
            loss, _ = sess.run([cost, optimizer], feed_dict=feed)
            iteration += 1

            if iteration % 10 == 0:
                feed = {inputs_: val_x,
                        labels_: val_y}
                val_acc = sess.run(accuracy, feed_dict=feed)
                # Report progress on the validation set.
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Validation Acc: {:.4f}".format(val_acc))

    # Make sure the checkpoint directory exists before saving into it.
    tf.gfile.MakeDirs("train_dir")
    saver.save(sess, "train_dir/flowers.ckpt")

# Evaluate the saved model on the test split in a fresh session.
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('train_dir'))
    feed = {inputs_: test_x,
            labels_: test_y}
    test_acc = sess.run(accuracy, feed_dict=feed)
    print("Test accuracy: {:.4f}".format(test_acc))
训练结果:
Epoch: 0/10 Iteration: 10 Validation Acc: 0.8283
Epoch: 1/10 Iteration: 20 Validation Acc: 0.8937
Epoch: 2/10 Iteration: 30 Validation Acc: 0.8937
Epoch: 3/10 Iteration: 40 Validation Acc: 0.8992
Epoch: 4/10 Iteration: 50 Validation Acc: 0.9074
Epoch: 5/10 Iteration: 60 Validation Acc: 0.9128
Epoch: 6/10 Iteration: 70 Validation Acc: 0.9155
Epoch: 7/10 Iteration: 80 Validation Acc: 0.9183
Epoch: 8/10 Iteration: 90 Validation Acc: 0.9183
Epoch: 9/10 Iteration: 100 Validation Acc: 0.9155
Test accuracy: 0.9128
每次的训练结果不一样,但也大差不差。
最后,写了一个测试单张图片的程序 test.py
import numpy as np
import tensorflow as tf
import utils
import tensorflow.contrib.slim as slim
# 加载通过TensorFlow-Slim定义好的inception_v3模型。
import tensorflow.contrib.slim.python.slim.nets.inception_v3 as inception_v3 # 这就是models—master文件中的.py文件
# The pre-trained inception_v3.ckpt must be downloaded beforehand.
CKPT_FILE = 'datasets/inception_v3.ckpt'
# Path of the single image to classify.
photo = '1.jpg'
def process(photo):
    """Compute the Inception-v3 'PreLogits' feature vector of one image.

    The network is built in a private graph and run in a private session, so
    the function can be called repeatedly and does not add operations or
    variables to the caller's default graph.

    Args:
        photo: Path of the image file, passed to ``utils.load_image``.

    Returns:
        Array of shape (1, 2048) holding the image's feature vector.
    """
    with tf.Graph().as_default():
        # Placeholder for a batch of 299x299 RGB images.
        input_ = tf.placeholder(tf.float32, [None, 299, 299, 3],
                                name='input_images')
        # is_training=False is passed so the network runs in inference mode.
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits, feature = inception_v3.inception_v3(
                input_, num_classes=1001, is_training=False)
        # Only this end point is needed; fetching the whole end-point dict
        # would copy every intermediate activation back from the device.
        pre_logits = feature['PreLogits']

        init = tf.global_variables_initializer()
        load_fn = slim.assign_from_checkpoint_fn(
            CKPT_FILE,
            slim.get_model_variables(),
            ignore_missing_vars=True)

        with tf.Session() as sess:
            # Initialise everything, then overwrite with checkpoint values.
            sess.run(init)
            load_fn(sess)

            img = utils.load_image(photo)
            feed_dict = {input_: img.reshape((1, 299, 299, 3))}
            codes_batch = sess.run(pre_logits, feed_dict=feed_dict)

    codes_batch = codes_batch.reshape(codes_batch.shape[0], 2048)
    print(codes_batch.shape)
    return codes_batch
# Inference script: restore the trained classifier, feed it the feature
# vector of `photo` and print the predicted class distribution.
with tf.Session() as session:
    # Rebuild the classifier graph from the saved meta graph and load weights.
    restorer = tf.train.import_meta_graph("train_dir/flowers.ckpt.meta")
    restorer.restore(session, 'train_dir/flowers.ckpt')
    classifier_graph = tf.get_default_graph()

    # The input placeholder was stored in the 'input' collection at training
    # time (it could equally be fetched by its operation name 'input').
    feature_input = tf.get_collection('input')[0]

    image_codes = process(photo)

    # The softmax output is looked up by the name given at training time
    # (it is also available through the 'predicted' collection).
    softmax_out = classifier_graph.get_operation_by_name('predicted').outputs[0]
    probabilities = session.run(softmax_out,
                                feed_dict={feature_input: image_codes})
    print(probabilities)
从网上随便找一张向日葵的图片测试,得到以下结果:
[[2.2303613e-03 1.4069825e-04 1.3377503e-04 9.9734050e-01 1.5470626e-04]]
输出向量的第四个分量对应向日葵(类别按名称排序:daisy、dandelion、roses、sunflowers、tulips),其概率约为0.997,说明可以正确识别。
结束
本人小白一个,有什么不对的地方希望不要误导了各位看官。