TensorFlow实现inception-v3迁移学习

本文介绍使用Inception-v3预训练模型进行迁移学习的过程,包括数据预处理、特征提取及新模型训练,最终在五种花卉数据集上达到高精度。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

TensorFlow实现inception-v3迁移学习

前言

本来是打算跑通《TensorFlow实战Google深度学习框架》第二版的程序,但是没有成功,这个程序的思想是把inception-v3除了全连接层的其它层固定住,改变全连接层,前向传播和正常训练inception-v3一样,但反向传播更新参数只更新全连接层,最后自己训练后保存的模型也是包含inception-v3(除去它原本的全连接层)。对五种花数据的处理方式是直接将它们放到一个np的数组中保存下来,这导致最后保存的np数组文件很庞大,3.66g,可能是显卡太渣,再训练时总是出现内存不足的问题,即使是batch调到很小,可能是因为np数组文件太大了吧。没办法,正好之前跑通过网上一个vgg16迁移学习的程序,那个程序的思想是先将五种花的原始数据通过vgg16,取出通过卷积网络后得到的特征值(一个4096的特征向量),将这些向量保存作为训练数据,然后自己构建一个全连接层的网络,用这些保存的向量训练这个自己构建的全连接层网络,最后得到的模型就只包含这个全连接层,很小,训练起来很快,也算是迁移学习的一种思想吧。于是就将inception-v3也改成了后一种,算是再熟悉一下吧。

准备

  1. flower_photos花朵数据集,解压后会有5个文件夹,每个文件夹内是一种花的全部图片。下载地址:flower_photos
  2. inception-v3训练好的模型。下载地址:百度网盘 提取码:u3ni

代码

首先是数据的处理 data_process.py

import os
import numpy as np
import tensorflow as tf

import utils
import tensorflow.contrib.slim as slim

# 加载通过TensorFlow-Slim定义好的inception_v3模型。
import tensorflow.contrib.slim.python.slim.nets.inception_v3 as inception_v3 # 这就是models—master文件中的.py文件

import csv

# The pre-trained Inception-v3 checkpoint must be downloaded (from Google)
# before this script is run.
CKPT_FILE = 'datasets/inception_v3.ckpt'

data_dir = 'datasets/flower_photos/'
contents = os.listdir(data_dir)
# Every sub-directory of data_dir is treated as one flower class.
classes = [each for each in contents
           if os.path.isdir(os.path.join(data_dir, each))]

# Number of images pushed through the network per sess.run call; it can be
# raised on machines with more memory.
batch_size = 10
# Per-batch feature arrays; joined once at the end instead of growing one
# array with a copy on every batch.
codes_list = []
# Class name of every processed image, in the same order as the features.
labels = []
# Images waiting to be fed to the network.
batch = []

# Final (num_images, 2048) feature matrix; stays None when no image was found.
codes = None

with tf.Session() as sess:
    # Input placeholder of Inception-v3: a batch of 299x299 RGB images.
    input_ = tf.placeholder(tf.float32, [None, 299, 299, 3], name='input_images')

    # is_training=False makes the network run in inference mode.
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits, feature = inception_v3.inception_v3(
            input_, num_classes=1001, is_training=False)

    # Initialise everything first so that variables missing from the
    # checkpoint (ignore_missing_vars=True below) still have a value.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load the pre-trained Inception-v3 weights into the model variables.
    load_fn = slim.assign_from_checkpoint_fn(
        CKPT_FILE,
        slim.get_model_variables(),
        ignore_missing_vars=True)

    load_fn(sess)

    # Compute the Inception-v3 features class by class.
    for each in classes:
        print("Starting {} images".format(each))
        class_path = os.path.join(data_dir, each)
        files = os.listdir(class_path)
        for ii, filename in enumerate(files, 1):
            # Load the image and queue it in the current batch.
            img = utils.load_image(os.path.join(class_path, filename))
            batch.append(img.reshape((1, 299, 299, 3)))
            labels.append(each)

            # Run the network when the batch is full or the class is exhausted.
            if ii % batch_size == 0 or ii == len(files):
                images = np.concatenate(batch)  # (n, 299, 299, 3)
                features = sess.run(feature, feed_dict={input_: images})
                pre_logits = features['PreLogits']
                # Flatten the PreLogits end point to (n, 2048).
                codes_list.append(pre_logits.reshape(pre_logits.shape[0], 2048))

                # Start a fresh batch.
                batch = []
                print('{} images processed'.format(ii))

    if codes_list:
        codes = np.concatenate(codes_list)  # (num_images, 2048)
        print(codes.shape)

    np.save('codes.npy', codes)
    # One label per CSV row; newline='' lets the csv module control line
    # endings so no blank rows appear on Windows.
    with open('labels.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows([label] for label in labels)

这样就把全部五种花的图片通过inception-v3得到最后2048维的特征向量。这里注意要把is_training设为False,否则Dropout和BatchNorm会按训练模式运行(BatchNorm使用当前batch的统计量,而不是checkpoint中保存的滑动平均值),跑出来的特征根本不能用。

执行以上数据处理还需要这个功能模块 utils.py

import skimage
import skimage.io
import skimage.transform
import numpy as np


# synset = [l.strip() for l in open('synset.txt').readlines()]


# Returns an image of shape [299, 299, 3]
# ([height, width, depth]).
def load_image(path):
    """Load an image, centre-crop it to a square and resize it to 299x299.

    Pixel values are divided by 255 and asserted to lie in [0, 1].
    Grayscale images are expanded to three identical channels and any
    channel beyond the third (e.g. alpha) is dropped, so the result always
    has three channels and can be reshaped to (1, 299, 299, 3) by callers.

    Args:
        path: Path of the image file, passed to ``skimage.io.imread``.

    Returns:
        The processed image with shape (299, 299, 3).
    """
    img = skimage.io.imread(path)
    img = img / 255.0
    assert (0 <= img).all() and (img <= 1.0).all()

    # Normalise the channel layout to exactly three channels.
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    if img.shape[2] < 3:
        # Gray (optionally with alpha): replicate the luminance channel.
        img = np.repeat(img[:, :, :1], 3, axis=2)
    elif img.shape[2] > 3:
        img = img[:, :, :3]

    # Crop the largest centred square.
    short_edge = min(img.shape[:2])
    yy = (img.shape[0] - short_edge) // 2
    xx = (img.shape[1] - short_edge) // 2
    crop_img = img[yy: yy + short_edge, xx: xx + short_edge]

    # Resize to the 299x299 size the callers reshape to.
    resized_img = skimage.transform.resize(crop_img, (299, 299))
    return resized_img


# Returns the top-1 label string.
def print_prob(prob, file_path):
    """Print the top-1 and top-5 predictions and return the top-1 label.

    Args:
        prob: Sequence of class scores; entry ``i`` belongs to line ``i``
            of the label file (assumed 1-D -- TODO confirm with callers).
        file_path: Path of a text file holding one label per line.

    Returns:
        The label with the highest score.
    """
    # Close the label file deterministically instead of leaking the handle.
    with open(file_path) as label_file:
        synset = [line.strip() for line in label_file]

    # Class indices sorted by descending score.
    pred = np.argsort(prob)[::-1]

    # Top-1 label.
    top1 = synset[pred[0]]
    print(("Top1: ", top1, prob[pred[0]]))
    # Top-5 labels; slicing copes with fewer than five classes.
    top5 = [(synset[idx], prob[idx]) for idx in pred[:5]]
    print(("Top5: ", top5))
    return top1


def load_image2(path, height=None, width=None):
    """Load an image, scale its values by 1/255 and optionally resize it.

    Args:
        path: Path of the image file, passed to ``skimage.io.imread``.
        height: Target height in pixels. When only ``height`` is given the
            width is derived so that the aspect ratio is kept.
        width: Target width in pixels. When only ``width`` is given the
            height is derived so that the aspect ratio is kept.

    Returns:
        The image resized to ``(height, width)``; the original size is used
        when neither argument is given.
    """
    img = skimage.io.imread(path)
    img = img / 255.0
    src_h, src_w = img.shape[0], img.shape[1]

    if height is not None and width is not None:
        ny = height
        nx = width
    elif height is not None:
        ny = height
        # True division yields a float under Python 3; the output shape
        # must be integral, so round it (and keep at least one pixel).
        nx = max(1, int(round(src_w * ny / src_h)))
    elif width is not None:
        nx = width
        ny = max(1, int(round(src_h * nx / src_w)))
    else:
        ny = src_h
        nx = src_w
    return skimage.transform.resize(img, (ny, nx))

接下来就是训练了 train.py

import csv
import os

import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import MultiLabelBinarizer

def get_batches(x, y, n_batches=10):
    """Yield ``(x, y)`` slices that split the data into ``n_batches`` batches.

    Every batch holds ``len(x) // n_batches`` samples except the last one,
    which also receives the remainder. When there are fewer samples than
    ``n_batches`` the number of batches is reduced to one sample per batch,
    and nothing is yielded for empty input or a non-positive ``n_batches``.

    Args:
        x: Sliceable sequence of inputs.
        y: Sliceable sequence of targets, sliced with the same bounds as ``x``.
        n_batches: Number of batches to produce.

    Yields:
        Tuples ``(X, Y)`` of matching slices of ``x`` and ``y``.
    """
    # Never ask for more batches than there are samples; this also avoids a
    # zero batch size, which would make range() raise.
    n_batches = min(n_batches, len(x))
    if n_batches < 1:
        return
    batch_size = len(x) // n_batches

    for index in range(n_batches):
        start = index * batch_size
        # The last batch is open-ended so it absorbs the leftover samples.
        stop = None if index == n_batches - 1 else start + batch_size
        yield x[start:stop], y[start:stop]

# Feature vectors and class names written by data_process.py.
codes = np.load('codes.npy')
with open('labels.csv', 'r') as f:
    reader = csv.reader(f)
    # Each non-empty row holds one class name; blank rows are skipped.
    labels = [row for row in reader if row]

# Turn the class names into indicator vectors (one column per class).
lb = MultiLabelBinarizer()
lb.fit(labels)

labels_vecs = lb.transform(labels)

# Stratified 80/20 split; the 20% part is then halved into validation/test.
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)

train_idx, val_idx = next(ss.split(codes, labels))

half_val_len = int(len(val_idx)/2)
val_idx, test_idx = val_idx[:half_val_len], val_idx[half_val_len:]

train_x, train_y = codes[train_idx], labels_vecs[train_idx]
val_x, val_y = codes[val_idx], labels_vecs[val_idx]
test_x, test_y = codes[test_idx], labels_vecs[test_idx]

print("Train shapes (x, y):", train_x.shape, train_y.shape)
print("Validation shapes (x, y):", val_x.shape, val_y.shape)
print("Test shapes (x, y):", test_x.shape, test_y.shape)

# Input placeholder: one feature vector per row.
inputs_ = tf.placeholder(tf.float32, shape=[None, codes.shape[1]],name="input")
# Registered in a collection so test.py can look it up after import_meta_graph.
tf.add_to_collection('input', inputs_)
# Label placeholder: one indicator vector per row.
labels_ = tf.placeholder(tf.int64, shape=[None, labels_vecs.shape[1]])

# Hidden fully connected layer with 256 units.
fc = tf.contrib.layers.fully_connected(inputs_, 256)

# Output layer with one unit per class and no activation (raw logits).
logits = tf.contrib.layers.fully_connected(fc, labels_vecs.shape[1], activation_fn=None)

# Per-sample softmax cross entropy.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels_, logits=logits)

# Loss: mean cross entropy over the batch.
cost = tf.reduce_mean(cross_entropy)

# Adam optimizer with its default settings.
optimizer = tf.train.AdamOptimizer().minimize(cost)

# Predicted class distribution; named so test.py can fetch it by name.
predicted = tf.nn.softmax(logits,name="predicted")
tf.add_to_collection('predicted', predicted)
# Accuracy: fraction of samples whose arg-max prediction matches the label.
correct_pred = tf.equal(tf.argmax(predicted, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Number of passes over the training data.
epochs = 10
# Counts training steps; validation runs every 10 steps.
iteration = 0
# Saver used to write and restore the checkpoint.
saver = tf.train.Saver()
# The checkpoint directory must exist before saver.save is called.
os.makedirs('train_dir', exist_ok=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        for x, y in get_batches(train_x, train_y):
            feed = {inputs_: x,
                    labels_: y}
            # One optimisation step.
            loss, _ = sess.run([cost, optimizer], feed_dict=feed)
            iteration += 1

            if iteration % 10 == 0:
                feed = {inputs_: val_x,
                        labels_: val_y}
                val_acc = sess.run(accuracy, feed_dict=feed)
                # Report progress on the validation set.
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Validation Acc: {:.4f}".format(val_acc))
    # Save the trained classifier.
    saver.save(sess, "train_dir/flowers.ckpt")

# Evaluate on the test set in a fresh session, after the training session
# has been closed, using the checkpoint that was just written.
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('train_dir'))

    feed = {inputs_: test_x,
            labels_: test_y}
    test_acc = sess.run(accuracy, feed_dict=feed)
    print("Test accuracy: {:.4f}".format(test_acc))

训练结果:

Epoch: 0/10 Iteration: 10 Validation Acc: 0.8283
Epoch: 1/10 Iteration: 20 Validation Acc: 0.8937
Epoch: 2/10 Iteration: 30 Validation Acc: 0.8937
Epoch: 3/10 Iteration: 40 Validation Acc: 0.8992
Epoch: 4/10 Iteration: 50 Validation Acc: 0.9074
Epoch: 5/10 Iteration: 60 Validation Acc: 0.9128
Epoch: 6/10 Iteration: 70 Validation Acc: 0.9155
Epoch: 7/10 Iteration: 80 Validation Acc: 0.9183
Epoch: 8/10 Iteration: 90 Validation Acc: 0.9183
Epoch: 9/10 Iteration: 100 Validation Acc: 0.9155
Test accuracy: 0.9128

由于数据集划分和权重初始化都是随机的,每次的训练结果不完全一样,但相差不大。

最后,写了一个测试单张图片的程序 test.py

import numpy as np
import tensorflow as tf
import utils
import tensorflow.contrib.slim as slim
# 加载通过TensorFlow-Slim定义好的inception_v3模型。
import tensorflow.contrib.slim.python.slim.nets.inception_v3 as inception_v3 # 这就是models—master文件中的.py文件

# The pre-trained inception_v3.ckpt file must be downloaded (from Google)
# before this script is run.
CKPT_FILE = 'datasets/inception_v3.ckpt'

# Path of the single image to classify.
photo = '1.jpg'

def process(photo):
    """Compute the Inception-v3 ``PreLogits`` feature vector of one image.

    The network is built in a dedicated ``tf.Graph`` so that repeated calls
    do not collide with variables created by an earlier call, and no
    Inception ops are added to the caller's default graph.

    Args:
        photo: Path of the image file, passed to ``utils.load_image``.

    Returns:
        Array of shape (1, 2048) holding the ``PreLogits`` activations.
    """
    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as sess:
        # Input placeholder of Inception-v3: a batch of 299x299 RGB images.
        input_ = tf.placeholder(tf.float32, [None, 299, 299, 3], name='input_images')

        # is_training=False makes the network run in inference mode.
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits, feature = inception_v3.inception_v3(
                input_, num_classes=1001, is_training=False)

        # Initialise everything first so that variables missing from the
        # checkpoint (ignore_missing_vars=True below) still have a value.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Load the pre-trained Inception-v3 weights into the model variables.
        load_fn = slim.assign_from_checkpoint_fn(
            CKPT_FILE,
            slim.get_model_variables(),
            ignore_missing_vars=True)

        load_fn(sess)

        img = utils.load_image(photo)
        feed_dict = {input_: img.reshape((1, 299, 299, 3))}
        # Run the network and flatten the PreLogits end point to (1, 2048).
        features = sess.run(feature, feed_dict=feed_dict)
        pre_logits = features['PreLogits']
        codes_batch = pre_logits.reshape(pre_logits.shape[0], 2048)
        print(codes_batch.shape)

        return codes_batch

with tf.Session() as sess:
    # Rebuild the trained classifier graph from its meta file and load the
    # saved weights into this session.
    restorer = tf.train.import_meta_graph("train_dir/flowers.ckpt.meta")
    restorer.restore(sess, 'train_dir/flowers.ckpt')

    classifier_graph = tf.get_default_graph()

    # The input placeholder was registered in the 'input' collection when
    # the model was trained.
    input_tensor = tf.get_collection('input')[0]

    # Inception-v3 feature vector of the image to classify.
    image_codes = process(photo)

    # The softmax output is fetched through the op named 'predicted'.
    predicted_op = classifier_graph.get_operation_by_name('predicted')
    output_tensor = predicted_op.outputs[0]

    probabilities = sess.run(output_tensor, feed_dict={input_tensor: image_codes})
    print(probabilities)

1.jpg是从网上随便找的一张向日葵图片,用它测试得到以下结果:

[[2.2303613e-03 1.4069825e-04 1.3377503e-04 9.9734050e-01 1.5470626e-04]]

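输出向量中各分量的顺序与训练时MultiLabelBinarizer排序后的类别顺序一致(daisy、dandelion、roses、sunflowers、tulips),第四个分量对应向日葵(sunflowers),其概率约为0.997,说明可以正确识别。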

结束

本人小白一个,有什么不对的地方希望不要误导了各位看官。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值