Training script:
# -*- coding: utf-8 -*-
import os
import numpy as np
import tensorflow as tf
from parse_recorder_file import get_data
from AlexNet import AlexNet
#from vgg16 import vgg_16

N_CLASSES = 2     # two output neurons: [1, 0] or [0, 1], the probabilities for cat and dog
IMG_W = 128       # resize images; very large images make training slow
IMG_H = 128
BATCH_SIZE = 32   # number of samples per batch
MAX_STEP = 6      # number of training steps; should be >= 10000 for a real run
keep_prob = 0.5   # dropout keep probability
def run_training():
    logs_train_dir = 'E:/Python/tensorflow/train_log/'
    data_train_dir = 'E:/Python/tensorflow/model/'
    train_batch, train_label_batch = get_data('./train.tfrecords', IMG_W, True)
    train_logits = AlexNet(train_batch, keep_prob, N_CLASSES)
    # train_logits = vgg_16(train_batch, 0.5)

    with tf.name_scope('Loss'):
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=train_logits, labels=train_label_batch)
        train_loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar('loss', train_loss)

    with tf.name_scope('train_op'):
        train_op = tf.train.AdamOptimizer(learning_rate=0.00005).minimize(train_loss)

    with tf.name_scope('accuracy'):
        correct = tf.nn.in_top_k(train_logits, train_label_batch, 1)
        correct = tf.cast(correct, tf.float32)
        train_acc = tf.reduce_mean(correct, name='acc')
        tf.summary.scalar('acc', train_acc)

    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)

    # Resume from the latest checkpoint if one exists; otherwise initialize.
    ckpt = tf.train.get_checkpoint_state(data_train_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print(ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(tf.global_variables_initializer())

    # get_data is assumed to use a queue-based input pipeline (tf.train.shuffle_batch),
    # so the queue runners must be started before the training loop.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            # Fetch everything in a single run() call so that the loss, accuracy
            # and summary are all computed on the same batch.
            _, tra_loss, tra_acc, summary_str = sess.run(
                [train_op, train_loss, train_acc, summary_op])
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%'
                  % (step, tra_loss, tra_acc * 100.0))
            train_writer.add_summary(summary_str, step)
            if step % 5 == 0 or (step + 1) == MAX_STEP:
                # Periodically save the model under data_train_dir.
                checkpoint_path = os.path.join(data_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()

run_training()
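The script imports get_data from parse_recorder_file, the TFRecord code from the earlier post. For readers who do not have that file, here is a minimal sketch of what it might look like, assuming the records store raw image bytes under a feature named 'img_raw' and an int64 feature named 'label' (both feature names are assumptions):

import tensorflow as tf

def get_data(tfrecords_file, image_size, shuffle, batch_size=32):
    """Read a TFRecord file and return (image_batch, label_batch) tensors."""
    filename_queue = tf.train.string_input_producer([tfrecords_file])
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized,
        features={'img_raw': tf.FixedLenFeature([], tf.string),  # assumed feature name
                  'label': tf.FixedLenFeature([], tf.int64)})    # assumed feature name
    image = tf.decode_raw(features['img_raw'], tf.uint8)
    image = tf.reshape(image, [image_size, image_size, 3])
    image = tf.cast(image, tf.float32) / 255.0   # scale pixels to [0, 1]
    label = tf.cast(features['label'], tf.int32)
    if shuffle:
        return tf.train.shuffle_batch([image, label], batch_size=batch_size,
                                      capacity=2000, min_after_dequeue=1000,
                                      num_threads=4)
    return tf.train.batch([image, label], batch_size=batch_size,
                          capacity=2000, num_threads=4)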
AlexNet architecture:
import tensorflow as tf
import numpy as np

def AlexNet(X, KEEP_PROB, NUM_CLASSES):
    """Create the network graph."""
    # 1st layer: Conv (w/ ReLU) -> LRN -> Pool
    conv1 = conv(X, [5, 5, 3, 64], [64], 1, 1, name='conv1')
    norm1 = lrn(conv1, 2, 1e-05, 0.75, name='norm1')
    pool1 = max_pool(norm1, 2, 2, 2, 2, name='pool1')   # 64*64*64
    # 2nd layer: Conv (w/ ReLU) -> LRN -> Pool
    conv2 = conv(pool1, [5, 5, 64, 128], [128], 1, 1, name='conv2')
    norm2 = lrn(conv2, 2, 1e-05, 0.75, name='norm2')
    pool2 = max_pool(norm2, 2, 2, 2, 2, name='pool2')   # 32*32*128
    # 3rd layer: Conv (w/ ReLU)
    conv3 = conv(pool2, [3, 3, 128, 256], [256], 1, 1, name='conv3')
    # 4th layer: Conv (w/ ReLU)
    conv4 = conv(conv3, [3, 3, 256, 512], [512], 1, 1, name='conv4')
    # 5th layer: Conv (w/ ReLU) -> Pool
    conv5 = conv(conv4, [3, 3, 512, 512], [512], 1, 1, name='conv5')
    pool5 = max_pool(conv5, 2, 2, 2, 2, name='pool5')   # 16*16*512
    # 6th layer: Flatten -> FC (w/ ReLU) -> Dropout
    flattened = tf.reshape(pool5, [-1, 16 * 16 * 512])
    fc6 = fc(flattened, [16 * 16 * 512, 1024], [1024], name='fc6')
    fc6 = tf.nn.relu(fc6)
    dropout6 = dropout(fc6, KEEP_PROB)
    # 7th layer: FC (w/ ReLU) -> Dropout
    fc7 = fc(dropout6, [1024, 2048], [2048], name='fc7')
    fc7 = tf.nn.relu(fc7)
    dropout7 = dropout(fc7, KEEP_PROB)
    # 8th layer: FC, returning the unscaled logits
    fc8 = fc(dropout7, [2048, NUM_CLASSES], [NUM_CLASSES], name='fc8')
    return fc8
def conv(x, kernel_size, bias_size, stride_y, stride_x, name):
    """Create a convolution layer followed by a ReLU."""
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable('weights',
                                  shape=kernel_size,
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=bias_size,
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(x, weights, strides=[1, stride_y, stride_x, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        # Apply the ReLU here so every conv layer is actually nonlinear,
        # matching the "Conv (w/ ReLU)" comments above.
        return tf.nn.relu(pre_activation, name=scope.name)
def fc(x, kernel_size, bias_size, name):
    """Create a fully connected layer (linear; activations are applied by the caller)."""
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable('weights',
                                  shape=kernel_size,
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=bias_size,
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        return tf.add(tf.matmul(x, weights), biases, name=scope.name)
def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
    """Create a max pooling layer."""
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                          strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)

def lrn(x, radius, alpha, beta, name, bias=1.0):
    """Create a local response normalization layer."""
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)

def dropout(x, keep_prob):
    """Create a dropout layer."""
    return tf.nn.dropout(x, keep_prob)
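As a quick sanity check of the layer arithmetic (128 -> 64 -> 32 -> 16 after the three 2x2 pools, hence the 16*16*512 flatten), the graph can be built once, in a fresh graph, against a placeholder:

import tensorflow as tf
from AlexNet import AlexNet

x = tf.placeholder(tf.float32, [None, 128, 128, 3])
logits = AlexNet(x, 0.5, 2)
print(logits.shape)   # expect (?, 2)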
Combined with the TFRecord file from the earlier post, after 10,000 iterations the training accuracy reaches over 90%, while the test accuracy is about 80%. One point to watch: when restoring the model with saver.restore, the variable scope names in the network definition must match the names under which the checkpoint was saved, otherwise restore will raise an error.
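To illustrate that restore/naming point, here is a minimal evaluation sketch: the graph is rebuilt with the same AlexNet function, so the scopes 'conv1' through 'fc8' match the checkpoint keys exactly. The test file name test.tfrecords is an assumption:

import tensorflow as tf
from parse_recorder_file import get_data
from AlexNet import AlexNet

def evaluate():
    # Rebuild the graph with the same scope names ('conv1' ... 'fc8') so that
    # the Saver can map checkpoint keys back onto these variables.
    test_batch, test_label_batch = get_data('./test.tfrecords', 128, False)  # assumed test file
    logits = AlexNet(test_batch, 1.0, 2)   # keep_prob=1.0: no dropout at test time
    correct = tf.nn.in_top_k(logits, test_label_batch, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('E:/Python/tensorflow/model/')
        saver.restore(sess, ckpt.model_checkpoint_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            print('batch accuracy = %.2f%%' % (sess.run(accuracy) * 100.0))
        finally:
            coord.request_stop()
            coord.join(threads)

evaluate()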