Tensorflow实现两个隐藏层的Softmax分类器
Softmax分类器介绍
之前用Tensorflow实现Logistic回归模型的博客里面讲解了Logistic,最后顺便提了一下Softmax模型,并给出了一个实例。Softmax是用于分类过程,用来实现多分类的,简单来说,它把一些输出的神经元映射到(0-1)之间的实数,并且归一化保证和为1,从而使得多分类的概率之和也刚好为1。不同于Logistic回归模型,Logistic回归是用于二分类,输出结果非0即1,而Softmax是用于多分类问题,输出结果属于哪个类是以概率的形式给出来的。比如mnist数据集上面的一个例子,输入数据是8,用softmax回归模型判断可能会给出属于不同数字的概率:
这是一种较为通俗的解释,当然我们也可以直接从这个名字入手去解释,Softmax可以分为soft和max,max也就是最大值,假设有两个变量a,b。如果a>b,则max为a,反之为b。那么在分类问题里面,如果只有max,输出的分类结果只有a或者b,是个非黑即白的结果。但是在现实情况下,我们希望输出的是取到某个分类的概率,或者说,我们希望分值大的那一项被经常取到,而分值较小的那一项也有一定的概率偶尔被取到,所以我们就应用到了soft的概念,即最后的输出是每个分类被取到的概率。
Softmax函数
求特征：$i$ 代表第 $i$ 类，$j$ 代表一张图片的第 $j$ 个像素，$W$ 是权重，$b$ 是偏置。在网络的最后一层是一个线性输出层：
$$z_{i}=\sum_{j}w_{i,j}x_{j}+b_{i}$$
求softmax：接下来对所有特征计算softmax。首先计算指数函数 $\exp$，然后对其进行标准化，保证所有类别输出的概率和为1。其中判定为第 $i$ 类的概率可以由下面公式得到：
$$\mathrm{softmax}(z)_{i}=\frac{\exp(z_{i})}{\sum_{j}\exp(z_{j})}$$
Tensorflow实现两个隐藏层的Softmax分类器
下面用Tensorflow实现下图所示的两个隐藏层的softmax网络，其中输入 $x$ 先经过隐藏层1(hidden1)和隐藏层2(hidden2)，然后再经过线性输出层得到预测输出logits，最后把logits变成概率分布，
如果用的是mnist数据集,最后一层就有十个神经单元,最终输出十个概率分布:
最终得到的计算图如下:
最后给出tensorflow实现代码。
import os.path
import tensorflow as tf
import os
import sys
from six.moves import xrange
import time
import argparse
from tensorflow.examples.tutorials.mnist import input_data
import TwoLayer_softmax
# Silence TensorFlow C++ INFO/WARNING messages on stdout.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
# Model hyperparameters; populated by argparse in the __main__ block below.
FLAGS=None
def fill_feed_dict(data_set, image_pl, label_pl):
    """Build a feed dict mapping the image/label placeholders to the next batch.

    Args:
        data_set: dataset object exposing next_batch(batch_size, fake_data).
        image_pl: image placeholder tensor.
        label_pl: label placeholder tensor.
    Returns:
        dict suitable for sess.run(..., feed_dict=...).
    """
    images, labels = data_set.next_batch(FLAGS.batch_size, FLAGS.fake_data)
    return {image_pl: images, label_pl: labels}
def do_eval(sess, eval_correct, image_placeholder, label_placeholder, data_set):
    """Run one full evaluation epoch over data_set and print precision@1.

    Only whole batches are evaluated, so up to batch_size-1 trailing
    examples are ignored.
    """
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size  # examples actually seen
    true_count = 0  # running total of correctly classified examples
    for _ in xrange(steps_per_epoch):
        batch_feed = fill_feed_dict(data_set, image_placeholder, label_placeholder)
        true_count += sess.run(eval_correct, feed_dict=batch_feed)
    precision = float(true_count) / num_examples
    print('Num examples: %d Num correct: %d Precision @ 1:%0.04f' % (num_examples, true_count, precision))
def run_training():
    """Train the two-hidden-layer softmax classifier on MNIST.

    Builds the graph from the TwoLayer_softmax module, runs FLAGS.max_step
    training steps, logs summaries every 100 steps, and every 1000 steps
    (and at the end) checkpoints the model and evaluates on the train,
    validation and test splits.
    """
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        # Graph construction: placeholders -> inference -> loss -> train/eval ops.
        images_ph, labels_ph = TwoLayer_softmax.placeholder_inputs(FLAGS.batch_size)
        logits = TwoLayer_softmax.inference(images_ph, FLAGS.hidden1, FLAGS.hidden2)
        loss_op = TwoLayer_softmax.loss(logits, labels_ph)
        train_op = TwoLayer_softmax.training(loss_op, FLAGS.learning_rate)
        eval_correct = TwoLayer_softmax.evaluate(logits, labels_ph)
        summary_op = tf.summary.merge_all()
        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            writer = tf.summary.FileWriter(FLAGS.log_dir, graph=tf.get_default_graph())
            sess.run(init_op)
            for step in xrange(FLAGS.max_step):
                start_time = time.time()
                feed = fill_feed_dict(data_sets.train, images_ph, labels_ph)
                _, loss_value = sess.run([train_op, loss_op], feed_dict=feed)
                duration = time.time() - start_time
                if step % 100 == 0:
                    # Progress report plus a summary record for TensorBoard.
                    print('Step %d: loss = %.2f(%.3f sec)' % (step, loss_value, duration))
                    summary_str = sess.run(summary_op, feed_dict=feed)
                    writer.add_summary(summary_str, global_step=step)
                    writer.flush()
                if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_step:
                    # Checkpoint, then evaluate on all three dataset splits.
                    saver.save(sess, os.path.join(FLAGS.log_dir, 'model.ckpt'),
                               global_step=step)
                    for title, split in (('Training Data Eval:', data_sets.train),
                                         ('Validation Data Eval:', data_sets.validation),
                                         ('Test Data Eval:', data_sets.test)):
                        print(title)
                        do_eval(sess, eval_correct, images_ph, labels_ph, split)
def main(_):
    """Entry point for tf.app.run: reset the log directory, then train."""
    log_dir = FLAGS.log_dir
    # Start from a clean log directory so old summaries/checkpoints don't mix in.
    if tf.gfile.Exists(log_dir):
        tf.gfile.DeleteRecursively(log_dir)
    tf.gfile.MakeDirs(log_dir)
    run_training()
if __name__ == '__main__':
    # Command-line hyperparameters; parsed values land in the module-level FLAGS.
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.5,
                        help='initial learning rate')
    parser.add_argument('--max_step', type=int, default=2000,
                        help='Number of steps to run trainer.')
    parser.add_argument('--hidden1', type=int, default=128,
                        help='Number of units in hidden layer 1')
    parser.add_argument('--hidden2', type=int, default=32,
                        help='Number of units in hidden layer 2')
    parser.add_argument('--batch_size', type=int, default=100,
                        help='Batch size.Must divide evenly into the dataset size.')
    parser.add_argument('--input_data_dir', type=str, default='mnist_data/',
                        help='Directory to put the input data')
    parser.add_argument('--log_dir', type=str, default='LOG_Twohidden_fullconnect',
                        help='Directory to input the log data')
    parser.add_argument('--fake_data', default=False, action='store_true',
                        help='If true,uses fake data for unit testing')
    FLAGS, unparsed = parser.parse_known_args()
    # Forward any unparsed args to tf.app.run so absl/tf flags still work.
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
# File name: TwoLayer_softmax.py (model-definition module imported by the training script above)
import tensorflow as tf
import os
import math
from tensorflow.examples.tutorials.mnist import input_data
# Silence TensorFlow C++ INFO/WARNING messages on stdout.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
#mnist=input_data.read_data_sets('mnist_data/',one_hot=True)
# MNIST images are 28x28 pixels, flattened to IMAGE_PIXELS input features.
IMAGE_SIZE=28
IMAGE_PIXELS=IMAGE_SIZE*IMAGE_SIZE
#batch_size=50
#hidden1_units=20
#hidden2_units=15
#learning_rate=0.01
# Number of output classes (digits 0-9).
NUM_Class=10
def placeholder_inputs(batch_size):
    """Create feed placeholders for one batch.

    Returns:
        (images, labels): float32 (batch_size, IMAGE_PIXELS) image placeholder
        and int32 (batch_size,) class-index label placeholder.
    """
    images = tf.placeholder(tf.float32, shape=(batch_size, IMAGE_PIXELS))
    labels = tf.placeholder(tf.int32, shape=(batch_size))
    return images, labels
def inference(image, hidden1_units, hidden2_units):
    """Build the forward pass: two ReLU hidden layers and a linear output layer.

    Args:
        image: float32 tensor of shape (batch, IMAGE_PIXELS).
        hidden1_units: width of the first hidden layer.
        hidden2_units: width of the second hidden layer.
    Returns:
        Unnormalized class scores (logits) of shape (batch, NUM_Class).
    """
    def _linear(x, fan_in, fan_out, weight_name):
        # Weights are truncated-normal with stddev 1/sqrt(fan_in): each column
        # is a zero-mean distribution scaled by the layer's fan-in, so no
        # artificial bias is injected into the input signal.
        w = tf.Variable(
            tf.random.truncated_normal([fan_in, fan_out],  # mean-2*stddev~mean+2*stddev
                                       stddev=1.0 / math.sqrt(float(fan_in))),
            name=weight_name)
        b = tf.Variable(tf.zeros([fan_out]), name='bias')
        return tf.add(tf.matmul(x, w), b)

    with tf.name_scope('hidden1'):
        hidden1 = tf.nn.relu(_linear(image, IMAGE_PIXELS, hidden1_units, 'Weight'))
    with tf.name_scope('hidden2'):
        hidden2 = tf.nn.relu(_linear(hidden1, hidden1_units, hidden2_units, 'weight'))
    with tf.name_scope('softmax_linear'):
        # No activation here: the loss applies softmax itself.
        logits = _linear(hidden2, hidden2_units, NUM_Class, 'weight')
    return logits
def loss(logits, labels):
    """Mean sparse softmax cross-entropy between logits and integer labels.

    Args:
        logits: raw linear outputs of shape (batch, NUM_Class).
        labels: integer class indices of shape (batch,).
    Returns:
        Scalar mean cross-entropy loss tensor.
    """
    labels64 = tf.to_int64(labels)
    # sparse_softmax_cross_entropy_with_logits converts the logits into a
    # softmax probability distribution internally.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels64, logits=logits, name='xentrypy')
    return tf.reduce_mean(per_example, name='xentrypy_mean')
def training(loss, learning_rate):
    """Create the gradient-descent train op plus loss/learning-rate summaries.

    Args:
        loss: scalar loss tensor to minimize.
        learning_rate: SGD step size.
    Returns:
        The op that performs one training step (and increments global_step).
    """
    with tf.name_scope('scalar_summaries'):
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('loss', loss)
    # global_step is not trainable: it only counts completed steps.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    return optimizer.minimize(loss, global_step)
def evaluate(logits, labels):
    """Count how many examples in the batch are classified correctly (top-1).

    Returns:
        Scalar int32 tensor: number of examples whose true label is the
        highest-scoring logit.
    """
    hits = tf.nn.in_top_k(predictions=logits, targets=labels, k=1)
    return tf.reduce_sum(input_tensor=tf.cast(hits, tf.int32))
# image_palceholder,labels_placeholder=placeholder_inputs(batch_size)
# logits=inference(image_palceholder,hidden1_units,hidden2_units)
# batch_loss=loss(logits,labels=labels_placeholder)
# train_batch=training(batch_loss,learning_rate=learning_rate)
# correct_counts=evaluate(logits=logits,labels=labels_placeholder)
# Init=tf.global_variables_initializer()
#
# print('计算图已经写入!在Tensorboard中查看!')
# writer=tf.summary.FileWriter(logdir='LOG_TwoLayer_Softmax',graph=tf.get_default_graph())
# writer.flush()