批量归一化:
在深层网络中，各层输入的分布会随训练不断变化（即内部协变量偏移），容易使激活函数进入饱和区、导致训练变慢甚至停滞。批量归一化正是为缓解这一问题而提出的。在实际应用中，批量归一化能显著加快收敛，并带有一定的正则化效果，某些情况下可以替代正则化和弃权（dropout）。
下面介绍如何在手写数字识别中使用批量归一化函数
# Build a CNN graph for MNIST digit classification with batch normalization
# applied after each convolution (TF 1.x graph-mode API).
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib.layers.python.layers import batch_norm

x_input = tf.placeholder(dtype=tf.float32, shape=[None, 784])  # flattened 28x28 grayscale images
y_input = tf.placeholder(dtype=tf.float32, shape=[None, 10])   # one-hot digit labels
train = tf.placeholder(dtype=tf.bool)                          # True during training: BN uses batch stats

x_image = tf.reshape(x_input, [-1, 28, 28, 1])  # reshape to NHWC, single channel

# First conv stage: 3x3 kernel, 1 -> 64 channels.
conv1 = tf.Variable(tf.truncated_normal([3, 3, 1, 64], mean=0, stddev=0.1))  # conv kernel
convb1 = tf.Variable(tf.zeros([64]) + 0.1)                                   # bias
# Batch-normalize the convolution output.
# BUG FIX: tf.nn.conv2d requires the `strides` argument in TF1; it was missing
# and the original call would raise a TypeError.
batch_norm_layer1 = batch_norm(
    tf.add(tf.nn.conv2d(x_image, conv1, strides=[1, 1, 1, 1], padding="SAME"), convb1),
    decay=0.9, updates_collections=None, is_training=train)
convlayer1 = tf.nn.relu(batch_norm_layer1)
poollayer1 = tf.nn.max_pool(convlayer1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                            padding="SAME")  # 2x2 max pool: 28 -> 14

# Second conv stage: 3x3 kernel, 64 -> 32 channels.
conv2 = tf.Variable(tf.truncated_normal([3, 3, 64, 32], mean=0, stddev=0.1))
convb2 = tf.Variable(tf.zeros([32]) + 0.1)
# BUG FIX: same missing `strides` argument as above.
batch_norm_layer2 = batch_norm(
    tf.add(tf.nn.conv2d(poollayer1, conv2, strides=[1, 1, 1, 1], padding="SAME"), convb2),
    decay=0.9, updates_collections=None, is_training=train)
convlayer2 = tf.nn.relu(batch_norm_layer2)
poollayer2 = tf.nn.max_pool(convlayer2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                            padding="SAME")  # 14 -> 7; shape (?, 7, 7, 32)

# Flatten for the fully connected classifier head.
x_nn = tf.reshape(poollayer2, [-1, 7 * 7 * 32])
w1 = tf.Variable(tf.truncated_normal([7 * 7 * 32, 512], mean=0, stddev=0.1))
b1 = tf.Variable(tf.zeros([512]) + 0.1)
layer1 = tf.nn.tanh(tf.add(tf.matmul(x_nn, w1), b1))
w2 = tf.Variable(tf.truncated_normal([512, 256], mean=0, stddev=0.1))
b2 = tf.Variable(tf.zeros([256]) + 0.1)
layer2 = tf.nn.tanh(tf.add(tf.matmul(layer1, w2), b2))
w3 = tf.Variable(tf.truncated_normal([256, 10], mean=0, stddev=0.1))
b3 = tf.Variable(tf.zeros([10]) + 0.1)
layer3 = tf.add(tf.matmul(layer2, w3), b3)  # raw class scores
logit = layer3
# BUG FIX: the 10 digit classes are mutually exclusive, so softmax cross entropy
# is the correct loss; sigmoid cross entropy treats each class as an independent
# binary label and trains poorly on one-hot targets.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_input, logits=logit))
# Train the network on MNIST and print per-batch accuracy and loss.
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
# Fraction of predictions whose argmax matches the one-hot label.
precise = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logit, 1), tf.argmax(y_input, 1)),
                                 dtype=tf.float32))
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
batch_size = 1000
n_batch_size = mnist.train.num_examples // batch_size  # batches per epoch
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10):  # epochs
        for j in range(n_batch_size):
            x_train_input, y_train_input = mnist.train.next_batch(batch_size)
            feed = {x_input: x_train_input, y_input: y_train_input, train: True}
            sess.run(train_step, feed_dict=feed)
            # PERF FIX: the original ran two additional separate sess.run calls
            # (one for accuracy, one for loss), recomputing the forward pass each
            # time; fetching both in a single run yields the same values with one
            # forward pass instead of two.
            precise_train, loss_p = sess.run([precise, loss], feed_dict=feed)
            print(precise_train)
            print(loss_p)