Following the algorithm learned in Step 9, we now solve a small CNN: a convolutional layer is added in front of the fully-connected layers built earlier.
Note: during error back-propagation, the error is propagated backwards with a "FULL" convolution, while the weight sensitivities of the convolutional layer are computed with a "VALID" cross-correlation (what TensorFlow calls convolution: the kernel is not rotated by 180° when the convolution is evaluated). This recipe only applies when the forward pass uses "VALID" convolution. Understanding the concepts behind a CNN matters far more than memorizing the formulas!
At the starting position of the convolution, "FULL" aligns the inner edge of the kernel with the image boundary, "SAME" aligns the kernel center, and "VALID" aligns the outer edge of the kernel; the sketch below makes this concrete.
Also note that a deep network trains very slowly at the beginning!
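To make the three padding modes and the "no 180° rotation" remark concrete, here is a minimal standalone sketch (NumPy/SciPy only, separate from the training script below; the array values x, k, delta are arbitrary and only the shapes and identities matter). It also checks the two back-propagation identities used later: the error flows back through a "FULL" convolution, and the kernel gradient is a "VALID" cross-correlation of the layer input with the error.

import numpy as np
import scipy.signal as signal

x=np.arange(16.0).reshape(4,4)   # a 4x4 "image"
k=np.arange(9.0).reshape(3,3)    # a 3x3 kernel
print(signal.convolve2d(x,k,'full').shape)   # (6,6): kernel inner edge aligned with the image boundary
print(signal.convolve2d(x,k,'same').shape)   # (4,4): kernel center aligned
print(signal.convolve2d(x,k,'valid').shape)  # (2,2): kernel outer edge aligned
# tf.nn.conv2d is really a cross-correlation; it equals a true convolution
# with the kernel rotated by 180 degrees:
print(np.allclose(signal.correlate2d(x,k,'valid'),
                  signal.convolve2d(x,np.rot90(k,2),'valid')))  # True
# Back-prop identities for a forward pass y=correlate2d(x,k,'valid') with loss L=sum(y):
delta=np.ones((2,2))                       # dL/dy
dx=signal.convolve2d(delta,k,'full')       # dL/dx: error propagated back with 'FULL'
dk=signal.correlate2d(x,delta,'valid')     # dL/dk: 'VALID' cross-correlation of input and error
eps=1e-6                                   # finite-difference check of one input element
xp=x.copy(); xp[1,1]+=eps
num=(signal.correlate2d(xp,k,'valid').sum()-signal.correlate2d(x,k,'valid').sum())/eps
print(np.allclose(num,dx[1,1]))            # True

The complete training script follows.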
# coding=utf-8
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # suppress TF INFO and WARNING messages, show only errors
import numpy as np
import tensorflow as tf
import scipy.signal as signal
logs_path=r'c:/temp/log_mnist_softmax'
learning_rate=5 # the error becomes large when >0.05
training_epochs=200
trainData_in=np.array([[1.0,1.0,0.0,0.0],\
[0.0,0.0,0.0,0.0]])
trainData_out=np.array([[0.0,1.0],\
[1.0,0.0]])
testData_in=np.array([[0.0,0.0,0.0,1.0]])
testData_out=np.array([[1.0,0.0]])
trainData_in=np.reshape(trainData_in,[-1,2,2,1]) # reshape with numpy, not tf
testData_in=np.reshape(testData_in,[-1,2,2,1])
print(np.shape(trainData_in))
print(np.shape(trainData_out))
###CNN:
def dsigmoid(z):
    # derivative of the sigmoid activation
    dsigmoid=tf.nn.sigmoid(z)*(1-tf.nn.sigmoid(z))
    return dsigmoid
def weight_variable(shape):
    initial=tf.truncated_normal(shape,stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial=tf.constant(0.0,shape=shape)
    return tf.Variable(initial)
def conv2d(x,w):
    return tf.nn.conv2d(x,w,strides=[1,1,1,1],padding='VALID')
def max_pool_2X2(x):
    # defined for completeness; not used in this small example
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
def get_deltai(k,i,j,deltaj,kij):
    # propagate the error of output channel j back to input channel i with a
    # 'FULL' convolution (not called here, since conv1 is the first layer)
    deltaj=deltaj[k,:,:,j]
    kij=kij[:,:,i,j]
    return signal.convolve2d(deltaj,kij,'full')
def nabla_kij(k,i,j,xi,deltaj):
    # weight sensitivity of kernel slice (i,j) for sample k: a 'VALID'
    # cross-correlation of the layer input with the error
    deltaj=deltaj[k,:,:,j]
    deltaj=np.reshape(deltaj,[list(deltaj.shape)[0],list(deltaj.shape)[1],1,1])
    xi=xi[k:k+1,:,:,i:i+1]
    return tf.nn.conv2d(xi,deltaj,strides=[1,1,1,1],padding='VALID')
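# Tensor layout assumed by the two helpers above (TF NHWC convention):
#   deltaj : [batch, height, width, out_channels]   error of the conv layer output
#   kij    : [kernel_h, kernel_w, in_channels, out_channels]
#   xi     : [batch, height, width, in_channels]    input of the conv layer
# nabla_kij slides the error map delta[k,:,:,j] over the input patch x[k,:,:,i]
# with a 'VALID' cross-correlation, giving sample k's contribution to the
# gradient of the kernel slice w[:,:,i,j].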
#define placeholder for inputs to network:
x_input=tf.placeholder(tf.float32, [None,2,2,1], name='x_input')
y_desired=tf.placeholder(tf.float32,[None,2],name='y_desired')
#net:
#conv1 layer:
w_conv1=weight_variable([1,1,1,2]) # mind the number of channels!
b_conv1=bias_variable([2]) # mind the number of channels
z_conv1=conv2d(x_input,w_conv1)+b_conv1
conv1=tf.nn.sigmoid(z_conv1) # shape (None,2,2,2); the sigmoid output barely differs between inputs 0 and 1, so learning starts slowly!
#fully-connected layer 1:
#flatten:
w_fuc1=weight_variable([2*2*2,3]) # mind the number of nodes!
b_fuc1=bias_variable([3])
conv1_flat=tf.reshape(conv1,[-1,w_fuc1.get_shape().as_list()[0]]) # mind the number of nodes!
z_fuc1=tf.matmul(conv1_flat,w_fuc1)+b_fuc1
fuc1=tf.nn.sigmoid(z_fuc1)
#fully-connected output layer:
w_fuco=weight_variable([3,2])
b_fuco=bias_variable([2])
z_fuco=tf.matmul(fuc1,w_fuco)+b_fuco # fuc1 is used directly; no dropout in this example
y_output=tf.nn.softmax(z_fuco,name='y_output')
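# Shape flow through the network:
#   x_input    (None,2,2,1)
#   conv1      (None,2,2,2)   1x1 kernel, 'VALID' padding, 2 output channels
#   conv1_flat (None,8)
#   fuc1       (None,3)
#   y_output   (None,2)       softmax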
#cost:
lossFun_crossEntropy=-tf.reduce_mean(y_desired*tf.log(y_output)) # mean cross-entropy
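# For one-hot targets, this softmax + cross-entropy pair gives an output-layer
# error of simply delta = y_output - y_desired ('BP1' in the loop below); the
# 1/(m*n) factor from reduce_mean is applied later via learning_rate/m/n.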
feed_dict_trainData={x_input:trainData_in,\
y_desired:trainData_out,\
}
feed_dict_testData={x_input:testData_in,\
y_desired:testData_out,\
}
###
#train_step=tf.train.AdamOptimizer(0.05).minimize(lossFun_crossEntropy)
###
tf.summary.scalar('cost',lossFun_crossEntropy)
summary_op=tf.summary.merge_all()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logs_writer=tf.summary.FileWriter(logs_path,graph=tf.get_default_graph())
    for epoch in range(training_epochs):
        ###
        #_,summary=sess.run([train_step,summary_op],feed_dict=feed_dict_trainData)
        #print('Epoch',epoch)
        #print('Cost_trainData:',lossFun_crossEntropy.eval\
        #    (feed_dict=feed_dict_trainData))
        ###
        #cross-entropy+softmax BP:
        wconv1_temp,bconv1_temp,zconv1_temp,conv1_temp,conv1_flat_temp,\
        wfuc1_temp,bfuc1_temp,zfuc1_temp,fuc1_temp,\
        wfuco_temp,bfuco_temp,zfuco_temp,y_output_temp,cost_temp=\
            sess.run([w_conv1,b_conv1,z_conv1,conv1,conv1_flat,\
                      w_fuc1,b_fuc1,z_fuc1,fuc1,\
                      w_fuco,b_fuco,z_fuco,y_output,lossFun_crossEntropy],\
                     feed_dict=feed_dict_trainData)
        delta=y_output_temp-trainData_out # BP1: output error for softmax + cross-entropy
        nabla_b=delta.sum(axis=0) # BP3: sum delta over the batch
        nabla_w=np.dot(fuc1_temp.transpose(),delta) # BP4
        dSigmod_z1=sess.run(dsigmoid(zfuc1_temp))
        delta=np.dot(delta,wfuco_temp.transpose())*dSigmod_z1 # BP2
        nabla_b1=delta.sum(axis=0) # BP3: sum delta over the batch
        nabla_w1=np.dot(conv1_flat_temp.transpose(),delta) # BP4
        #### conv layer gradients:
        dSigmoid_z_conv1flat=sess.run(dsigmoid(zconv1_temp.reshape([-1,w_fuc1.get_shape().as_list()[0]]))) # mind the number of nodes!
        delta=np.dot(delta,wfuc1_temp.transpose())*dSigmoid_z_conv1flat
        delta=delta.reshape(list(zconv1_temp.shape))
        nabla_b_conv1=np.sum(delta,(0,1,2))
        nabla_k=np.zeros(list(wconv1_temp.shape))
        for k in range(len(delta)):
            for i in range(list(nabla_k.shape)[2]):
                for j in range(list(nabla_k.shape)[3]):
                    nablakij=sess.run(nabla_kij(k,i,j,trainData_in,delta))
                    nabla_k[:,:,i,j]+=nablakij[0,:,:,0]
        ####
        m,n=np.shape(trainData_out)
        # (building new assign ops every epoch keeps growing the graph; acceptable for this tiny demo)
        update_w1=tf.assign(w_fuc1,wfuc1_temp-learning_rate/m/n*nabla_w1)
        update_b1=tf.assign(b_fuc1,bfuc1_temp-learning_rate/m/n*nabla_b1)
        update_w=tf.assign(w_fuco,wfuco_temp-learning_rate/m/n*nabla_w)
        update_b=tf.assign(b_fuco,bfuco_temp-learning_rate/m/n*nabla_b)
        update_bconv1=tf.assign(b_conv1,bconv1_temp-learning_rate/m/n*nabla_b_conv1)
        update_wconv1=tf.assign(w_conv1,wconv1_temp-learning_rate/m/n*nabla_k)
        sess.run(update_wconv1)
        sess.run(update_bconv1)
        sess.run(update_w1)
        sess.run(update_b1)
        sess.run(update_w)
        sess.run(update_b)
        #####
        summary=sess.run(summary_op,feed_dict=feed_dict_trainData)
        logs_writer.add_summary(summary,epoch)
        print('Epoch',epoch)
        print('Cost_trainData:',lossFun_crossEntropy.eval\
            (feed_dict=feed_dict_trainData))
        print('Cost_testData:',lossFun_crossEntropy.eval\
            (feed_dict=feed_dict_testData))
    print('Done')
    try_input=testData_in[0]
    try_desired=testData_out[0]
    print(try_desired)
    print(y_output.eval(feed_dict={x_input:[try_input]}))