Shallow neural networks:
The output of a neural network
Matrix form: output = activation(input × weights + bias)
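A minimal NumPy sketch of this formula for a single layer (the shapes and the sigmoid activation are illustrative assumptions):

import numpy as np

x = np.random.rand(4, 3)    # 4 samples, 3 input features
W = np.random.randn(3, 2)   # weights: 3 inputs -> 2 units
b = np.zeros((1, 2))        # bias
output = 1.0 / (1.0 + np.exp(-(x @ W + b)))  # activation(input x weights + bias), sigmoid here
print(output.shape)         # (4, 2)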
A multilayer perceptron for handwritten digit recognition:
Key points:
- input: [None, 784]
- output: [None, 10]
- hidden layer: 256 units
- how to randomly initialize the parameters
- how to compute the loss
Random initialization
weight:
np.random.randn() or np.random.uniform()  # random initialization breaks the symmetry between units
bias:
Initializing the biases to 0 is fine.
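A minimal NumPy sketch of the 784-256-10 network above, putting initialization and the loss together (the 0.01 scaling and the fake batch are illustrative assumptions):

import numpy as np

n_in, n_hidden, n_out = 784, 256, 10
W1 = np.random.randn(n_in, n_hidden) * 0.01   # small random weights break symmetry
b1 = np.zeros((1, n_hidden))                  # zero biases are fine
W2 = np.random.randn(n_hidden, n_out) * 0.01
b2 = np.zeros((1, n_out))

X = np.random.rand(32, n_in)                            # fake batch, shape [None, 784]
Y = np.eye(n_out)[np.random.randint(0, n_out, 32)]      # fake one-hot labels, shape [None, 10]

h = np.maximum(0, X @ W1 + b1)                          # ReLU hidden layer
logits = h @ W2 + b2
probs = np.exp(logits - logits.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)               # softmax over the 10 classes
loss = -np.mean(np.sum(Y * np.log(probs + 1e-10), axis=1))  # cross-entropy loss
print(loss)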
Deep neural networks:
Why do deep networks beat shallow ones on many problems?
- The first few layers learn simple, low-level features.
- The later layers combine many simple features to detect complex ones.
A deep network can get by with relatively few hidden units per layer spread over many layers; for a shallow network to compute the same function, the number of units may have to grow exponentially.
Parameters vs. hyperparameters
Typical hyperparameters:
- learning rate
- number of gradient-descent iterations
- number of hidden layers
- number of units per hidden layer
- choice of activation function
Applied deep learning is to a large extent an empirical process: put plainly, you keep trying values until you find ones that work.
Improving deep neural networks:
On splitting the data into training, dev (validation), and test sets.
In the big-data era, the main purpose of the test set is to give an accurate estimate of the classifier's performance,
so with millions of examples, roughly 10,000 of them (about 1% of a million) are enough to evaluate a single classifier accurately.
A 98% / 1% / 1% split is therefore reasonable.
Make sure the dev set and the test set come from the same distribution.
Data normalization:
When a model is fitted with gradient descent, normalization is usually essential; without it, training converges slowly or not at all. Two common schemes:
1. min-max normalization
2. mean / standard-deviation (z-score) normalization
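A minimal NumPy sketch of both schemes (the toy feature matrix is an illustrative assumption):

import numpy as np

X = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])  # toy feature matrix

# 1. min-max normalization: scale each feature into [0, 1]
X_minmax = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

# 2. z-score normalization: zero mean and unit standard deviation per feature
X_zscore = (X - X.mean(axis=0)) / X.std(axis=0)

print(X_minmax)
print(X_zscore)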
Cross-validation set:
Regularization:
1. ridge (L2) and lasso (L1) penalties
2. dropout (see the sketch after this list)
3. data augmentation
4. early stopping (stop training the network early)
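A minimal NumPy sketch of (inverted) dropout applied to one layer's activations at training time; keep_prob and the toy activations are illustrative assumptions:

import numpy as np

keep_prob = 0.8
a = np.random.rand(5, 4)                        # activations of some hidden layer

mask = np.random.rand(*a.shape) < keep_prob     # randomly keep ~80% of the units
a_dropped = a * mask / keep_prob                # rescale so the expected activation is unchanged
print(a_dropped)

At test time no units are dropped; the rescaling above keeps activations on the same scale during training and testing.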
Why does regularization reduce overfitting?
Intuitively, if the regularization strength is set large enough, the weight matrices are pushed toward values close to 0; that is roughly like setting the weights of many hidden units to 0, which largely removes their influence and leaves an effectively simpler network.
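A minimal NumPy sketch of how an L2 (ridge-style) penalty enters the loss; lambd, the weight shapes, and the data-loss value are illustrative assumptions:

import numpy as np

lambd = 0.7                                # regularization strength
W1 = np.random.randn(4, 3)
W2 = np.random.randn(3, 1)
data_loss = 0.25                           # stands in for the data-fit part of the loss

l2_penalty = lambd / 2 * (np.sum(W1 ** 2) + np.sum(W2 ** 2))
total_loss = data_loss + l2_penalty        # gradient descent on this also shrinks the weights toward 0
print(total_loss)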
Vanishing and exploding gradients:
Mitigation: a carefully scaled random initialization of the network weights (see below).
How should the weight parameters be initialized?
For ReLU activations (He initialization):
w[i] = np.random.randn(shape) * np.sqrt(2/n[i-1])  # n[i-1]: number of inputs from the previous layer; w[i]: this layer's weight matrix
Other common scalings: np.sqrt(1/n[i-1]) (Xavier, often used with tanh) and np.sqrt(2/(n[i-1]+n[i])).
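A minimal NumPy sketch of He initialization for one layer (the layer sizes are illustrative assumptions):

import numpy as np

n_prev, n_curr = 256, 128                                    # units in the previous / current layer
W = np.random.randn(n_prev, n_curr) * np.sqrt(2 / n_prev)    # He initialization for ReLU layers
b = np.zeros((1, n_curr))
print(W.std())                                               # roughly sqrt(2 / n_prev)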
Gradient checking (use only while debugging):
Approximate the gradient with the two-sided difference (f(θ+ε) - f(θ-ε)) / (2ε); the one-sided difference (f(θ+ε) - f(θ)) / ε is less accurate.
If the numerical gradient and the backpropagated gradient disagree, the program probably has a bug that you need to track down.
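A minimal NumPy sketch of the check on a toy function whose true gradient is known (the function and step size are illustrative assumptions):

import numpy as np

def f(theta):
    return np.sum(theta ** 2)          # toy loss whose gradient is 2 * theta

def analytic_grad(theta):
    return 2 * theta                   # stands in for the backpropagated gradient

theta = np.random.randn(5)
eps = 1e-7
num_grad = np.zeros_like(theta)
for i in range(theta.size):
    e = np.zeros_like(theta)
    e[i] = eps
    num_grad[i] = (f(theta + e) - f(theta - e)) / (2 * eps)   # two-sided difference

diff = np.linalg.norm(num_grad - analytic_grad(theta)) / (
    np.linalg.norm(num_grad) + np.linalg.norm(analytic_grad(theta)))
print(diff)                            # should be tiny, e.g. below 1e-7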
Adam optimization algorithm:
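As a reminder of what the optimizer does, a minimal NumPy sketch of the Adam update rule with the commonly used default hyperparameters (the toy gradient is an illustrative assumption):

import numpy as np

alpha, beta1, beta2, eps = 0.001, 0.9, 0.999, 1e-8
theta = np.random.randn(5)
m = np.zeros_like(theta)               # first-moment (momentum) estimate
v = np.zeros_like(theta)               # second-moment (RMS) estimate

for t in range(1, 101):
    g = 2 * theta                      # toy gradient of sum(theta**2)
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g ** 2
    m_hat = m / (1 - beta1 ** t)       # bias correction
    v_hat = v / (1 - beta2 ** t)
    theta -= alpha * m_hat / (np.sqrt(v_hat) + eps)
print(theta)                           # drifting toward the minimum at 0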
Tips for organizing the hyperparameter search systematically:
Rough tuning priority: learning rate α > number of hidden units > mini-batch size > number of hidden layers > number of training iterations.
Read plenty of other people's case studies.
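One common tactic is to sample the learning rate at random on a log scale rather than on a uniform grid; a minimal sketch, assuming we want values between 1e-4 and 1:

import numpy as np

for _ in range(5):
    r = -4 * np.random.rand()          # r uniform in [-4, 0]
    alpha = 10 ** r                    # learning rate uniform on a log scale over [1e-4, 1]
    print(alpha)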
A hand-rolled DNN wrapper:
import tensorflow as tf  # TensorFlow 1.x API (tf.placeholder / tf.Session)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
class MlpClassifier():
    """DNN classifier for binary classification."""
    def __init__(self, hiddenNodes, hiddenDeep=3):
        """hiddenNodes: units per hidden layer; hiddenDeep: number of layers (the last one acts as the output layer)."""
        self.hiddenNodes = hiddenNodes
        self.hiddenDeep = hiddenDeep

    def fit(self, trainX, trainY, AdamStep, learnRate=0.1, testX=None, testY=None):
        """trainY must be one-hot encoded, shape [None, 2]."""
        trainX, validationX, trainY, validationY = train_test_split(trainX, trainY, test_size=0.1)
        self.input_ = trainX.shape[1]
        self.output_ = 2
        self.trainX_ = trainX
        self.trainY_ = trainY
        self.AdamStep = AdamStep
        self.learnRate = learnRate
        dataInput = tf.placeholder(tf.float32, shape=(None, self.input_))
        labelInput = tf.placeholder(tf.float32, shape=(None, self.output_))
        var = {}  # holds the per-layer weight, bias and activation tensors
        for i in range(1, self.hiddenDeep + 1):
            if self.hiddenDeep == 1:
                # special case: a single layer maps the input directly to the logits
                var["w" + str(i)] = tf.Variable(tf.random_normal((self.input_, self.output_)) * tf.sqrt(2 / self.input_))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.output_)))
                var["layer" + str(i)] = tf.add(tf.matmul(dataInput, var["w" + str(i)]), var["b" + str(i)])
                break
            if i == 1:
                # first hidden layer: He-style initialization scaled by sqrt(2 / fan_in)
                var["w" + str(i)] = tf.Variable(tf.random_normal((self.input_, self.hiddenNodes)) * tf.sqrt(2 / self.input_))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.hiddenNodes)))
                var["layer" + str(i)] = tf.nn.relu(tf.add(tf.matmul(dataInput, var["w" + str(i)]), var["b" + str(i)]))
            elif i == self.hiddenDeep:
                # output layer: raw logits, softmax is applied below
                var["w" + str(i)] = tf.Variable(tf.random_normal((self.hiddenNodes, self.output_)) * tf.sqrt(2 / self.hiddenNodes))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.output_)))
                var["layer" + str(i)] = tf.add(tf.matmul(var["layer" + str(i - 1)], var["w" + str(i)]), var["b" + str(i)])
            else:
                # intermediate hidden layers
                var["w" + str(i)] = tf.Variable(
                    tf.random_normal((self.hiddenNodes, self.hiddenNodes)) * tf.sqrt(2 / self.hiddenNodes))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.hiddenNodes)))
                var["layer" + str(i)] = tf.nn.relu(tf.add(tf.matmul(var["layer" + str(i - 1)], var["w" + str(i)]), var["b" + str(i)]))
        result = tf.nn.softmax(var["layer" + str(self.hiddenDeep)], axis=1)  # [None, 2] class probabilities
        # cross-entropy loss; clipping avoids log(0)
        loss = tf.reduce_sum(-labelInput * tf.log(tf.clip_by_value(result, 1e-10, 1.0)))
        train = tf.train.AdamOptimizer(learnRate).minimize(loss)
        # session
        ratios = []
        validations = []
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            for i in range(AdamStep):
                sess.run(train, feed_dict={dataInput: trainX, labelInput: trainY})
                trainYHat = sess.run(result, feed_dict={dataInput: trainX})
                ratio = np.sum(np.argmax(trainY, axis=1) == np.argmax(trainYHat, axis=1)) / trainY.shape[0]
                ratios.append(ratio)
                validationYHat = sess.run(result, feed_dict={dataInput: validationX})
                validation = np.sum(np.argmax(validationY, axis=1) == np.argmax(validationYHat, axis=1)) / validationY.shape[0]
                validations.append(validation)
            # predict
            if testY is not None and testX is not None:
                YHat = sess.run(result, feed_dict={dataInput: testX})
                ratio = np.sum(np.argmax(testY, axis=1) == np.argmax(YHat, axis=1)) / testY.shape[0]
                return ratio
            elif testY is None and testX is not None:
                YHat = sess.run(result, feed_dict={dataInput: testX})
                return np.argmax(YHat, axis=1)
            elif testY is None and testX is None:
                x = [i for i in range(1, AdamStep + 1)]
                plt.plot(x, ratios, color="blue", label="train")
                plt.plot(x, validations, color="yellow", label="validation")
                plt.ylim(0, 1)
                plt.legend()
                plt.show()
            else:
                print("Invalid arguments: testY was given without testX")

    def predict(self, testX, AdamStep):
        # note: this retrains the network from scratch and then predicts on testX
        YHat = self.fit(self.trainX_, self.trainY_, AdamStep, self.learnRate, testX=testX)
        return YHat

    def score(self, testX, testY, AdamStep):
        # note: this retrains the network from scratch and reports accuracy on (testX, testY)
        score = self.fit(self.trainX_, self.trainY_, AdamStep, self.learnRate, testX=testX, testY=testY)
        return score
class MlpRegression():
    """DNN regression."""
    def __init__(self, hiddenNodes, hiddenDeep=3):
        """hiddenNodes: units per hidden layer; hiddenDeep: number of layers (the last one acts as the output layer)."""
        self.hiddenNodes = hiddenNodes
        self.hiddenDeep = hiddenDeep

    def fit(self, trainX, trainY, AdamStep, learnRate=0.1, testX=None, testY=None):
        """trainY must be a column of targets, shape [None, 1]."""
        trainX, validationX, trainY, validationY = train_test_split(trainX, trainY, test_size=0.1)
        self.input_ = trainX.shape[1]
        self.output_ = 1
        self.trainX_ = trainX
        self.trainY_ = trainY
        self.AdamStep = AdamStep
        self.learnRate = learnRate
        dataInput = tf.placeholder(tf.float32, shape=(None, self.input_))
        labelInput = tf.placeholder(tf.float32, shape=(None, self.output_))
        var = {}  # holds the per-layer weight, bias and activation tensors
        for i in range(1, self.hiddenDeep + 1):
            if self.hiddenDeep == 1:
                # special case: a single linear layer maps the input directly to the output
                var["w" + str(i)] = tf.Variable(tf.random_normal((self.input_, self.output_)) * tf.sqrt(2 / self.input_))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.output_)))
                var["layer" + str(i)] = tf.add(tf.matmul(dataInput, var["w" + str(i)]), var["b" + str(i)])
                break
            if i == 1:
                # first hidden layer: He-style initialization scaled by sqrt(2 / fan_in)
                var["w" + str(i)] = tf.Variable(tf.random_normal((self.input_, self.hiddenNodes)) * tf.sqrt(2 / self.input_))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.hiddenNodes)))
                var["layer" + str(i)] = tf.nn.relu(tf.add(tf.matmul(dataInput, var["w" + str(i)]), var["b" + str(i)]))
            elif i == self.hiddenDeep:
                # output layer: linear, so predictions are not restricted to non-negative values
                var["w" + str(i)] = tf.Variable(tf.random_normal((self.hiddenNodes, self.output_)) * tf.sqrt(2 / self.hiddenNodes))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.output_)))
                var["layer" + str(i)] = tf.add(tf.matmul(var["layer" + str(i - 1)], var["w" + str(i)]), var["b" + str(i)])
            else:
                # intermediate hidden layers
                var["w" + str(i)] = tf.Variable(
                    tf.random_normal((self.hiddenNodes, self.hiddenNodes)) * tf.sqrt(2 / self.hiddenNodes))
                var["b" + str(i)] = tf.Variable(tf.zeros((1, self.hiddenNodes)))
                var["layer" + str(i)] = tf.nn.relu(tf.add(tf.matmul(var["layer" + str(i - 1)], var["w" + str(i)]), var["b" + str(i)]))
        result = var["layer" + str(self.hiddenDeep)]
        loss = tf.reduce_mean(tf.square(labelInput - result))  # mean squared error
        train = tf.train.AdamOptimizer(learnRate).minimize(loss)
        # session
        ratios = []
        validations = []
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            for i in range(AdamStep):
                sess.run(train, feed_dict={dataInput: trainX, labelInput: trainY})
                trainYHat = sess.run(result, feed_dict={dataInput: trainX})
                ratio = r2_score(trainY, trainYHat)
                ratios.append(ratio)
                validationYHat = sess.run(result, feed_dict={dataInput: validationX})
                validation = r2_score(validationY, validationYHat)
                validations.append(validation)
            # predict
            if testY is not None and testX is not None:
                YHat = sess.run(result, feed_dict={dataInput: testX})
                ratio = r2_score(testY, YHat)
                return ratio
            elif testY is None and testX is not None:
                YHat = sess.run(result, feed_dict={dataInput: testX})
                return YHat
            elif testY is None and testX is None:
                x = [i for i in range(1, AdamStep + 1)]
                plt.plot(x, ratios, color="blue", label="train")
                plt.plot(x, validations, color="yellow", label="validation")
                plt.ylim(0, 1)
                plt.legend()
                plt.show()
            else:
                print("Invalid arguments: testY was given without testX")

    def predict(self, testX, AdamStep):
        # note: this retrains the network from scratch and then predicts on testX
        YHat = self.fit(self.trainX_, self.trainY_, AdamStep, self.learnRate, testX=testX)
        return YHat

    def score(self, testX, testY, AdamStep):
        # note: this retrains the network from scratch and reports R^2 on (testX, testY)
        score = self.fit(self.trainX_, self.trainY_, AdamStep, self.learnRate, testX=testX, testY=testY)
        return score
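A minimal usage sketch for the two wrappers on synthetic data (the datasets and hyperparameter values are illustrative assumptions):

# classification: one-hot labels of shape [None, 2]
X = np.random.rand(500, 20)
y = (X[:, 0] > 0.5).astype(int)
clf = MlpClassifier(hiddenNodes=32, hiddenDeep=3)
clf.fit(X, np.eye(2)[y], AdamStep=200, learnRate=0.01)   # with no test data, plots train/validation accuracy

# regression: targets of shape [None, 1]
Xr = np.random.rand(500, 5)
yr = Xr.sum(axis=1, keepdims=True) + 0.1 * np.random.randn(500, 1)
reg = MlpRegression(hiddenNodes=32, hiddenDeep=3)
reg.fit(Xr, yr, AdamStep=200, learnRate=0.01)            # plots train/validation R^2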