Main reference: 《深度学习入门:基于python的理论和实现》 (Deep Learning from Scratch)
Activation Functions
sigmoid
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))
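A quick check (with arbitrarily chosen inputs, not from the book) shows that sigmoid squashes any real value into (0, 1):

x = np.array([-5.0, 0.0, 5.0])   # example inputs
print(sigmoid(x))                # ~[0.0067 0.5    0.9933]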
relu
def relu(x):
    return np.maximum(0, x)
softmax
def softmax(a):
    # naive version: np.exp(a) can overflow when a contains large values
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y
better softmax
def better_softmax(a):
    # subtracting the max is safe (softmax is invariant to adding a constant
    # to every element) and prevents exp() from overflowing
    c = np.max(a)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y
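A small comparison (example logits chosen here for illustration) shows why the shift matters: the naive softmax overflows on large inputs, while better_softmax stays finite:

a = np.array([1010.0, 1000.0, 990.0])   # large example logits
print(softmax(a))          # [nan nan nan] plus an overflow RuntimeWarning
print(better_softmax(a))   # [9.99954600e-01 4.53978686e-05 2.06106005e-09]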
Loss Functions
mean squared error
def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t) ** 2)
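For example, with a one-hot target and two example predictions (numbers made up for illustration), the error is lower when the probability mass sits on the correct class:

t = np.array([0, 0, 1, 0])                 # one-hot target, true class is 2
y_good = np.array([0.1, 0.05, 0.8, 0.05])  # confident, correct prediction
y_bad = np.array([0.4, 0.3, 0.1, 0.2])     # diffuse, wrong prediction
print(mean_squared_error(y_good, t))       # ~0.0275
print(mean_squared_error(y_bad, t))        # ~0.55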
cross entropy error
def cross_entropy_error(y, t):
    # t is one-hot; the small delta avoids log(0) when y contains zeros
    delta = 1e-7
    return -np.sum(t * np.log(y + delta))
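With the same kind of one-hot target (example numbers again), the loss is essentially the negative log of the probability assigned to the true class:

t = np.array([0, 0, 1, 0])              # one-hot target, true class is 2
y = np.array([0.1, 0.05, 0.8, 0.05])    # example softmax output
print(cross_entropy_error(y, t))        # ~0.223, i.e. -log(0.8)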
detailed cross entropy error
Here is a more concrete recipe, where label holds the integer class indices and logit is the raw logits output by the model. First, label has to be converted to one-hot form.
def np_onehot(nc, label):
    # broadcast class ids (1, nc) against labels (N, 1); equality gives one-hot rows
    tmp = np.arange(nc)[None, :]
    true_label = label[:, None]
    ans = tmp == true_label
    return ans.astype(int)

label = np.array([0, 2, 1, 3])   # example class indices (N = 4, nc = 4)
label = np_onehot(4, label)      # shape (4, 4), one 1 per row
def np_softmax(arr):
    # row-wise softmax over a batch of logits with shape (N, nc)
    assert len(arr.shape) == 2
    arr_exp = np.exp(arr)
    arr_sum = np.sum(arr_exp, axis=1)[:, None]
    return arr_exp / arr_sum

logit = np.random.randn(4, 4)          # example logits, shape (N, nc)
soft_logit = np_softmax(logit)         # per-row probabilities
log_logit = np.log(soft_logit)         # log-probabilities
res = -log_logit * label               # one-hot mask keeps only the true-class term
loss = np.mean(np.sum(res, axis=1))    # sum per sample, then average over the batch
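As a sanity check (my own cross-check, not from the book), this batched loss should agree with averaging per-sample values computed from the better_softmax and cross_entropy_error functions defined earlier:

per_sample = [cross_entropy_error(better_softmax(logit[i]), label[i])
              for i in range(logit.shape[0])]
print(loss, np.mean(per_sample))   # the two values agree (up to the 1e-7 delta)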
Gradient Descent
SGD
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]
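All of the optimizers in this section share the same update(params, grads) interface, where both arguments are dicts of NumPy arrays keyed by parameter name. A minimal usage sketch with made-up parameter names:

params = {'W1': np.random.randn(2, 3), 'b1': np.zeros(3)}   # toy parameters
grads = {'W1': np.ones((2, 3)), 'b1': np.ones(3)}           # pretend gradients
optimizer = SGD(lr=0.1)
optimizer.update(params, grads)   # parameters are modified in place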
Momentum
class Momentum:
    # v <- momentum * v - lr * grad
    # w <- w + v
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
Nesterov
class Nesterov:
    # look ahead: step with the (decayed) velocity first, then with the gradient
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.momentum * self.momentum * self.v[key]
            params[key] -= (1 + self.momentum) * self.lr * grads[key]
AdaGrad
class AdaGrad:
    # per-parameter learning-rate decay: the effective step shrinks as
    # squared gradients accumulate in h
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            # the small constant avoids division by zero when a gradient is 0
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
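A toy run (values invented for illustration) makes the decaying step size visible: with a constant gradient of 1, consecutive AdaGrad steps shrink like 1/sqrt(t):

params = {'w': np.array([1.0])}
grads = {'w': np.array([1.0])}         # constant gradient, for illustration only
opt = AdaGrad(lr=0.1)
for step in range(3):
    before = params['w'].copy()
    opt.update(params, grads)
    print(step, before - params['w'])  # step sizes: ~0.1, ~0.0707, ~0.0577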
RMSProp
class RMSprop:
    # like AdaGrad, but h is an exponential moving average of squared
    # gradients, so old gradients are gradually forgotten
    def __init__(self, lr=0.01, decay_rate=0.99):
        self.lr = lr
        self.decay_rate = decay_rate
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
        for key in params.keys():
            self.h[key] *= self.decay_rate
            self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
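To see it in action, here is a made-up one-dimensional problem (not from the book): minimizing f(w) = w² with its analytic gradient 2w drives w toward 0:

params = {'w': np.array([5.0])}
opt = RMSprop(lr=0.1)
for i in range(100):
    grads = {'w': 2 * params['w']}   # gradient of f(w) = w**2
    opt.update(params, grads)
print(params['w'])                   # close to 0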

This article covers the activation functions commonly used in deep learning (sigmoid, ReLU, softmax, better_softmax), loss functions (mean squared error, cross entropy), and optimization algorithms (SGD, Momentum, Nesterov, AdaGrad, RMSProp, Adam). It also walks through neural network layers (multiplication, addition, convolution, pooling, and so on) and regularization methods (such as batch normalization and dropout), and finally discusses computing precision and recall.