摘要
本文使用纯 Python 和 PyTorch 对比实现循环神经网络 LSTM 单元及其反向传播。
相关
配套代码, 请参考文章:
长短期记忆网络 LSTMCell 单元详解及反向传播的梯度求导
文章索引:
https://blog.youkuaiyun.com/oBrightLamp/article/details/85067981
正文
1. LSTMCell 类
文件目录 : vanilla_nn/lstmcell.py
import numpy as np
def sigmoid(x):
    """Element-wise logistic sigmoid.

    Mathematically identical to ``1 / (1 + exp(-x))`` but computed via
    ``tanh`` so it never overflows: ``np.exp(-x)`` overflows (with a
    RuntimeWarning) for large-magnitude negative ``x``, while ``np.tanh``
    saturates cleanly at +/-1 for any input.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))
class LSTMCell:
    """A single LSTM cell (forward pass) implemented with NumPy.

    Mirrors the gate layout of ``torch.nn.LSTMCell``: the stacked weight
    matrices hold the input, forget, cell (candidate) and output gates in
    that order, each occupying ``hidden_size`` rows.

    Parameters
    ----------
    weight_ih : ndarray, shape (4 * hidden_size, input_size)
        Input-to-hidden weights.
    weight_hh : ndarray, shape (4 * hidden_size, hidden_size)
        Hidden-to-hidden weights.
    bias_ih, bias_hh : ndarray, shape (4 * hidden_size,)
        Input-side and hidden-side biases.

    Every forward call pushes its intermediate activations onto the
    ``*_stack`` lists so a later backward pass can pop them in reverse
    time order.
    """

    def __init__(self, weight_ih, weight_hh, bias_ih, bias_hh):
        self.weight_ih = weight_ih
        self.weight_hh = weight_hh
        self.bias_ih = bias_ih
        self.bias_hh = bias_hh

        # Gradients w.r.t. the previous cell/hidden state; reset on each
        # forward step and filled in during backprop.
        self.dc_prev = None
        self.dh_prev = None

        # Per-time-step parameter gradients accumulated during backprop.
        self.weight_ih_grad_stack = []
        self.weight_hh_grad_stack = []
        self.bias_ih_grad_stack = []
        self.bias_hh_grad_stack = []

        # Forward-pass caches consumed by the backward pass.
        self.x_stack = []
        self.dx_list = []
        self.dh_prev_stack = []
        self.h_prev_stack = []
        self.c_prev_stack = []
        self.h_next_stack = []
        self.c_next_stack = []
        self.input_gate_stack = []
        self.forget_gate_stack = []
        self.output_gate_stack = []
        self.cell_memory_stack = []

    @staticmethod
    def _sigmoid(z):
        """Numerically stable element-wise logistic sigmoid
        (identical to 1 / (1 + exp(-z)))."""
        return 0.5 * (1.0 + np.tanh(0.5 * z))

    def __call__(self, x, h_prev, c_prev):
        """Run one LSTM time step.

        Parameters
        ----------
        x : ndarray, shape (batch, input_size)
        h_prev, c_prev : ndarray, shape (batch, hidden_size)

        Returns
        -------
        (h_next, c_next) : tuple of ndarray
            The new hidden and cell states.  Bug fix: the original ended
            with a bare ``return`` (returning ``None``), which made the
            cell unusable for chaining time steps.
        """
        # Pre-activations for all four gates at once:
        # a = x @ W_ih^T + h_prev @ W_hh^T + b_ih + b_hh
        a_vector = np.dot(x, self.weight_ih.T) + np.dot(h_prev, self.weight_hh.T)
        a_vector += self.bias_ih + self.bias_hh

        # Slice the stacked pre-activations into the four gates
        # (input, forget, candidate, output — PyTorch order).
        h_size = np.shape(h_prev)[1]
        a_i = a_vector[:, h_size * 0:h_size * 1]
        a_f = a_vector[:, h_size * 1:h_size * 2]
        a_c = a_vector[:, h_size * 2:h_size * 3]
        a_o = a_vector[:, h_size * 3:]

        input_gate = self._sigmoid(a_i)
        forget_gate = self._sigmoid(a_f)
        cell_memory = np.tanh(a_c)
        output_gate = self._sigmoid(a_o)

        # c_t = f * c_{t-1} + i * g ;  h_t = o * tanh(c_t)
        c_next = (forget_gate * c_prev) + (input_gate * cell_memory)
        h_next = output_gate * np.tanh(c_next)

        # Cache everything the backward pass needs for this time step.
        self.x_stack.append(x)
        self.h_prev_stack.append(h_prev)
        self.c_prev_stack.append(c_prev)
        self.c_next_stack.append(c_next)
        self.h_next_stack.append(h_next)
        self.input_gate_stack.append(input_gate)
        self.forget_gate_stack.append(forget_gate)
        self.output_gate_stack.append(output_gate)
        self.cell_memory_stack.append(cell_memory)

        # Zero-initialise the state gradients for the coming backward pass.
        self.dc_prev = np.zeros_like(c_next)
        self.dh_prev = np.zeros_like(h_next)

        return h_next, c_next