Disclaimer: the code is mainly based on https://github.com/wzg16/tensorflow-convlstm-cell/blob/master/cell.py,
with the variable names renamed to match the figure below. For a very accessible introduction to the LSTM itself, I recommend https://zhuanlan.zhihu.com/p/32085405
I personally prefer to refer to the gates as follows (see the equations sketched after this list):
z_f: the forget gate, because it is responsible for forgetting part of the cell state c
z_i: the memory (input) gate, because it is responsible for selecting, from the fused input z, the information that needs to be memorized
z: the fused input, which is not a gate. It is the transformation that fuses (h, x), even though its formula looks the same as that of the gates.
z_o: the output gate, because it is responsible for extracting from c the information that needs to be output
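In this notation, the update computed by the ConvLSTMCell below can be summarized as follows (a sketch of what the code does, ignoring layer normalization; $*$ denotes convolution, $\circ$ element-wise multiplication, and the $W_{c\cdot}$ peephole terms only appear when peephole=True):

$$
\begin{aligned}
[\,\tilde z,\ \tilde z_i,\ \tilde z_f,\ \tilde z_o\,] &= W * [x, h] + b \\
z_i &= \sigma(\tilde z_i + W_{ci} \circ c) \\
z_f &= \sigma(\tilde z_f + W_{cf} \circ c + b_{forget}) \\
c_{new} &= z_f \circ c + z_i \circ \tanh(\tilde z) \\
z_o &= \sigma(\tilde z_o + W_{co} \circ c_{new}) \\
h_{new} &= z_o \circ \tanh(c_{new})
\end{aligned}
$$

Here $b_{forget}$ is the forget_bias argument of the cell.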

cell.py
import tensorflow as tf


class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
    """A LSTM cell with convolutions instead of multiplications, defined as a subclass of RNNCell.

    Reference:
      Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach
      for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
    """

    def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh,
                 normalize=True, peephole=True, data_format='channels_last', reuse=None):
        """
        :param shape: spatial size of the input images, shape=[height, width]
        :param filters: scalar, number of kernels, i.e. the channel count of the hidden/cell state
        :param kernel: kernel size, shape=[kernel_height, kernel_width]
        :param forget_bias: bias added to the forget gate before the sigmoid
        :param activation: activation function
        :param normalize: whether to apply layer normalization
        :param peephole: whether the gates also receive the cell state as input
        :param data_format: 'channels_last' for [N,H,W,C] or 'channels_first' for [N,C,H,W]
        :param reuse: whether to reuse variables in an existing scope
        """
        super(ConvLSTMCell, self).__init__(_reuse=reuse)
        self._kernel = kernel          # convolution kernel size
        self._filters = filters        # number of kernels, scalar, i.e. channel count of cell/hidden state
        self._forget_bias = forget_bias
        self._activation = activation
        self._normalize = normalize
        self._peephole = peephole      # whether the gates receive the cell state as input
        if data_format == 'channels_last':
            self._size = tf.TensorShape(shape + [self._filters])  # state shape, channels last
            self._feature_axis = self._size.ndims                 # index of the channel axis (= rank of the state)
            self._data_format = None                              # data format, i.e. the channel ordering
        elif data_format == 'channels_first':
            self._size = tf.TensorShape([self._filters] + shape)
            self._feature_axis = 0                                 # feature axis, where the channels live
            self._data_format = 'NC'
        else:
            raise ValueError('Unknown data_format')

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)

    @property
    def output_size(self):
        return self._size

    def call(self, x, state):
        """
        c: cell state
        h: hidden state
        x: input tensor
        :param x: input at the current time step
        :param state: LSTMStateTuple (c, h) from the previous time step
        :return: (output, new_state)
        """
        c, h = state

        # Concatenate the input and hidden state along the channel axis, shape=[N,H,W,C].
        x = tf.concat([x, h], axis=self._feature_axis)

        # Convolution + bias.
        input_channel = x.shape[-1].value                                # number of input channels
        output_channel = 4 * self._filters if self._filters > 1 else 4  # number of channels after the convolution
        W = tf.get_variable('kernel', self._kernel + [input_channel, output_channel])    # convolution kernel
        moment_ = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)         # pre-activations of all gates
        if not self._normalize:
            moment_ += tf.get_variable('bias', [output_channel], initializer=tf.zeros_initializer())

        # Produce the individual gates: split moment_ into 4 equal parts along the channel axis.
        z, z_i, z_f, z_o = tf.split(moment_, 4, axis=self._feature_axis)

        if self._peephole:  # let the gates receive the cell state, c.shape=[N,H,W,filters]
            z_i += tf.get_variable('W_ci', c.shape[1:]) * c
            z_f += tf.get_variable('W_cf', c.shape[1:]) * c

        if self._normalize:  # layer normalization helps convergence
            z = tf.contrib.layers.layer_norm(z)
            z_i = tf.contrib.layers.layer_norm(z_i)
            z_f = tf.contrib.layers.layer_norm(z_f)

        # Final memory and forget gates, squashed to [0,1]: 0 means forget completely, 1 means keep completely.
        z_f = tf.sigmoid(z_f + self._forget_bias)    # forget gate
        z_i = tf.sigmoid(z_i)                        # memory (input) gate
        c_new = c * z_f + z_i * self._activation(z)  # update the cell state: forget part of it, add some new information

        if self._peephole:  # let the new cell state take part in the output gate
            z_o += tf.get_variable('W_co', c_new.shape[1:]) * c_new

        if self._normalize:
            z_o = tf.contrib.layers.layer_norm(z_o)
            c_new = tf.contrib.layers.layer_norm(c_new)

        z_o = tf.sigmoid(z_o)                  # output gate, squashed to [0,1]
        h_new = z_o * self._activation(c_new)  # new hidden state / output
        state = tf.nn.rnn_cell.LSTMStateTuple(c_new, h_new)
        return h_new, state
class ConvGRUCell(tf.nn.rnn_cell.RNNCell):
    """A GRU cell with convolutions instead of multiplications."""

    def __init__(self, shape, filters, kernel, activation=tf.tanh, normalize=True,
                 data_format='channels_last', reuse=None):
        super(ConvGRUCell, self).__init__(_reuse=reuse)
        self._filters = filters
        self._kernel = kernel
        self._activation = activation
        self._normalize = normalize
        if data_format == 'channels_last':
            self._size = tf.TensorShape(shape + [self._filters])
            self._feature_axis = self._size.ndims
            self._data_format = None
        elif data_format == 'channels_first':
            self._size = tf.TensorShape([self._filters] + shape)
            self._feature_axis = 0
            self._data_format = 'NC'
        else:
            raise ValueError('Unknown data_format')

    @property
    def state_size(self):
        return self._size

    @property
    def output_size(self):
        return self._size

    def call(self, x, h):
        channels = x.shape[self._feature_axis].value

        with tf.variable_scope('gates'):
            # Reset gate r and update gate u, computed from the concatenated (x, h).
            inputs = tf.concat([x, h], axis=self._feature_axis)
            n = channels + self._filters
            m = 2 * self._filters if self._filters > 1 else 2
            W = tf.get_variable('kernel', self._kernel + [n, m])
            y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
            if self._normalize:
                r, u = tf.split(y, 2, axis=self._feature_axis)
                r = tf.contrib.layers.layer_norm(r)
                u = tf.contrib.layers.layer_norm(u)
            else:
                y += tf.get_variable('bias', [m], initializer=tf.ones_initializer())
                r, u = tf.split(y, 2, axis=self._feature_axis)
            r, u = tf.sigmoid(r), tf.sigmoid(u)

        with tf.variable_scope('candidate'):
            # Candidate state, computed from x and the reset hidden state r * h.
            inputs = tf.concat([x, r * h], axis=self._feature_axis)
            n = channels + self._filters
            m = self._filters
            W = tf.get_variable('kernel', self._kernel + [n, m])
            y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
            if self._normalize:
                y = tf.contrib.layers.layer_norm(y)
            else:
                y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())

        # Blend the previous hidden state and the candidate using the update gate.
        h = u * h + (1 - u) * self._activation(y)
        return h, h
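For comparison, the ConvGRUCell above keeps only a single state $h$ and implements the usual GRU update with convolutions; roughly (again ignoring layer normalization, with $*$ convolution and $\circ$ element-wise multiplication):

$$
\begin{aligned}
r &= \sigma(W_r * [x, h] + b_r) \\
u &= \sigma(W_u * [x, h] + b_u) \\
\tilde h &= \tanh(W_c * [x, r \circ h] + b_c) \\
h_{new} &= u \circ h + (1-u) \circ \tilde h
\end{aligned}
$$

When normalize=False, the gate bias is initialized to ones, which biases the update gate $u$ towards keeping the previous state early in training, playing a role similar to forget_bias in the LSTM cell.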
Running and testing the code:
test.py
import tensorflow as tf
batch_size = 32
timesteps = 100
shape = [640, 480] # [height,width]
kernel = [3, 3] # kernel size
channels = 3 # input_channel
filters = 12 # output_channel
# Create a placeholder for videos.
# Required input format: inputs.shape=[batch_size, time_steps, height, width, input_channels]
# i.e. one more time_step dimension than a regular CNN, placed right after batch_size
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
# Add the ConvLSTM step.
# tf.nn.dynamic_rnn automatically picks up the time_steps dimension of inputs and unrolls the network accordingly
from cell import ConvLSTMCell
cell = ConvLSTMCell(shape, filters, kernel)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
# outputs contains the output of every time step, outputs.shape=[batch_size, time_steps, height, width, output_channels]
# state is a tuple holding the Ct and ht of the last time step; ht equals the last time step of outputs
# state.c : Ct, state.c.shape=[batch_size, height, width, output_channels]
# state.h : ht, state.h.shape=[batch_size, height, width, output_channels]
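# A hypothetical check (not part of the original script): the last time step of outputs equals state.h,
# e.g. last_h = outputs[:, -1]   # shape=[batch_size, height, width, output_channels]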
# There's also a ConvGRUCell that is more memory efficient.
from cell import ConvGRUCell
cell = ConvGRUCell(shape, filters, kernel)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
# It's also possible to enter 2D input or 4D input instead of 3D.
shape = [100]
kernel = [3]
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
cell = ConvLSTMCell(shape, filters, kernel)
outputs, state = tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs, dtype=inputs.dtype)
shape = [50, 50, 50]
kernel = [1, 3, 5]
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
cell = ConvGRUCell(shape, filters, kernel)
outputs, state= tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs, dtype=inputs.dtype)
The code has been tested and runs. For a more detailed explanation of the parameters and return values of outputs, state = tf.nn.dynamic_rnn, see https://blog.youkuaiyun.com/Strive_For_Future/article/details/103605290, which covers them quite well.
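As a quick sanity check, here is a minimal run sketch (a hypothetical standalone script, assuming TF 1.x, with much smaller sizes than above so the graph fits in memory; the random input is only used to verify shapes):

import numpy as np
import tensorflow as tf
from cell import ConvLSTMCell

batch_size, timesteps = 2, 5
shape, kernel, channels, filters = [16, 16], [3, 3], 3, 8
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
cell = ConvLSTMCell(shape, filters, kernel)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.rand(batch_size, timesteps, shape[0], shape[1], channels).astype(np.float32)
    out, st = sess.run([outputs, state], feed_dict={inputs: x})
    print(out.shape)               # (2, 5, 16, 16, 8)
    print(st.c.shape, st.h.shape)  # (2, 16, 16, 8) (2, 16, 16, 8)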