Disclaimer: the code is mainly based on https://github.com/wzg16/tensorflow-convlstm-cell/blob/master/cell.py,
with the variable names renamed to match the figure below. For a very accessible introduction to the LSTM itself, I recommend https://zhuanlan.zhihu.com/p/32085405
I personally prefer to refer to the gates as follows (see the equations sketched after this list):
z_f: the forget gate, because it is responsible for forgetting part of the cell state c
z_i: the memory (input) gate, because it is responsible for selecting, from the fused input z, the information that needs to be memorized
z: the fused input, which is not a gate. It is the transformation that fuses (h, x), even though its formula looks the same as that of the gates.
z_o: the output gate, because it is responsible for extracting from c the information that needs to be output
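In this notation, the update computed by the ConvLSTMCell below can be summarized as follows (a sketch of what the code does, ignoring layer normalization; $*$ denotes convolution, $\circ$ element-wise multiplication, and the $W_{c\cdot}$ peephole terms only appear when peephole=True):

$$
\begin{aligned}
[\,\tilde z,\ \tilde z_i,\ \tilde z_f,\ \tilde z_o\,] &= W * [x, h] + b \\
z_i &= \sigma(\tilde z_i + W_{ci} \circ c) \\
z_f &= \sigma(\tilde z_f + W_{cf} \circ c + b_{forget}) \\
c_{new} &= z_f \circ c + z_i \circ \tanh(\tilde z) \\
z_o &= \sigma(\tilde z_o + W_{co} \circ c_{new}) \\
h_{new} &= z_o \circ \tanh(c_{new})
\end{aligned}
$$

Here $b_{forget}$ is the forget_bias argument of the cell.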

cell.py
import tensorflow as tf


class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
    """A LSTM cell with convolutions instead of multiplications, defined as a subclass of RNNCell.

    Reference:
      Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach
      for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
    """

    def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh,
                 normalize=True, peephole=True, data_format='channels_last', reuse=None):
        """
        :param shape: spatial size of the input images, shape=[height, width]
        :param filters: scalar, number of kernels, i.e. the channel count of the hidden/cell state
        :param kernel: kernel size, shape=[kernel_height, kernel_width]
        :param forget_bias: bias added to the forget gate before the sigmoid
        :param activation: activation function
        :param normalize: whether to apply layer normalization
        :param peephole: whether the gates also receive the cell state as input
        :param data_format: 'channels_last' for [N,H,W,C] or 'channels_first' for [N,C,H,W]
        :param reuse: whether to reuse variables in an existing scope
        """
        super(ConvLSTMCell, self).__init__(_reuse=reuse)
        self._kernel = kernel          # convolution kernel size
        self._filters = filters        # number of kernels, scalar, i.e. channel count of cell/hidden state
        self._forget_bias = forget_bias
        self._activation = activation
        self._normalize = normalize
        self._peephole = peephole      # whether the gates receive the cell state as input
        if data_format == 'channels_last':
            self._size = tf.TensorShape(shape + [self._filters])  # state shape, channels last
            self._feature_axis = self._size.ndims                 # index of the channel axis (= rank of the state)
            self._data_format = None                              # data format, i.e. the channel ordering
        elif data_format == 'channels_first':
            self._size = tf.TensorShape([self._filters] + shape)
            self._feature_axis = 0                                 # feature axis, where the channels live
            self._data_format = 'NC'
        else:
            raise ValueError('Unknown data_format')

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)

    @property
    def output_size(self):
        return self._size

    def call(self, x, state):
        """
        c: cell state
        h: hidden state
        x: input tensor
        :param x: input at the current time step
        :param state: LSTMStateTuple (c, h) from the previous time step
        :return: (output, new_state)
        """
        c, h = state

        # Concatenate the input and hidden state along the channel axis, shape=[N,H,W,C].
        x = tf.concat([x, h], axis=self._feature_axis)

        # Convolution + bias.
        input_channel = x.shape[-1].value                                # number of input channels
        output_channel = 4 * self._filters if self._filters > 1 else 4  # number of channels after the convolution
        W = tf.get_variable('kernel', self._kernel + [input_channel, output_channel])    # convolution kernel
        moment_ = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)         # pre-activations of all gates
        if not self._normalize:
            moment_ += tf.get_variable('bias', [output_channel], initializer=tf.zeros_initializer())

        # Produce the individual gates: split moment_ into 4 equal parts along the channel axis.
        z, z_i, z_f, z_o = tf.split(moment_, 4, axis=self._feature_axis)

        if self._peephole:  # let the gates receive the cell state, c.shape=[N,H,W,filters]
            z_i += tf.get_variable('W_ci', c.shape[1:]) * c
            z_f += tf.get_variable('W_cf', c.shape[1:]) * c

        if self._normalize:  # layer normalization helps convergence
            z = tf.contrib.layers.layer_norm(z)
            z_i = tf.contrib.layers.layer_norm(z_i)
            z_f = tf.contrib.layers.layer_norm(z_f)

        # Final memory and forget gates, squashed to [0,1]: 0 means forget completely, 1 means keep completely.
        z_f = tf.sigmoid(z_f + self._forget_bias)    # forget gate
        z_i = tf.sigmoid(z_i)                        # memory (input) gate
        c_new = c * z_f + z_i * self._activation(z)  # update the cell state: forget part of it, add some new information

        if self._peephole:  # let the new cell state take part in the output gate
            z_o += tf.get_variable('W_co', c_new.shape[1:]) * c_new

        if self._normalize:
            z_o = tf.contrib.layers.layer_norm(z_o)
            c_new = tf.contrib.layers.layer_norm(c_new)

        z_o = tf.sigmoid(z_o)                  # output gate, squashed to [0,1]
        h_new = z_o * self._activation(c_new)  # new hidden state / output
        state = tf.nn.rnn_cell.LSTMStateTuple(c_new, h_new)
        return h_new, state
class ConvGRUCell(tf.nn.rnn_cell.RNNCell):
    """A GRU cell with convolutions instead of multiplications."""

    def __init__(self, shape, filters, kernel, activation=tf.tanh, normalize=True,
                 data_format='channels_last', reuse=None):
        super(ConvGRUCell, self).__init__(_reuse=reuse)
        self._filters = filters
        self._kernel = kernel
        self._activation = activation
        self._normalize = normalize
        if data_format == 'channels_last':
            self._size = tf.TensorShape(shape + [self._filters])
            self._feature_axis = self._size.ndims
            self._data_format = None
        elif data_format == 'channels_first':
            self._size = tf.TensorShape([self._filters] + shape)
            self._feature_axis = 0
            self._data_format = 'NC'
        else:
            raise ValueError('Unknown data_format')

    @property
    def state_size(self):
        return self._size

    @property
    def output_size(self):
        return self._size

    def call(self, x, h):
        channels = x.shape[self._feature_axis].value

        with tf.variable_scope('gates'):
            # Reset gate r and update gate u, computed from the concatenated (x, h).
            inputs = tf.concat([x, h], axis=self._feature_axis)
            n = channels + self._filters
            m = 2 * self._filters if self._filters > 1 else 2
            W = tf.get_variable('kernel', self._kernel + [n, m])
            y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
            if self._normalize:
                r, u = tf.split(y, 2, axis=self._feature_axis)
                r = tf.contrib.layers.layer_norm(r)
                u = tf.contrib.layers.layer_norm(u)
            else:
                y += tf.get_variable('bias', [m], initializer=tf.ones_initializer())
                r, u = tf.split(y, 2, axis=self._feature_axis)
            r, u = tf.sigmoid(r), tf.sigmoid(u)

        with tf.variable_scope('candidate'):
            # Candidate state, computed from x and the reset hidden state r * h.
            inputs = tf.concat([x, r * h], axis=self._feature_axis)
            n = channels + self._filters
            m = self._filters
            W = tf.get_variable('kernel', self._kernel + [n, m])
            y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
            if self._normalize:
                y = tf.contrib.layers.layer_norm(y)
            else:
                y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())

        # Blend the previous hidden state and the candidate using the update gate.
        h = u * h + (1 - u) * self._activation(y)
        return h, h
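For comparison, the ConvGRUCell above keeps only a single state $h$ and implements the usual GRU update with convolutions; roughly (again ignoring layer normalization, with $*$ convolution and $\circ$ element-wise multiplication):

$$
\begin{aligned}
r &= \sigma(W_r * [x, h] + b_r) \\
u &= \sigma(W_u * [x, h] + b_u) \\
\tilde h &= \tanh(W_c * [x, r \circ h] + b_c) \\
h_{new} &= u \circ h + (1-u) \circ \tilde h
\end{aligned}
$$

When normalize=False, the gate bias is initialized to ones, which biases the update gate $u$ towards keeping the previous state early in training, playing a role similar to forget_bias in the LSTM cell.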
Running and testing the code:
test.py
import tensorflow as tf
batch_size = 32
timesteps = 100
shape = [640, 480] # [height,width]
kernel = [3, 3] # kernel size
channels = 3 # input_channel
filters = 12 # output_channel
# Create a placeholder for videos.
# Required input format: inputs.shape=[batch_size, time_steps, height, width, input_channels]
# i.e. one more time_step dimension than a regular CNN, placed right after batch_size
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
# Add the ConvLSTM step.
# tf.nn.dynamic_rnn automatically picks up the time_steps dimension of inputs and unrolls the network accordingly
from cell import ConvLSTMCell
cell = ConvLSTMCell(shape, filters, kernel)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
# outputs contains the output of every time step, outputs.shape=[batch_size, time_steps, height, width, output_channels]
# state is a tuple holding the Ct and ht of the last time step; ht equals the last time step of outputs
# state.c : Ct, state.c.shape=[batch_size, height, width, output_channels]
# state.h : ht, state.h.shape=[batch_size, height, width, output_channels]
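# A hypothetical check (not part of the original script): the last time step of outputs equals state.h,
# e.g. last_h = outputs[:, -1]   # shape=[batch_size, height, width, output_channels]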
# There's also a ConvGRUCell that is more memory efficient.
from cell import ConvGRUCell
cell = ConvGRUCell(shape, filters, kernel)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
# It's also possible to enter 2D input or 4D input instead of 3D.
shape = [100]
kernel = [3]
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
cell = ConvLSTMCell(shape, filters, kernel)
outputs, state = tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs, dtype=inputs.dtype)
shape = [50, 50, 50]
kernel = [1, 3, 5]
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
cell = ConvGRUCell(shape, filters, kernel)
outputs, state= tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs, dtype=inputs.dtype)
The code has been tested and runs. For a more detailed explanation of the parameters and return values of outputs, state = tf.nn.dynamic_rnn, see https://blog.youkuaiyun.com/Strive_For_Future/article/details/103605290, which covers them quite well.
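As a quick sanity check, here is a minimal run sketch (a hypothetical standalone script, assuming TF 1.x, with much smaller sizes than above so the graph fits in memory; the random input is only used to verify shapes):

import numpy as np
import tensorflow as tf
from cell import ConvLSTMCell

batch_size, timesteps = 2, 5
shape, kernel, channels, filters = [16, 16], [3, 3], 3, 8
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
cell = ConvLSTMCell(shape, filters, kernel)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.rand(batch_size, timesteps, shape[0], shape[1], channels).astype(np.float32)
    out, st = sess.run([outputs, state], feed_dict={inputs: x})
    print(out.shape)               # (2, 5, 16, 16, 8)
    print(st.c.shape, st.h.shape)  # (2, 16, 16, 8) (2, 16, 16, 8)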