# -*- coding: utf-8 -*-
"""
@author:taoshouzheng
@time:2019/8/8 15:19
@email:tsz1216@sina.com
"""
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.nn import init
from torch import Tensor
from torch import optim
import math
import numpy as np
import random


# First, define a single Contextual LSTM cell
class ContextualLSTMCell(nn.Module):
"""basic Contextual LSTM Cell"""
def __init__(self, input_size, hidden_size, contextual_type, bias=True):
super(ContextualLSTMCell, self).__init__()
        # input dimension
        self.input_size = input_size
        # hidden state dimension
        self.hidden_size = hidden_size
        # number of context types (dimension of the one-hot context vector)
        self.contextual_type = contextual_type
        # whether to use bias terms
        self.bias = bias
# input gate parameter
self.w_ii = Parameter(Tensor(hidden_size, input_size))
self.w_hi = Parameter(Tensor(hidden_size, hidden_size))
self.w_ci = Parameter(Tensor(hidden_size, hidden_size))
self.w_bi = Parameter(Tensor(hidden_size, contextual_type))
self.bias_i = Parameter(Tensor(hidden_size, 1))
# forget gate parameter
self.w_if = Parameter(Tensor(hidden_size, input_size))
self.w_hf = Parameter(Tensor(hidden_size, hidden_size))
self.w_cf = Parameter(Tensor(hidden_size, hidden_size))
self.w_bf = Parameter(Tensor(hidden_size, contextual_type))
self.bias_f = Parameter(Tensor(hidden_size, 1))
# cell memory parameter
self.w_ic = Parameter(Tensor(hidden_size, input_size))
self.w_hc = Parameter(Tensor(hidden_size, hidden_size))
self.w_bc = Parameter(Tensor(hidden_size, contextual_type))
self.bias_c = Parameter(Tensor(hidden_size, 1))
# output gate parameter
self.w_io = Parameter(Tensor(hidden_size, input_size))
self.w_ho = Parameter(Tensor(hidden_size, hidden_size))
self.w_co = Parameter(Tensor(hidden_size, hidden_size))
self.w_bo = Parameter(Tensor(hidden_size, contextual_type))
self.bias_o = Parameter(Tensor(hidden_size, 1))
self.reset_parameters()

    # Parameter initialization: uniform in [-stdv, stdv] with stdv = 1 / sqrt(hidden_size)
def reset_parameters(self):
stdv = 1.0 / math.sqrt(self.hidden_size)
for parameter in self.parameters():
init.uniform_(parameter, -stdv, stdv)

    # Forward pass for a single time step
def forward(self, x, h, c, b):
"""
:param x: 当前时刻的输入
:param h: 上一时刻的隐状态
:param c: 上一时刻的记忆单元
:param b: 当前时刻输入的上下文
:return:
"""
        # input gate (with a peephole connection to the previous cell state)
        ci = torch.sigmoid(self.w_ii @ x + self.w_hi @ h + self.w_ci @ c + self.w_bi @ b + self.bias_i)
        # forget gate
        cf = torch.sigmoid(self.w_if @ x + self.w_hf @ h + self.w_cf @ c + self.w_bf @ b + self.bias_f)
        # new cell memory
        cc = cf * c + ci * torch.tanh(self.w_ic @ x + self.w_hc @ h + self.w_bc @ b + self.bias_c)
        # output gate; as in the standard peephole LSTM, the hidden-state term is
        # added and the peephole connection reads the new cell state cc
        co = torch.sigmoid(self.w_io @ x + self.w_ho @ h + self.w_co @ cc + self.w_bo @ b + self.bias_o)
        # new hidden state
        ch = co * torch.tanh(cc)
        return ch, cc

    # Initialize the hidden state h and the memory cell c
    def init_state(self, batch_size, hidden_size):
        # states are created directly in (hidden_size, batch) layout to match the matmuls
        h_init = torch.rand(hidden_size, batch_size)
        c_init = torch.rand(hidden_size, batch_size)
        return h_init, c_init
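
# A quick single-cell sanity check, kept as a commented sketch with hypothetical
# sizes so that importing this module stays side-effect free:
#   cell = ContextualLSTMCell(input_size=10, hidden_size=20, contextual_type=4)
#   h0, c0 = cell.init_state(batch_size=8, hidden_size=20)
#   x = torch.rand(10, 8)                        # (input_size, batch)
#   b = torch.eye(4)[:, torch.randint(4, (8,))]  # one-hot contexts, (contextual_type, batch)
#   h1, c1 = cell(x, h0, c0, b)                  # each of shape (20, 8)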


# Define the full (multi-layer, multi-step) Contextual LSTM model
class ContextualLSTM(nn.Module):
"""Contextual LSTM model"""
def __init__(self, num_steps, num_layers, input_size, hidden_size, contextual_type, bias=True):
super(ContextualLSTM, self).__init__()
        # sequence length (number of time steps)
        self.num_steps = num_steps
        # number of stacked layers
        self.num_layers = num_layers
        # input dimension
        self.input_size = input_size
        # hidden state dimension
        self.hidden_size = hidden_size
        # number of context types
        self.contextual_type = contextual_type
        # whether to use bias terms
        self.bias = bias
        # list holding the Contextual LSTM cells of all layers
        self._all_layers = []
        for k in range(self.num_layers):
            layer_name = 'cell{}'.format(k)
            # layers above the first consume the previous layer's hidden state,
            # so their input dimension is hidden_size rather than input_size
            layer_input_size = self.input_size if k == 0 else self.hidden_size
            cell = ContextualLSTMCell(layer_input_size, self.hidden_size, self.contextual_type, self.bias)
            setattr(self, layer_name, cell)
            self._all_layers.append(cell)

    # Forward pass of the Contextual LSTM model
def forward(self, inputs, contexts):
        # per-layer internal states (h, c)
        internal_state = []
        # hidden states of the top layer, one per time step
        outputs = []
        # iterate over time steps
        for step in range(self.num_steps):
            # transpose to (input_size, batch) to match the cell's matmul layout
            x_step = inputs[step].t()
            # one-hot context at the current step, (contextual_type, batch)
            context_step = contexts[step].t()
            # iterate over the layers, bottom to top
            for layer in range(self.num_layers):
                layer_name = 'cell{}'.format(layer)
                # at the first time step, initialize every layer's states
                if step == 0:
                    batch_size = inputs[step].size()[0]
                    h, c = getattr(self, layer_name).init_state(batch_size=batch_size, hidden_size=self.hidden_size)
                    internal_state.append((h, c))
                # one cell step; the new hidden state is fed to the layer above
                (h, c) = internal_state[layer]
                x_step, c_new = getattr(self, layer_name)(x_step, h, c, context_step)
                internal_state[layer] = (x_step, c_new)
            # store the top layer's hidden state for this time step
            outputs.append(x_step)
        # return all top-layer hidden states plus the final hidden state and memory cell
        return outputs, (x_step, c_new)
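
# Call convention for the full model, as a hedged sketch with hypothetical sizes:
#   clstm = ContextualLSTM(num_steps=5, num_layers=2, input_size=10,
#                          hidden_size=20, contextual_type=4)
#   inputs = torch.rand(5, 8, 10)    # (num_steps, batch, input_size)
#   contexts = torch.rand(5, 8, 4)   # (num_steps, batch, contextual_type); one-hot in practice
#   outputs, (h_n, c_n) = clstm(inputs, contexts)
#   # outputs: list of num_steps tensors, each (hidden_size, batch)
#   # h_n, c_n: final top-layer hidden state and memory cell, each (hidden_size, batch)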


class MyModel(nn.Module):
"""Classifier Model"""
def __init__(self, num_steps, num_layers, input_size, hidden_size, contextual_type, output_size, bias=True):
super(MyModel, self).__init__()
self.num_steps = num_steps
self.num_layers = num_layers
self.input_size = input_size
self.hidden_size = hidden_size
self.contextual_type = contextual_type
self.output_size = output_size
self.bias = bias
self.contextual_lstm = ContextualLSTM(self.num_steps, self.num_layers, self.input_size, self.hidden_size,
self.contextual_type, self.bias)
self.linear = nn.Linear(self.hidden_size, self.output_size)
def forward(self, inputs, contexts):
output, (h_final, c_final) = self.contextual_lstm(inputs, contexts)
result = torch.sigmoid(self.linear(h_final.t()))
return result


if __name__ == '__main__':
my_model = MyModel(num_steps=5, num_layers=1, input_size=10, hidden_size=20, contextual_type=4, output_size=1,
bias=True)
print(my_model)
for name, parameter in my_model.named_parameters():
print(name, parameter)
    # random inputs: (num_steps, batch, input_size)
    inputs = torch.rand(5, 200, 10)
    # random one-hot contexts: (num_steps, batch, contextual_type)
    context = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]
    lines = []
    for i in range(5):
        line = [random.choice(context) for j in range(200)]
        lines.append(line)
    contexts = torch.FloatTensor(np.array(lines))
    print(contexts.shape)
    # random binary labels: (batch, 1)
    label = np.array([random.choice([0, 1]) for i in range(200)], dtype=np.int64)
    target = torch.FloatTensor(label).unsqueeze(1)
    criterion = nn.BCELoss(reduction='mean')
    optimizer = optim.SGD(my_model.parameters(), lr=0.001, momentum=0.9)
    EPOCH = 50
    for i in range(EPOCH):
        output = my_model(inputs, contexts)
        loss = criterion(output, target)
        print('epoch', i + 1, ':', loss.item())
        # clear accumulated gradients before each backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# for name, parameter in my_model.named_parameters():
# print(name, parameter)

CLSTM (Contextual LSTM) in PyTorch
This article walks through the design and implementation of the Contextual LSTM model, explaining its application to sequence prediction tasks and, in particular, how fusing contextual information into the gates strengthens the model's predictions.
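
For reference, these are the gate equations that the ContextualLSTMCell above implements, with the one-hot context vector b_t entering every gate alongside the usual input, hidden-state, and peephole terms:

$$
\begin{aligned}
i_t &= \sigma(W_{ii} x_t + W_{hi} h_{t-1} + W_{ci} c_{t-1} + W_{bi} b_t + \beta_i) \\
f_t &= \sigma(W_{if} x_t + W_{hf} h_{t-1} + W_{cf} c_{t-1} + W_{bf} b_t + \beta_f) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh(W_{ic} x_t + W_{hc} h_{t-1} + W_{bc} b_t + \beta_c) \\
o_t &= \sigma(W_{io} x_t + W_{ho} h_{t-1} + W_{co} c_t + W_{bo} b_t + \beta_o) \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$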