# -*- coding: utf-8 -*-
"""
@author:taoshouzheng
@time:2019/8/8 15:19
@email:tsz1216@sina.com
"""
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.nn import init
from torch import Tensor
from torch import optim
import math
import numpy as np
import random


# First, define a single Contextual LSTM cell
class ContextualLSTMCell(nn.Module):
"""basic Contextual LSTM Cell"""
def __init__(self, input_size, hidden_size, contextual_type, bias=True):
super(ContextualLSTMCell, self).__init__()
        # input dimension
        self.input_size = input_size
        # hidden state dimension
        self.hidden_size = hidden_size
        # number of context types (dimension of the one-hot context vector)
        self.contextual_type = contextual_type
        # whether to use bias terms
        self.bias = bias
# input gate parameter
self.w_ii = Parameter(Tensor(hidden_size, input_size))
self.w_hi = Parameter(Tensor(hidden_size, hidden_size))
self.w_ci = Parameter(Tensor(hidden_size, hidden_size))
self.w_bi = Parameter(Tensor(hidden_size, contextual_type))
self.bias_i = Parameter(Tensor(hidden_size, 1))
# forget gate parameter
self.w_if = Parameter(Tensor(hidden_size, input_size))
self.w_hf = Parameter(Tensor(hidden_size, hidden_size))
self.w_cf = Parameter(Tensor(hidden_size, hidden_size))
self.w_bf = Parameter(Tensor(hidden_size, contextual_type))
self.bias_f = Parameter(Tensor(hidden_size, 1))
# cell memory parameter
self.w_ic = Parameter(Tensor(hidden_size, input_size))
self.w_hc = Parameter(Tensor(hidden_size, hidden_size))
self.w_bc = Parameter(Tensor(hidden_size, contextual_type))
self.bias_c = Parameter(Tensor(hidden_size, 1))
# output gate parameter
self.w_io = Parameter(Tensor(hidden_size, input_size))
self.w_ho = Parameter(Tensor(hidden_size, hidden_size))
self.w_co = Parameter(Tensor(hidden_size, hidden_size))
self.w_bo = Parameter(Tensor(hidden_size, contextual_type))
self.bias_o = Parameter(Tensor(hidden_size, 1))
self.reset_parameters()

    # Parameter initialization: uniform in [-stdv, stdv] with stdv = 1 / sqrt(hidden_size)
def reset_parameters(self):
stdv = 1.0 / math.sqrt(self.hidden_size)
for parameter in self.parameters():
init.uniform_(parameter, -stdv, stdv)

    # Forward pass for a single time step
def forward(self, x, h, c, b):
"""
:param x: 当前时刻的输入
:param h: 上一时刻的隐状态
:param c: 上一时刻的记忆单元
:param b: 当前时刻输入的上下文
:return:
"""
        # input gate (with a peephole connection to the previous cell state)
        ci = torch.sigmoid(self.w_ii @ x + self.w_hi @ h + self.w_ci @ c + self.w_bi @ b + self.bias_i)
        # forget gate
        cf = torch.sigmoid(self.w_if @ x + self.w_hf @ h + self.w_cf @ c + self.w_bf @ b + self.bias_f)
        # new cell memory
        cc = cf * c + ci * torch.tanh(self.w_ic @ x + self.w_hc @ h + self.w_bc @ b + self.bias_c)
        # output gate; as in the standard peephole LSTM, the hidden-state term is
        # added and the peephole connection reads the new cell state cc
        co = torch.sigmoid(self.w_io @ x + self.w_ho @ h + self.w_co @ cc + self.w_bo @ b + self.bias_o)
        # new hidden state
        ch = co * torch.tanh(cc)
        return ch, cc

    # Initialize the hidden state h and the memory cell c
    def init_state(self, batch_size, hidden_size):
        # states are created directly in (hidden_size, batch) layout to match the matmuls
        h_init = torch.rand(hidden_size, batch_size)
        c_init = torch.rand(hidden_size, batch_size)
        return h_init, c_init
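
# A quick single-cell sanity check, kept as a commented sketch with hypothetical
# sizes so that importing this module stays side-effect free:
#   cell = ContextualLSTMCell(input_size=10, hidden_size=20, contextual_type=4)
#   h0, c0 = cell.init_state(batch_size=8, hidden_size=20)
#   x = torch.rand(10, 8)                        # (input_size, batch)
#   b = torch.eye(4)[:, torch.randint(4, (8,))]  # one-hot contexts, (contextual_type, batch)
#   h1, c1 = cell(x, h0, c0, b)                  # each of shape (20, 8)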


# Define the full (multi-layer, multi-step) Contextual LSTM model
class ContextualLSTM(nn.Module):
"""Contextual LSTM model"""
def __init__(self, num_steps, num_layers, input_size, hidden_size, contextual_type, bias=True):
super(ContextualLSTM, self).__init__()
        # sequence length (number of time steps)
        self.num_steps = num_steps
        # number of stacked layers
        self.num_layers = num_layers
        # input dimension
        self.input_size = input_size
        # hidden state dimension
        self.hidden_size = hidden_size
        # number of context types
        self.contextual_type = contextual_type
        # whether to use bias terms
        self.bias = bias
        # list holding the Contextual LSTM cells of all layers
        self._all_layers = []
        for k in range(self.num_layers):
            layer_name = 'cell{}'.format(k)
            # layers above the first consume the previous layer's hidden state,
            # so their input dimension is hidden_size rather than input_size
            layer_input_size = self.input_size if k == 0 else self.hidden_size
            cell = ContextualLSTMCell(layer_input_size, self.hidden_size, self.contextual_type, self.bias)
            setattr(self, layer_name, cell)
            self._all_layers.append(cell)

    # Forward pass of the Contextual LSTM model
def forward(self, inputs, contexts):
        # per-layer internal states (h, c)
        internal_state = []
        # hidden states of the top layer, one per time step
        outputs = []
        # iterate over time steps
        for step in range(self.num_steps):
            # transpose to (input_size, batch) to match the cell's matmul layout
            x_step = inputs[step].t()
            # one-hot context at the current step, (contextual_type, batch)
            context_step = contexts[step].t()
            # iterate over the layers, bottom to top
            for layer in range(self.num_layers):
                layer_name = 'cell{}'.format(layer)
                # at the first time step, initialize every layer's states
                if step == 0:
                    batch_size = inputs[step].size()[0]
                    h, c = getattr(self, layer_name).init_state(batch_size=batch_size, hidden_size=self.hidden_size)
                    internal_state.append((h, c))
                # one cell step; the new hidden state is fed to the layer above
                (h, c) = internal_state[layer]
                x_step, c_new = getattr(self, layer_name)(x_step, h, c, context_step)
                internal_state[layer] = (x_step, c_new)
            # store the top layer's hidden state for this time step
            outputs.append(x_step)
        # return all top-layer hidden states plus the final hidden state and memory cell
        return outputs, (x_step, c_new)
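
# Call convention for the full model, as a hedged sketch with hypothetical sizes:
#   clstm = ContextualLSTM(num_steps=5, num_layers=2, input_size=10,
#                          hidden_size=20, contextual_type=4)
#   inputs = torch.rand(5, 8, 10)    # (num_steps, batch, input_size)
#   contexts = torch.rand(5, 8, 4)   # (num_steps, batch, contextual_type); one-hot in practice
#   outputs, (h_n, c_n) = clstm(inputs, contexts)
#   # outputs: list of num_steps tensors, each (hidden_size, batch)
#   # h_n, c_n: final top-layer hidden state and memory cell, each (hidden_size, batch)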


class MyModel(nn.Module):
"""Classifier Model"""
def __init__(self, num_steps, num_layers, input_size, hidden_size, contextual_type, output_size, bias=True):
super(MyModel, self).__init__()
self.num_steps = num_steps
self.num_layers = num_layers
self.input_size = input_size
self.hidden_size = hidden_size
self.contextual_type = contextual_type
self.output_size = output_size
self.bias = bias
self.contextual_lstm = ContextualLSTM(self.num_steps, self.num_layers, self.input_size, self.hidden_size,
self.contextual_type, self.bias)
self.linear = nn.Linear(self.hidden_size, self.output_size)
def forward(self, inputs, contexts):
output, (h_final, c_final) = self.contextual_lstm(inputs, contexts)
result = torch.sigmoid(self.linear(h_final.t()))
return result


if __name__ == '__main__':
my_model = MyModel(num_steps=5, num_layers=1, input_size=10, hidden_size=20, contextual_type=4, output_size=1,
bias=True)
print(my_model)
for name, parameter in my_model.named_parameters():
print(name, parameter)
    # random inputs: (num_steps, batch, input_size)
    inputs = torch.rand(5, 200, 10)
    # random one-hot contexts: (num_steps, batch, contextual_type)
    context = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]
    lines = []
    for i in range(5):
        line = [random.choice(context) for j in range(200)]
        lines.append(line)
    contexts = torch.FloatTensor(np.array(lines))
    print(contexts.shape)
    # random binary labels: (batch, 1)
    label = np.array([random.choice([0, 1]) for i in range(200)], dtype=np.int64)
    target = torch.FloatTensor(label).unsqueeze(1)
    criterion = nn.BCELoss(reduction='mean')
    optimizer = optim.SGD(my_model.parameters(), lr=0.001, momentum=0.9)
    EPOCH = 50
    for i in range(EPOCH):
        output = my_model(inputs, contexts)
        loss = criterion(output, target)
        print('epoch', i + 1, ':', loss.item())
        # clear accumulated gradients before each backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# for name, parameter in my_model.named_parameters():
# print(name, parameter)

CLSTM (Contextual LSTM) in PyTorch
This article walks through the design and implementation of the Contextual LSTM model, explaining its application to sequence prediction tasks and, in particular, how fusing contextual information into the gates strengthens the model's predictions.
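
For reference, these are the gate equations that the ContextualLSTMCell above implements, with the one-hot context vector b_t entering every gate alongside the usual input, hidden-state, and peephole terms:

$$
\begin{aligned}
i_t &= \sigma(W_{ii} x_t + W_{hi} h_{t-1} + W_{ci} c_{t-1} + W_{bi} b_t + \beta_i) \\
f_t &= \sigma(W_{if} x_t + W_{hf} h_{t-1} + W_{cf} c_{t-1} + W_{bf} b_t + \beta_f) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh(W_{ic} x_t + W_{hc} h_{t-1} + W_{bc} b_t + \beta_c) \\
o_t &= \sigma(W_{io} x_t + W_{ho} h_{t-1} + W_{co} c_t + W_{bo} b_t + \beta_o) \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$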