Let's go straight to the code for the model.
import torch
from torch import nn


class BiLSTM_Attention(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_hiddens, num_layers):
        super(BiLSTM_Attention, self).__init__()
        # Shape after embedding: torch.Size([200, 8, 300]),
        # i.e. (seq_len, batch_size, embedding_dim) since batch_first=False.
        # `vectors` is the pretrained embedding matrix (vocab_size x embedding_dim)
        # loaded earlier; freeze=False keeps the embeddings trainable.
        self.word_embeddings = nn.Embedding.from_pretrained(vectors, freeze=False)
        # Setting bidirectional=True yields a bidirectional LSTM.
        self.encoder = nn.LSTM(input_size=embedding_dim,
                               hidden_size=num_hiddens,
                               num_layers=num_layers,
                               batch_first=False,
                               bidirectional=True)
        # Attention parameters: w_omega projects the concatenated forward/backward
        # hidden states (2 * num_hiddens), and u_omega scores each time step.
        self.w_omega = nn.Parameter(torch.Tensor(
            num_hiddens * 2, num_hiddens * 2))
        self.u_omega = nn.Parameter(torch.Tensor(num_hiddens * 2, 1))
        # torch.Tensor allocates uninitialized memory, so initialize explicitly.
        nn.init.uniform_(self.w_omega, -0.1, 0.1)
        nn.init.uniform_(self.u_omega, -0.1, 0.1)
        # Classifier head (the original line is truncated here; a two-class
        # output, e.g. binary sentiment classification, is assumed).
        self.decoder = nn.Linear(num_hiddens * 2, 2)
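The snippet ends at __init__, so here is a minimal sketch of a matching forward pass, written under the assumptions already visible above: inputs of shape (seq_len, batch_size), and the additive-attention scheme implied by w_omega and u_omega (a tanh projection, a per-time-step score, a softmax over the sequence, and a weighted sum fed to the decoder). Treat it as an illustration, not the author's exact implementation.

    def forward(self, inputs):
        # inputs: (seq_len, batch_size) token indices
        embeddings = self.word_embeddings(inputs)
        # outputs: (seq_len, batch_size, 2 * num_hiddens),
        # the forward and backward hidden states concatenated.
        outputs, _ = self.encoder(embeddings)
        x = outputs.permute(1, 0, 2)                   # (batch_size, seq_len, 2 * num_hiddens)
        # Attention: score every time step, normalize over the sequence,
        # then take the attention-weighted sum of the LSTM outputs.
        u = torch.tanh(torch.matmul(x, self.w_omega))  # (batch_size, seq_len, 2 * num_hiddens)
        att = torch.matmul(u, self.u_omega)            # (batch_size, seq_len, 1)
        att_score = torch.softmax(att, dim=1)          # weights over time steps
        scored_x = x * att_score                       # broadcast over the feature dim
        feat = torch.sum(scored_x, dim=1)              # (batch_size, 2 * num_hiddens)
        return self.decoder(feat)                      # (batch_size, 2)

Pooling with attention this way, rather than concatenating only the first and last hidden states, lets the model weight the informative time steps. With num_hiddens=100, for example, feat is 200-dimensional before the decoder.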