# 1. 代码实现 (Code implementation)
#导包
import torch
from torch import nn
import dltools
# Configuration for loading the WikiText pretraining data.
batch_size = 1   # number of sequences per batch
max_len = 64     # maximum token length of each sequence

# Build the training-data iterator and the vocabulary.
train_iter, vocab = dltools.load_data_wiki(batch_size, max_len)
# Each batch `i` is a list of tensors:
# tokens, segments, valid_lens (1-D), pred_positions, mlm_weights,
# mlm_labels, nsp_labels (1-D) — the rest are 2-D arrays.
for i in train_iter:  # iterate over the data iterator
    break  # stop after the first batch; `i` keeps it for inspection
# Example output (one batch):
# [tensor([[ 3, 25, 0, 4993, 0, 24, 4, 26, 13, 2, 158, 20, 5, 73, 1399, 2, 9, 813, 9, 987, 45, 26, 52, 46, 53, 158, 2, 5, 3140, 5880, 9, 543, 6, 6974, 2, 2, 315, 6, 8, 5, 8698, 8, 17229, 9, 308, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), tensor([[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), tensor([47.]), tensor([[ 9, 15, 26, 32, 34, 35, 45, 0, 0, 0]]), tensor([[1., 1., 1., 1., 1., 1., 1., 0., 0., 0.]]), tensor([[ 484, 1288, 20, 6, 2808, 9, 18, 0, 0, 0]]), tensor([0])]
#创建BERT网络模型
net = dltools.BERTModel(len(vocab), num_hiddens=128, norm_shape=[128],
ffn_num_input=128, ffn_num_hiddens=256, num_heads=2,
num_layers=2, dropout=0.2, key_size=128, query_size=128,
value_size=128, hid_in_features=128, mlm_in_fea