模型原理:
利用 LSTM 模型,根据前面的单词预测下一个单词。例如句子 I like the world.
我们在这句话的首尾分别加上 <s> 和 </s>:<s> 代表句子开始,</s> 代表句子结束。于是输入(input data)是 <s> I like the world.,
标签(labels)是 I like the world. </s>,即每个位置的标签就是输入中的下一个词。
代码:
import torch
# Pick the compute device once; later code reads the module-level `device`.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')
else:
    # Use CUDA and report how many GPUs exist and the name of GPU 0.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
There are 1 GPU(s) available.
We will use the GPU: GeForce GTX 1070
# -*- coding:utf-8 -*-
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from collections import defaultdict, Counter
def set_random_seed(seed):
    """Seed the NumPy and PyTorch (CPU and CUDA) generators with ``seed``.

    Also turns cuDNN benchmark mode off and sets its deterministic flag.

    Args:
        seed: Integer seed handed to every generator.
    """
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    seeders = (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)


set_random_seed(6688)
建立词典
from collections import Counter
# Directory that holds the corpus files; later code appends file names to it.
data_path = 'F:/data/homework/'
# Read the whole vocabulary file. The context manager closes the handle,
# which a bare open(...).read() would leave to the garbage collector.
with open(data_path + 'bobsue.voc.txt', encoding='utf-8') as vocab_file:
    text = vocab_file.read()
def build_vocab(sents, max_words=50000):
    """Build index<->word lookup tables from an iterable of tokens.

    Args:
        sents: Iterable of word tokens, one entry per occurrence.
        max_words: Keep at most this many of the most frequent words.

    Returns:
        A pair ``(itos, stoi)``. ``itos`` lists the kept words in descending
        frequency order followed by the literal ``"UNK"``; ``stoi`` maps each
        entry of ``itos`` to its position in that list.
    """
    frequencies = Counter(token for token in sents)
    itos = [token for token, _ in frequencies.most_common(max_words)]
    itos.append("UNK")
    stoi = dict(zip(itos, range(len(itos))))
    return itos, stoi
def tokenize(x):
    """Split a string into whitespace-separated tokens."""
    return x.split()


# Tokens of the vocabulary file, then the lookup tables built from them.
vob = tokenize(text)
itos, stoi = build_vocab(vob)
导入数据
class Corpus:
    """Load the train/dev/test text files and convert every line to word indices.

    Uses the module-level ``itos`` / ``stoi`` lookup tables.

    Args:
        data_path: Directory prefix; ``train.txt``, ``dev.txt`` and ``test.txt``
            are appended to it.
        sort_by_len: If true, each split is sorted by sentence length
            (longest first) to reduce padding.
    """

    def __init__(self, data_path, sort_by_len=False):
        # Expose the index->word table so that len(corpus.vocab) covers every
        # index sentence_to_index can emit, including the trailing "UNK".
        # NOTE(review): this can change len(corpus.vocab) relative to the raw
        # token list used before; checkpoints saved with the old size will not load.
        self.vocab = itos
        self.sort_by_len = sort_by_len
        self.train_data = self.tokenize(data_path + 'train.txt')
        self.valid_data = self.tokenize(data_path + 'dev.txt')
        self.test_data = self.tokenize(data_path + 'test.txt')

    def tokenize(self, text_path):
        """Return one list of word indices per non-blank line of ``text_path``."""
        index_data = []
        # Same explicit encoding as the vocabulary file, so results do not
        # depend on the platform default.
        with open(text_path, encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    # A blank line would become an empty sample.
                    continue
                index_data.append(self.sentence_to_index(line))
        if self.sort_by_len:
            index_data.sort(key=len, reverse=True)
        return index_data

    def sentence_to_index(self, s):
        """Map a whitespace-separated sentence to indices; unknown words map to "UNK"."""
        unk_index = stoi["UNK"]
        return [stoi.get(w, unk_index) for w in s.split()]

    def index_to_sentence(self, x):
        """Join the words for the indices in ``x`` with single spaces."""
        return ' '.join(itos[i] for i in x)


corpus = Corpus(data_path, sort_by_len=False)
构建数据集
# Dataset for the language model
class BobSueLMDataSet(torch.utils.data.Dataset):
    """Wrap indexed sentences and yield (input, target) pairs shifted by one token."""

    def __init__(self, index_data):
        self.index_data = index_data

    def __len__(self):
        return len(self.index_data)

    def __getitem__(self, i):
        # Next-word prediction: the input drops the last token and the
        # target drops the first one.
        sequence = self.index_data[i]
        return sequence[:-1], sequence[1:]
# Wrap each indexed split of the corpus in the language-model dataset.
train_set, valid_data, test_data = (
    BobSueLMDataSet(split)
    for split in (corpus.train_data, corpus.valid_data, corpus.test_data)
)
构建dataloader
def dataset_batches(batch):
    """Collate ``(input, target)`` index-list pairs into padded, length-sorted tensors.

    Args:
        batch: List of ``(input_ids, target_ids)`` pairs.

    Returns:
        Tuple ``(inputs, targets, lengths, mask)`` placed on the module-level
        ``device``. ``inputs`` and ``targets`` are zero-padded LongTensors with
        the batch dimension first, ``lengths`` holds the unpadded input lengths
        in descending order, and ``mask`` is a float tensor that is 1.0 on real
        tokens and 0.0 on padding.
    """
    # [(input_1, target_1), ...] -> (input_1, ...), (target_1, ...)
    input_seqs, target_seqs = zip(*batch)
    lengths = torch.LongTensor([len(seq) for seq in input_seqs])
    # Pad on the CPU so each batch tensor is transferred to the device once.
    inputs = nn.utils.rnn.pad_sequence(
        [torch.LongTensor(seq) for seq in input_seqs], batch_first=True
    )
    targets = nn.utils.rnn.pad_sequence(
        [torch.LongTensor(seq) for seq in target_seqs], batch_first=True
    )
    # pack_padded_sequence is applied later, so order the batch by
    # descending length here.
    lengths, perm_index = lengths.sort(descending=True)
    inputs = inputs[perm_index].to(device)
    targets = targets[perm_index].to(device)
    lengths = lengths.to(device)
    # Derive the mask from the lengths rather than from `inputs != 0`:
    # index 0 is also a real word in the vocabulary (no padding entry is
    # reserved), so a value test would mask out genuine tokens.
    positions = torch.arange(inputs.size(1), device=lengths.device)
    mask = (positions.unsqueeze(0) < lengths.unsqueeze(1)).float()
    return inputs, targets, lengths, mask
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=8,
    shuffle=True,
    collate_fn=dataset_batches,
)
# Evaluation loaders keep the dataset order: shuffling does not change the
# accuracy and only makes evaluation runs harder to compare.
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_data,
    batch_size=8,
    shuffle=False,
    collate_fn=dataset_batches,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=8,
    shuffle=False,
    collate_fn=dataset_batches,
)
设计语言模型网络架构
# Default hyper-parameters: embedding size, LSTM hidden size, dropout probability.
EMBED_SIZE, HIDDEN_SIZE, DROPOUT = 200, 200, 0.5


class LSTMLM(nn.Module):
    """LSTM language model: embedding -> dropout -> LSTM -> linear layer over the vocabulary.

    Args:
        n_words: Number of words in the vocabulary.
        n_embed: Word-embedding dimension.
        n_hidden: Dimension of the LSTM hidden state.
        dropout: Dropout probability applied to the embeddings.
    """

    def __init__(self, n_words, n_embed=EMBED_SIZE, n_hidden=HIDDEN_SIZE, dropout=DROPOUT):
        super(LSTMLM, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.embed = nn.Embedding(n_words, n_embed)
        self.lstm = nn.LSTM(n_embed, n_hidden, batch_first=True)
        self.linear = nn.Linear(n_hidden, n_words)

    def forward(self, inputs, targets, lengths, mask):
        """Return per-position vocabulary scores.

        Args:
            inputs: Word indices, shape ``[batch_size, max_length]``, rows
                ordered by descending length.
            targets: Not used here; kept so callers can pass a whole batch.
            lengths: Unpadded length of every row of ``inputs`` (tensor or list).
            mask: Not used here; kept so callers can pass a whole batch.

        Returns:
            Tensor of shape ``[batch_size, max_length, n_words]``.
        """
        # x_emb shape: [batch_size, max_length, embed_size]
        x_emb = self.drop(self.embed(inputs))
        # pack_padded_sequence requires the lengths on the CPU when they are
        # given as a tensor, so move them there regardless of the input device.
        packed = nn.utils.rnn.pack_padded_sequence(
            x_emb, torch.as_tensor(lengths).cpu(), batch_first=True
        )
        # h_0 / c_0 use the default all-zero initialisation; the final states
        # returned by the LSTM are discarded.
        packed_out, _ = self.lstm(packed)
        # total_length keeps the time dimension equal to that of `inputs`
        # even if the longest row is shorter than the padded width.
        x_out, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=inputs.size(1)
        )
        return self.linear(x_out)
模型初始化,设置优化器为SGD
model = LSTMLM(len(corpus.vocab), EMBED_SIZE, HIDDEN_SIZE, DROPOUT)
model.to(device)
# Loss function. reduction='none' returns one loss per token, so that
# multiplying by the padding mask in the training loop really excludes the
# padded positions; the default 'mean' yields a scalar that has already
# been averaged over padding.
criterion = torch.nn.CrossEntropyLoss(reduction='none').to(device)
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.03)
import time
start = time.time()
from tensorboardX import SummaryWriter
writer = SummaryWriter('tensorboard_logs/SGD/')
def _run_epoch(model, loader, training):
    """Run one pass over ``loader`` and return ``(mean_loss, correct, words)``.

    When ``training`` is true the module-level ``optimizer`` is stepped after
    every batch; otherwise the pass runs in eval mode without gradients.
    ``mean_loss`` is the cross-entropy averaged over all unmasked tokens.
    """
    loss_total, token_total = 0.0, 0.0
    total_correct, total_words = 0, 0
    model.train(training)
    with torch.set_grad_enabled(training):
        for inputs, targets, lengths, mask in loader:
            outputs = model(inputs, targets, lengths, mask)
            # Correctly predicted words, ignoring masked positions.
            total_correct += ((outputs.argmax(dim=-1) == targets) * mask).sum().item()
            # Number of words predicted in this batch.
            total_words += torch.sum(lengths).item()
            # Per-token loss, so that the mask really removes padding.
            # outputs: (batch_size * max_len, vocab_size); targets, mask: (batch_size * max_len)
            token_loss = F.cross_entropy(
                outputs.reshape(-1, outputs.size(2)),
                targets.reshape(-1),
                reduction='none',
            )
            flat_mask = mask.reshape(-1)
            loss_sum = torch.sum(token_loss * flat_mask)
            n_tokens = torch.sum(flat_mask)
            if training:
                batch_loss = loss_sum / n_tokens.clamp(min=1)
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()
            loss_total += loss_sum.item()
            token_total += n_tokens.item()
    return loss_total / max(token_total, 1.0), total_correct, total_words


def train_model(model, train_loader, valid_loader, patience, n_epochs,
                tag='SGD', checkpoint_path='SGD_checkpoint.pt'):
    """Train ``model`` with early stopping on validation accuracy.

    Uses the module-level ``optimizer`` and ``writer``; the writer is closed
    when training ends.

    Args:
        model: Network called as ``model(inputs, targets, lengths, mask)``.
        train_loader: Iterable of ``(inputs, targets, lengths, mask)`` batches.
        valid_loader: Same batch format, used for validation.
        patience: Stop once validation accuracy has failed to improve for
            more than this many consecutive epochs.
        n_epochs: Maximum number of epochs.
        tag: Main tag under which the scalars are written to TensorBoard.
        checkpoint_path: File that receives the best model's ``state_dict``.
    """
    best_accuracy = 0
    early_stopping = 0
    for epoch in range(1, n_epochs + 1):
        starttime = time.time()
        print("epoch: ", epoch)
        # ---- train ----
        train_loss, total_correct, total_words = _run_epoch(model, train_loader, True)
        train_acc = total_correct / total_words if total_words else 0.0
        print("train_total_correct_words:", total_correct)
        print("train_total_words:", total_words)
        print("train_accuracy: = ", train_acc)
        # ---- validate ----
        valid_loss, total_correct, total_words = _run_epoch(model, valid_loader, False)
        valid_acc = total_correct / total_words if total_words else 0.0
        print("valid_total_correct_words:", total_correct)
        print("valid_total_words:", total_words)
        print("valid_accuary:", valid_acc)
        # Log plain floats (epoch averages) rather than the last batch's tensor.
        tensorboard_logs = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'valid_loss': valid_loss,
            'valid_acc': valid_acc,
        }
        writer.add_scalars(tag, tensorboard_logs, epoch)
        if best_accuracy < valid_acc:
            best_accuracy = valid_acc
            early_stopping = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            early_stopping += 1
        if early_stopping > patience:
            print(f'早停在 ({epoch})...')
            break
        endtime = time.time()
        dtime = endtime - starttime
        print("epoch:运行时间:%.2f秒" % (dtime))
    writer.close()
# Train with patience 5 for at most 30 epochs, then print the total wall-clock time.
train_model(model, train_loader, valid_loader, patience=5, n_epochs=30)
end = time.time()
dtime = end - start
print("epoch:运行时间:%.2f秒" % dtime)
epoch: 1
train_total_correct_words: 5537.0
train_total_words: 65331
train_accuracy: = 0.08475302689381764
valid_total_correct_words: 720.0
valid_total_words: 7957
valid_accuary: 0.09048636420761594
epoch:运行时间:14.34秒
epoch: 2
train_total_correct_words: 9704.0
train_total_words: 65331
train_accuracy: = 0.14853591709908007
valid_total_correct_words: 1537.0
valid_total_words: 7957
valid_accuary: 0.19316325248209124
epoch:运行时间:14.91秒
epoch: 3
train_total_correct_words: 12992.0
train_total_words: 65331
train_accuracy: = 0.19886424515161255
valid_total_correct_words: 1664.0
valid_total_words: 7957
valid_accuary: 0.20912404172426793
epoch:运行时间:14.06秒
epoch: 4
train_total_correct_words: 13731.0
train_total_words: 65331
train_accuracy: = 0.21017587362813978
valid_total_correct_words: 1720.0
valid_total_words: 7957
valid_accuary: 0.21616187005152696
epoch:运行时间:14.50秒
epoch: 5
train_total_correct_words: 14143.0
train_total_words: 65331
train_accuracy: = 0.21648222130382208
valid_total_correct_words: 1751.0
valid_total_words: 7957
valid_accuary: 0.2200578107326882
epoch:运行时间:14.40秒
epoch: 6
train_total_correct_words: 14445.0
train_total_words: 65331
train_accuracy: = 0.22110483537677367
valid_total_correct_words: 1780.0
valid_total_words: 7957
valid_accuary: 0.2237024004021616
epoch:运行时间:13.80秒
epoch: 7
train_total_correct_words: 14776.0
train_total_words: 65331
train_accuracy: = 0.22617134285408153
valid_total_correct_words: 1854.0
valid_total_words: 7957
valid_accuary: 0.23300238783461102
epoch:运行时间:14.21秒
epoch: 8
train_total_correct_words: 15050.0
train_total_words: 65331
train_accuracy: = 0.23036537019179257
valid_total_correct_words: 1885.0
valid_total_words: 7957
valid_accuary: 0.23689832851577228
epoch:运行时间:13.86秒
epoch: 9
train_total_correct_words: 15348.0
train_total_words: 65331
train_accuracy: = 0.23492675758828122
valid_total_correct_words: 1907.0
valid_total_words: 7957
valid_accuary: 0.23966318964433833
epoch:运行时间:13.75秒
epoch: 10
train_total_correct_words: 15489.0
train_total_words: 65331
train_accuracy: = 0.23708499793359966
valid_total_correct_words: 1935.0
valid_total_words: 7957
valid_accuary: 0.24318210380796781
epoch:运行时间:13.73秒
epoch: 11
train_total_correct_words: 15724.0
train_total_words: 65331
train_accuracy: = 0.2406820651757971
valid_total_correct_words: 1975.0
valid_total_words: 7957
valid_accuary: 0.24820912404172427
epoch:运行时间:13.72秒
epoch: 12
train_total_correct_words: 15949.0
train_total_words: 65331
train_accuracy: = 0.24412606572683718
valid_total_correct_words: 2013.0
valid_total_words: 7957
valid_accuary: 0.25298479326379286
epoch:运行时间:14.24秒
epoch: 13
train_total_correct_words: 16133.0
train_total_words: 65331
train_accuracy: = 0.2469424928441322
valid_total_correct_words: 2030.0
valid_total_words: 7957
valid_accuary: 0.2551212768631394
epoch:运行时间:13.84秒
epoch: 14
train_total_correct_words: 16288.0
train_total_words: 65331
train_accuracy: = 0.2493150265570709
valid_total_correct_words: 2047.0
valid_total_words: 7957
valid_accuary: 0.2572577604624859
epoch:运行时间:13.93秒
epoch: 15
train_total_correct_words: 16537.0
train_total_words: 65331
train_accuracy: = 0.2531263871668886
valid_total_correct_words: 2066.0
valid_total_words: 7957
valid_accuary: 0.2596455950735202
epoch:运行时间:13.75秒
epoch: 16
train_total_correct_words: 16644.0
train_total_words: 65331
train_accuracy: = 0.25476420076227213
valid_total_correct_words: 2073.0
valid_total_words: 7957
valid_accuary: 0.26052532361442754
epoch:运行时间:14.08秒
epoch: 17
train_total_correct_words: 16858.0
train_total_words: 65331
train_accuracy: = 0.25803982795303915
valid_total_correct_words: 2097.0
valid_total_words: 7957
valid_accuary: 0.2635415357546814
epoch:运行时间:14.65秒
epoch: 18
train_total_correct_words: 16939.0
train_total_words: 65331
train_accuracy: = 0.25927966815141357
valid_total_correct_words: 2118.0
valid_total_words: 7957
valid_accuary: 0.26618072137740356
epoch:运行时间:14.32秒
epoch: 19
train_total_correct_words: 17057.0
train_total_words: 65331
train_accuracy: = 0.26108585510707016
valid_total_correct_words: 2131.0
valid_total_words: 7957
valid_accuary: 0.2678145029533744
epoch:运行时间:14.50秒
epoch: 20
train_total_correct_words: 17249.0
train_total_words: 65331
train_accuracy: = 0.264024735577291
valid_total_correct_words: 2147.0
valid_total_words: 7957
valid_accuary: 0.26982531104687696
epoch:运行时间:14.67秒
epoch: 21
train_total_correct_words: 17279.0
train_total_words: 65331
train_accuracy: = 0.26448393565076306
valid_total_correct_words: 2170.0
valid_total_words: 7957
valid_accuary: 0.2727158476812869
epoch:运行时间:15.20秒
epoch: 22
train_total_correct_words: 17384.0
train_total_words: 65331
train_accuracy: = 0.26609113590791506
valid_total_correct_words: 2176.0
valid_total_words: 7957
valid_accuary: 0.27346990071635036
epoch:运行时间:14.55秒
epoch: 23
train_total_correct_words: 17515.0
train_total_words: 65331
train_accuracy: = 0.2680963095620762
valid_total_correct_words: 2210.0
valid_total_words: 7957
valid_accuary: 0.2777428679150434
epoch:运行时间:14.76秒
epoch: 24
train_total_correct_words: 17650.0
train_total_words: 65331
train_accuracy: = 0.27016270989270025
valid_total_correct_words: 2192.0
valid_total_words: 7957
valid_accuary: 0.275480708809853
epoch:运行时间:14.17秒
epoch: 25
train_total_correct_words: 17673.0
train_total_words: 65331
train_accuracy: = 0.2705147632823621
valid_total_correct_words: 2206.0
valid_total_words: 7957
valid_accuary: 0.2772401658916677
epoch:运行时间:14.00秒
epoch: 26
train_total_correct_words: 17814.0
train_total_words: 65331
train_accuracy: = 0.2726730036276806
valid_total_correct_words: 2216.0
valid_total_words: 7957
valid_accuary: 0.2784969209501068
epoch:运行时间:13.98秒
epoch: 27
train_total_correct_words: 17864.0
train_total_words: 65331
train_accuracy: = 0.27343833708346726
valid_total_correct_words: 2234.0
valid_total_words: 7957
valid_accuary: 0.2807590800552972
epoch:运行时间:13.80秒
epoch: 28
train_total_correct_words: 17926.0
train_total_words: 65331
train_accuracy: = 0.2743873505686428
valid_total_correct_words: 2226.0
valid_total_words: 7957
valid_accuary: 0.27975367600854595
epoch:运行时间:14.19秒
epoch: 29
train_total_correct_words: 18005.0
train_total_words: 65331
train_accuracy: = 0.27559657742878574
valid_total_correct_words: 2246.0
valid_total_words: 7957
valid_accuary: 0.2822671861254242
epoch:运行时间:14.85秒
epoch: 30
train_total_correct_words: 18166.0
train_total_words: 65331
train_accuracy: = 0.2780609511564189
valid_total_correct_words: 2234.0
valid_total_words: 7957
valid_accuary: 0.2807590800552972
epoch:运行时间:14.83秒
epoch:运行时间:428.50秒
模型初始化,设置优化器为Adam
model = LSTMLM(len(corpus.vocab), EMBED_SIZE, HIDDEN_SIZE, DROPOUT)
model.to(device)
# Loss function. reduction='none' returns one loss per token, so that
# multiplying by the padding mask in the training loop really excludes the
# padded positions; the default 'mean' yields a scalar that has already
# been averaged over padding.
criterion = torch.nn.CrossEntropyLoss(reduction='none').to(device)
# Optimizer (Adam with its default hyper-parameters)
optimizer = torch.optim.Adam(model.parameters())
训练模型
import time
from tensorboardX import SummaryWriter

# Restart the wall-clock timer and log the Adam run to its own TensorBoard directory.
start = time.time()
writer = SummaryWriter('tensorboard_logs/Adam/')
def _run_epoch(model, loader, training):
    """Run one pass over ``loader`` and return ``(mean_loss, correct, words)``.

    When ``training`` is true the module-level ``optimizer`` is stepped after
    every batch; otherwise the pass runs in eval mode without gradients.
    ``mean_loss`` is the cross-entropy averaged over all unmasked tokens.
    """
    loss_total, token_total = 0.0, 0.0
    total_correct, total_words = 0, 0
    model.train(training)
    with torch.set_grad_enabled(training):
        for inputs, targets, lengths, mask in loader:
            outputs = model(inputs, targets, lengths, mask)
            # Correctly predicted words, ignoring masked positions.
            total_correct += ((outputs.argmax(dim=-1) == targets) * mask).sum().item()
            # Number of words predicted in this batch.
            total_words += torch.sum(lengths).item()
            # Per-token loss, so that the mask really removes padding.
            # outputs: (batch_size * max_len, vocab_size); targets, mask: (batch_size * max_len)
            token_loss = F.cross_entropy(
                outputs.reshape(-1, outputs.size(2)),
                targets.reshape(-1),
                reduction='none',
            )
            flat_mask = mask.reshape(-1)
            loss_sum = torch.sum(token_loss * flat_mask)
            n_tokens = torch.sum(flat_mask)
            if training:
                batch_loss = loss_sum / n_tokens.clamp(min=1)
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()
            loss_total += loss_sum.item()
            token_total += n_tokens.item()
    return loss_total / max(token_total, 1.0), total_correct, total_words


def train_model(model, train_loader, valid_loader, patience, n_epochs,
                tag='Adam', checkpoint_path='Adam_checkpoint.pt'):
    """Train ``model`` with early stopping on validation accuracy.

    Uses the module-level ``optimizer`` and ``writer``; the writer is closed
    when training ends.

    Args:
        model: Network called as ``model(inputs, targets, lengths, mask)``.
        train_loader: Iterable of ``(inputs, targets, lengths, mask)`` batches.
        valid_loader: Same batch format, used for validation.
        patience: Stop once validation accuracy has failed to improve for
            more than this many consecutive epochs.
        n_epochs: Maximum number of epochs.
        tag: Main tag under which the scalars are written to TensorBoard.
        checkpoint_path: File that receives the best model's ``state_dict``.
    """
    best_accuracy = 0
    early_stopping = 0
    for epoch in range(1, n_epochs + 1):
        starttime = time.time()
        print("epoch: ", epoch)
        # ---- train ----
        train_loss, total_correct, total_words = _run_epoch(model, train_loader, True)
        train_acc = total_correct / total_words if total_words else 0.0
        print("train_total_correct_words:", total_correct)
        print("train_total_words:", total_words)
        print("train_accuracy: = ", train_acc)
        # ---- validate ----
        valid_loss, total_correct, total_words = _run_epoch(model, valid_loader, False)
        valid_acc = total_correct / total_words if total_words else 0.0
        print("valid_total_correct_words:", total_correct)
        print("valid_total_words:", total_words)
        print("valid_accuary:", valid_acc)
        # Log plain floats (epoch averages) rather than the last batch's tensor.
        tensorboard_logs = {
            'train_loss': train_loss,
            'train_acc': train_acc,
            'valid_loss': valid_loss,
            'valid_acc': valid_acc,
        }
        writer.add_scalars(tag, tensorboard_logs, epoch)
        if best_accuracy < valid_acc:
            best_accuracy = valid_acc
            early_stopping = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            early_stopping += 1
        if early_stopping > patience:
            print(f'早停在 ({epoch})...')
            break
        endtime = time.time()
        dtime = endtime - starttime
        print("epoch:运行时间:%.2f秒" % (dtime))
    writer.close()
# Train with patience 5 for at most 30 epochs, then print the total wall-clock time.
train_model(model, train_loader, valid_loader, patience=5, n_epochs=30)
end = time.time()
dtime = end - start
print("epoch:运行时间:%.2f秒" % dtime)
epoch: 1
train_total_correct_words: 15271.0
train_total_words: 65331
train_accuracy: = 0.2337481440663697
valid_total_correct_words: 2209.0
valid_total_words: 7957
valid_accuary: 0.27761719240919946
epoch:运行时间:16.91秒
epoch: 2
train_total_correct_words: 18142.0
train_total_words: 65331
train_accuracy: = 0.27769359109764125
valid_total_correct_words: 2343.0
valid_total_words: 7957
valid_accuary: 0.2944577101922835
epoch:运行时间:17.91秒
epoch: 3
train_total_correct_words: 19074.0
train_total_words: 65331
train_accuracy: = 0.29195940671350507
valid_total_correct_words: 2400.0
valid_total_words: 7957
valid_accuary: 0.30162121402538644
epoch:运行时间:17.01秒
epoch: 4
train_total_correct_words: 19738.0
train_total_words: 65331
train_accuracy: = 0.3021230350063523
valid_total_correct_words: 2471.0
valid_total_words: 7957
valid_accuary: 0.3105441749403041
epoch:运行时间:17.58秒
epoch: 5
train_total_correct_words: 20348.0
train_total_words: 65331
train_accuracy: = 0.31146010316694983
valid_total_correct_words: 2455.0
valid_total_words: 7957
valid_accuary: 0.30853336684680155
epoch:运行时间:17.46秒
epoch: 6
train_total_correct_words: 20802.0
train_total_words: 65331
train_accuracy: = 0.31840933094549295
valid_total_correct_words: 2502.0
valid_total_words: 7957
valid_accuary: 0.3144401156214654
epoch:运行时间:16.96秒
epoch: 7
train_total_correct_words: 21174.0
train_total_words: 65331
train_accuracy: = 0.3241034118565459
valid_total_correct_words: 2497.0
valid_total_words: 7957
valid_accuary: 0.31381173809224583
epoch:运行时间:16.58秒
epoch: 8
train_total_correct_words: 21535.0
train_total_words: 65331
train_accuracy: = 0.32962911940732575
valid_total_correct_words: 2522.0
valid_total_words: 7957
valid_accuary: 0.31695362573834357
epoch:运行时间:17.03秒
epoch: 9
train_total_correct_words: 21979.0
train_total_words: 65331
train_accuracy: = 0.33642528049471154
valid_total_correct_words: 2515.0
valid_total_words: 7957
valid_accuary: 0.3160738971974362
epoch:运行时间:17.31秒
epoch: 10
train_total_correct_words: 22275.0
train_total_words: 65331
train_accuracy: = 0.34095605455296873
valid_total_correct_words: 2559.0
valid_total_words: 7957
valid_accuary: 0.3216036194545683
epoch:运行时间:16.66秒
epoch: 11
train_total_correct_words: 22513.0
train_total_words: 65331
train_accuracy: = 0.34459904180251333
valid_total_correct_words: 2549.0
valid_total_words: 7957
valid_accuary: 0.3203468643961292
epoch:运行时间:16.64秒
epoch: 12
train_total_correct_words: 22867.0
train_total_words: 65331
train_accuracy: = 0.3500176026694831
valid_total_correct_words: 2536.0
valid_total_words: 7957
valid_accuary: 0.31871308282015837
epoch:运行时间:16.73秒
epoch: 13
train_total_correct_words: 23171.0
train_total_words: 65331
train_accuracy: = 0.35467083008066613
valid_total_correct_words: 2557.0
valid_total_words: 7957
valid_accuary: 0.3213522684428805
epoch:运行时间:17.06秒
epoch: 14
train_total_correct_words: 23371.0
train_total_words: 65331
train_accuracy: = 0.3577321639038129
valid_total_correct_words: 2547.0
valid_total_words: 7957
valid_accuary: 0.32009551338444137
epoch:运行时间:16.86秒
epoch: 15
train_total_correct_words: 23818.0
train_total_words: 65331
train_accuracy: = 0.36457424499854585
valid_total_correct_words: 2568.0
valid_total_words: 7957
valid_accuary: 0.3227346990071635
epoch:运行时间:16.88秒
epoch: 16
train_total_correct_words: 24152.0
train_total_words: 65331
train_accuracy: = 0.3696866724832009
valid_total_correct_words: 2548.0
valid_total_words: 7957
valid_accuary: 0.3202211888902853
epoch:运行时间:16.94秒
epoch: 17
train_total_correct_words: 24319.0
train_total_words: 65331
train_accuracy: = 0.3722428862255285
valid_total_correct_words: 2515.0
valid_total_words: 7957
valid_accuary: 0.3160738971974362
epoch:运行时间:16.66秒
epoch: 18
train_total_correct_words: 24489.0
train_total_words: 65331
train_accuracy: = 0.3748450199752032
valid_total_correct_words: 2541.0
valid_total_words: 7957
valid_accuary: 0.3193414603493779
epoch:运行时间:16.83秒
epoch: 19
train_total_correct_words: 24790.0
train_total_words: 65331
train_accuracy: = 0.37945232737903906
valid_total_correct_words: 2533.0
valid_total_words: 7957
valid_accuary: 0.3183360563026266
epoch:运行时间:16.48秒
epoch: 20
train_total_correct_words: 25091.0
train_total_words: 65331
train_accuracy: = 0.3840596347828749
valid_total_correct_words: 2523.0
valid_total_words: 7957
valid_accuary: 0.3170793012441875
epoch:运行时间:17.19秒
epoch: 21
train_total_correct_words: 25373.0
train_total_words: 65331
train_accuracy: = 0.3883761154735118
valid_total_correct_words: 2507.0
valid_total_words: 7957
valid_accuary: 0.3150684931506849
早停在 (21.000000)...
epoch:运行时间:362.91秒
(此处原为 TensorBoard 训练曲线截图 image.png,图片未能成功上传。)
由图(以及上面的训练日志)可见,Adam 优化器训练模型的收敛速度比 SGD 快很多:Adam 只用了 3 个 epoch,验证集准确率(valid_acc)就超过了 30%,而 SGD 训练 30 个 epoch 后仍只有约 28%。
在测试集上预测
correct_to_wrong_num = dict()
correct_to_wrong = dict()
def _record_wrong_words(predictions, targets, mask):
    """Add the mis-predicted, non-padded positions of one batch to the logs.

    Updates the module-level dicts ``correct_to_wrong`` (target word -> list
    of words predicted instead) and ``correct_to_wrong_num`` (target word ->
    number of mistakes).

    Args:
        predictions: predicted word indices, indexed as ``[sentence][position]``.
        targets: ground-truth word indices, same layout as ``predictions``.
        mask: non-zero where a position holds a real word, zero on padding
            (assumed to have the same shape as ``targets`` -- confirm against
            the data loader).
    """
    # Only positions the mask keeps are real words. This is the same rule the
    # accuracy computation applies, so padding is never reported as a mistake.
    wrong = (predictions != targets) & mask.bool()
    if not wrong.any():
        return
    # Convert once instead of calling .item() for every single token.
    predicted_ids = predictions.tolist()
    target_ids = targets.tolist()
    # torch.nonzero returns indices in lexicographic (sentence, position)
    # order, i.e. the same order a nested loop over the batch would visit.
    for i, j in torch.nonzero(wrong, as_tuple=False).tolist():
        target_word = itos[target_ids[i][j]]
        wrong_word = itos[predicted_ids[i][j]]
        correct_to_wrong.setdefault(target_word, []).append(wrong_word)
        correct_to_wrong_num[target_word] = correct_to_wrong_num.get(target_word, 0) + 1


def prediction(test_loader, count_wrong_words):
    """Evaluate the checkpointed model and return its word-level accuracy.

    A fresh ``LSTMLM`` is built, the weights stored in ``Adam_checkpoint.pt``
    are loaded into it, and every batch of ``test_loader`` is scored.

    Args:
        test_loader: iterable yielding ``(inputs, targets, lengths, mask)``
            batches.
        count_wrong_words: when truthy, every mis-predicted word is also
            recorded in the module-level ``correct_to_wrong`` and
            ``correct_to_wrong_num`` dicts.

    Returns:
        Correctly predicted words divided by the total number of words
        (the sum of ``lengths``); ``0.0`` if the loader yields no words.
    """
    best_model = LSTMLM(len(corpus.vocab), EMBED_SIZE, HIDDEN_SIZE, DROPOUT)
    best_model.to(device)
    # map_location remaps the stored tensors onto `device`, so loading does
    # not depend on the device the checkpoint was saved from.
    best_model.load_state_dict(torch.load('Adam_checkpoint.pt', map_location=device))
    best_model.eval()
    total_correct, total_words = 0, 0
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for inputs, targets, lengths, mask in test_loader:
            # forward pass: compute predicted outputs by passing inputs to the model
            outputs = best_model(inputs, targets, lengths, mask)
            # Most likely word index at every position, computed once per batch.
            predictions = outputs.argmax(dim=-1)
            # Count the correctly predicted words of this batch (padding masked out).
            total_correct += ((predictions == targets) * mask).sum().item()
            if count_wrong_words:
                _record_wrong_words(predictions, targets, mask)
            # Count the total number of words predicted in this batch.
            total_words += torch.sum(lengths).item()
    if total_words == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    accuary = total_correct / total_words
    return accuary
# Record the mis-predicted words while scoring the test set: the listing
# further down reads correct_to_wrong, which stays empty unless this is True.
count_wrong_words = True
accuary = prediction(test_loader, count_wrong_words)
print("accuary: ", accuary)
accuary: 0.3175331927038094
输出预测错误的单词
# Print at most the first 101 entries (indices 0..100) of correct_to_wrong:
# each target word followed by the distinct words predicted in its place.
for i, item in enumerate(list(correct_to_wrong)[:101]):
    print("num: ", i)
    print("词: ", item, " 预测值: ", set(correct_to_wrong[item]))
num: 0
词: However 预测值: {'He'}
num: 1
词: Bob 预测值: {',', 'Sue', 'to', 'walking', 'drove', 'and', 'agreed', 'the', 'he', 'they', 'him', 'loved', 'if', 'asked', 'work', 'his', 'friends', 'had', 'eye', 'it', 'go', 'a', 'He', 'out', 'she', '.', 'much', 'of', 'yes', 'bought', 'good', 'invited', 'her', 'went', 'was', 'see', 'that', 'but'}
num: 2
词: learned 预测值: {'decided', 'would', 'got', 'could', 'was', 'had'}
num: 3
词: that 预测值: {',', 'to', 'what', 'Bob', 'and', 'about', 'the', 'he', 'they', 'him', 'better', 'if', 'in', 'work', 'speeding', 'offer', 'his', 'lonely', 'at', 'it', 'anyone', 'a', 'out', 'she', "'s", 'for', 'how', '.', 'of', 'when', 'bought', 'her', 'homework', 'home', 'yes'}
num: 4
词: get 预测值: {'to', 'know', 'the', 'him', 'give', 'would', 'be', 'have', 'care', 'his', 'miss', 'wait', 'want', 'eat', 'take', 'not', 'come', 'enjoy', 'go', 'seen', 'out', 'look', 'make', 'do', 'see', 'bought', 'her', 'hear', 'find', 'feel', 'buy', 'that', 'say', 'bake'}
num: 5
词: a 预测值: {'up', 'mother', 'own', 'school', 'him', 'healthy', 'TV', 'enough', 'care', 'months', 'half', 'any', 'some', 'himself', 'losing', 'hours', '.', 'for', 'of', 'hard', 'was', 'anything', 'extremely', 'now', 'what', 'Bob', 'and', 'happy', 'most', 'with', 'fast', 'boring', 'other', 'too', 'town', 'down', 'very', 'no', 'gone', 'at', 'it', 'go', "'s", 'safe', 'well', 'nervous', 'time', 'see', 'into', 'trouble', 'prom', 'but', ',', 'Sue', 'to', 'lost', 'away', 'bike', 'play', 'fighting', 'they', 'better', 'one', 'fun', 'take', 'not', 'late', 'been', 'lots', 'their', 'money', 'going', 'lessons', 'coworkers', 'bad', 'something', 'the', 'he', 'television', 'so', 'in', 'videos', 'his', 'began', 'advice', 'out', 'she', 'there', 'together', 'until', 'how', 'her', 'went', 'then', 'ready', 'home', 'that'}
num: 6
词: shot 预测值: {'to', 'all', 'house', 'time', 'bike', 'went', 'was', 'pet', 'more', 'ball', 'new'}
num: 7
词: She 预测值: {'He'}
num: 8
词: had 预测值: {',', 'wasn', 'lost', 'takes', 'to', 'ever', 'Bob', 'didn', 'liked', 'wanted', 'he', 'mother', 'convinced', 'get', 'day', 'told', 'would', 'one', 'could', 'suggested', 'both', 'took', 'is', 'decided', 'found', 'been', 'met', 'grabbed', 'she', 'said', "'s", 'turned', 'walked', 'realized', '.', 'has', 'of', 'were', 'bought', 'noticed', 'got', 'left', 'went', 'turns', 'was', 'then', 'managed'}
num: 9
词: brought 预测值: {'to', 'told', 'were', 'decided', 'got', 'went', 'was', 'had'}
num: 10
词: picture 预测值: {'lot', 'whole', 'shirt', 'time', 'grocery', 'long', 'doll', '.'}
num: 11
词: family 预测值: {'dog', 'mom', 'Bob', 'movie', 'he', 'mother', 'own', 'dad', 'parents', 'day', 'house', 'other', 'teacher', 'life', 'be', 'friends', 'father', 'score', 'test', 'grandmother', 'week', 'wife', 'race', 'husband', '.', 'rent', 'friend'}
num: 12
词: with 预测值: {',', 'to', 'alone', 'happy', 'and', 'the', 'up', '5', 'before', 'against', 'party', 'leaving', 'later', 'during', 'his', 'down', 'on', 'late', 'it', 'a', 'out', 'video', 'for', '.', 'how', 'local', 'of', 'hard', 'her', 'was', 'home', 'falling', 'that', 'but'}
num: 13
词: <s> 预测值: {'<s>'}
num: 14
词: Sue 预测值: {',', 'to', 'ever', 'Bob', 'and', 'the', 'up', 'he', 'they', 'him', 'if', 'his', 'possible', 'very', 'took', 'it', 'a', 'He', 'she', 'for', '.', 'sick', 'of', 'came', 'bought', 'left', 'her', 'went', 'was', 'home', 'that', 'yes'}
num: 15
词: realized 预测值: {',', 'told', 'decided', 'found', 'noticed', 'could', 'was', 'he', 'began', 'had'}
num: 16
词: back 预测值: {'to', 'over', 'away', 'car', 'and', 'up', 'along', 'he', 'still', 'own', 'in', 'moving', 'care', 'friends', 'backyard', 'both', 'down', 'on', 'go', 'a', 'out', 'together', 'for', '.', 'around', 'bought', 'home', 'but'}
num: 17
词: His 预测值: {'He'}
num: 18
词: neighbor 预测值: {'man', 'house', 'water', 'dog', 'car', 'lake', 'one', 'body', 'movie', 'result', 'store', 'friends', 'mother', 'girl'}
num: 19
词: helped 预测值: {'told', 'noticed', 'went', 'was', 'ran', 'had', '.'}
num: 20
词: start 预测值: {'take', 'do', 'go', 'the', 'assignment', 'be', 'turn', 'find', 'get', '.', 'new'}
num: 21
词: the 预测值: {'up', 'still', 'get', 'him', 'school', 'enough', 'first', 'care', 'any', 'store', 'sure', 'himself', 'hours', '.', 'for', 'make', 'of', 'hard', 'water', 'do', 'rent', 'loudly', 'Bob', 'and', 'happy', 'practicing', 'most', 'with', 'too', 'friends', 'town', 'at', 'it', 'go', 'a', "'s", 'well', 'old', 'nervous', 'time', 'when', 'bought', 'see', 'into', 'trouble', 'find', 'buy', 'funny', ',', 'Sue', 'to', 'over', 'grass', 'better', 'asked', 'moving', 'dry', 'be', 'night', 'not', 'late', 'He', 'back', 'lots', 'their', 'books', 'addition', 'going', "'t", 'mean', 'everywhere', 'walking', 'bad', 'about', 'something', 'he', 'so', 'day', 'work', 'his', 'coffee', 'began', 'bored', 'this', 'out', 'she', 'waiting', 'how', 'were', 'ten', 'her', 'went', 'ready', 'really', 'home', 'that', 'yes'}
num: 22
词: car 预测值: {',', 'neighbor', 'month', 'dog', 'neighbors', 'mom', 'lake', 'laptop', 'bike', 'up', 'accident', 'mother', 'own', 'essay', 'number', 'class', 'new', 'power', 'snake', 'ingredients', 'house', 'life', 'list', 'classroom', 'way', 'bus', 'person', 'friends', 'best', 'park', 'grandmother', 'night', 'band', 'large', 'game', 'job', 'week', 'room', 'arm', 'a', 'store', 'business', 'salad', 'suit', 'siblings', 'recipe', '.', 'next', 'man', 'her', 'few', 'girl', 'long'}
num: 23
词: decided 预测值: {'Sue', 'agreed', 'the', 'didn', 'liked', 'he', 'told', 'asked', 'would', 'could', 'suggested', 'began', 'had', 'both', 'found', 'been', 'met', 'she', "'s", 'turned', 'walked', 'realized', 'were', 'got', 'went', 'turns', 'was', 'find'}
num: 24
词: big 预测值: {'great', 'math', 'dog', 'happy', 'mother', 'class', 'day', 'year', 'long', 'new', 'cold', 'piece', 'first', 'friends', 'bite', 'test', 'poor', 'a', 'small', '.', 'lot', 'going', 'trouble', 'few', 'date', 'street'}
num: 25
词: dinner 预测值: {'job', 'week', 'great', 'lot', 'to', 'house', 'it', 'food', 'enough', 'the', 'a', 'her', 'been', 'his', 'he', 'test', 'school', '.'}
num: 26
词: 's 预测值: {',', 'if', 'to', 'Sue', 'told', 'decided', 'Bob', 'and', '6', 'didn', 'went', 'was', 'she', 'had', 'that', 'both', '.'}
num: 27
词: son 预测值: {'job', 'power', 'great', 'dog', 'sister', 'house', 'mom', 'arm', 'mother', 'child', 'own', 'friend', 'new'}
num: 28
词: Christmas 预测值: {'job', 'great', 'local', 'the', 'a', 'her', 'He', 'walk', 'his', 'he', 'own', 'day', '.'}
num: 29
词: ! 预测值: {'to', 'of', 'ever', 'this', 'and', 'about', 'have', 'the', 'a', 'night', 'his', 'that', "'s", 'band', 'than', '.'}
num: 30
词: went 预测值: {',', 'Sue', 'lost', 'to', 'tried', 'Bob', 'didn', 'wanted', 'he', 'day', 'told', 'asked', 'fed', 'would', 'could', 'began', 'are', 'had', 'both', 'decided', 'found', 'picked', 'met', 'she', "'s", 'made', 'turned', 'returned', '.', 'were', 'came', 'bought', 'got', 'opened', 'was'}
num: 31
词: gym 预测值: {'restaurant', 'to', 'house', 'dog', 'car', 'store', 'friends', 'mother', 'park', '.'}
num: 32
词: . 预测值: {'ever', 'up', 'mother', 'him', 'house', 'TV', 'asleep', 'enough', 'during', 'first', 'why', 'anyone', 'card', 'more', 'special', 'for', 'of', 'early', 'ring', 'days', 'was', 'able', 'long', 'know', 'alone', 'mom', 'Bob', 'and', 'with', 'fit', 'than', 'department', 'other', 'friends', 'favorite', 'bit', 'at', 'missing', 'it', 'go', 'share', 'a', "'s", 'as', 'time', 'into', 'food', 'cook', 'across', 'writing', 'find', 'friend', 'but', ',', 'Sue', 'to', 'over', 'away', 'report', 'done', 'one', 'be', 'have', 'bills', 'minutes', 'night', 'not', 'money', 'much', 'going', "'t", 'named', '!', 'hear', 'feel', 'about', 'something', 'the', 'before', 'he', 'girls', 'so', 'day', 'year', 'in', 'videos', 'work', 'life', 'his', 'doing', 'want', 'out', 'she', 'there', 'together', 'how', 'all', 'off', 'her', 'else', 'home', 'that'}
num: 33
词: So 预测值: {'He'}
num: 34
词: will 预测值: {'to', 'decided', 'would', 'was', 'had'}
num: 35
词: go 预测值: {'know', 'prepare', 'the', 'listen', 'quit', 'own', 'get', 'give', 'told', 'be', 'have', 'his', 'miss', 'afford', 'want', 'believe', 'take', 'not', 'come', 'found', 'make', 'were', "'t", 'do', 'see', 'bought', 'cook', 'always', 'her', 'tell', 'was', 'went', 'find', 'watch', 'buy', 'say'}
num: 36
词: bed 预测值: {'house', 'ground', 'way', 'room', 'the', 'wallet', 'her', 'store', 'his', 'phone', 'bathroom', 'class', '.', 'pair'}
num: 37
词: , 预测值: {'Sue', 'to', 'lost', 'away', 'Bob', 'and', 'about', 'up', 'the', 'liked', 'he', 'mother', 'with', 'better', 'later', 'cream', 'one', 'be', 'friends', 'his', '</s>', 'every', 'on', 'surprise', 'very', 'down', 'at', 'card', 'a', 'out', 'she', 'money', 'special', 'for', '.', 'as', 'of', 'alarm', 'into', 'left', 'her', 'went', 'was', 'ready', 'that'}
num: 38
词: so 预测值: {'to', 'Bob', 'and', 'happy', 'about', 'the', 'worth', 'none', 'before', 'he', 'with', 'year', 'terrible', 'in', 'too', 'his', 'upset', 'games', 'every', 'very', 'no', 'gone', 'it', 'a', 'together', 'for', '.', 'afraid', 'of', 'hard', 'going', 'nervous', 'time', 'into', 'her', 'ready', 'like', 'but', 'funny'}
num: 39
词: can 预测值: {'perfect', 'house', 'were', 'decided', 'and', 'could', 'was', 'he', 'bite', 'had', 'realized', '.'}
num: 40
词: be 预测值: {'know', 'never', 'the', 'get', 'need', 'have', 'miss', 'not', 'come', 'go', 'finish', '.', 'make', "'t", 'see', 'always', 'was', 'find', 'buy', 'say'}
num: 41
词: for 预测值: {',', 'to', 'what', 'away', 'Bob', 'and', 'about', 'up', 'the', '5', 'with', 'year', 'him', 'if', 'in', 'place', 'too', 'care', 'his', 'friends', 'every', 'on', 'at', 'it', 'this', 'forward', 'card', 'a', 'team', 'out', 'she', "'s", 'together', 'until', 'money', '.', 'all', 'of', 'hard', 'around', 'everyday', 'into', 'everywhere', 'rent', 'her', 'lessons', 'that'}
num: 42
词: another 预测值: {',', 'to', 'what', 'it', 'Bob', 'the', 'a', 'her', 'his', 'out', 'find', 'him', 'for', '.'}
num: 43
词: day 预测值: {',', 'dog', 'rest', 'own', 'essay', 'class', 'year', 'than', 'power', 'country', 'morning', 'friends', 'his', 'phone', 'best', 'night', 'week', 'book', 'store', 'wife', 'money', '.', 'next', 'lot', 'of', 'city', 'time', 'was', 'date'}
num: 44
词: of 预测值: {',', 'over', 'to', 'and', 'about', 'before', 'he', 'cats', 'day', 'weeks', 'friends', 'his', 'times', 'it', 'she', 'more', 'money', '.', 'off', '!', 'her', 'went', 'was', 'kids', 'friend', 'that'}
num: 45
词: fun 预测值: {'great', 'to', 'lot', 'gone', 'it', 'time', 'Bob', 'enough', 'a', 'been', 'his', 'she', 'more', 'money'}
num: 46
词: All 预测值: {'He'}
num: 47
词: she 预测值: {',', 'wasn', 'Sue', 'to', 'ever', 'Bob', 'and', 'about', 'something', 'the', 'didn', 'he', 'they', 'than', 'better', 'in', 'minutes', 'down', 'very', 'it', 'but', 'a', 'He', 'out', 'video', 'for', 'how', '.', 'much', 'well', 'of', 'bought', 'seeing', 'went', 'her', 'was', 'else', 'that', 'yes'}
num: 48
词: could 预测值: {'to', 'broke', 'didn', 'wanted', 'he', 'get', 'day', 'loved', 'if', 'told', 'would', 'had', 'night', 'needed', 'she', 'were', 'got', 'went', 'was'}
num: 49
词: do 预测值: {'car', 'the', 'play', 'turn', 'convinced', 'get', 'be', 'have', 'care', 'his', 'miss', 'want', 'not', 'cry', 'go', 'a', 'look', '.', 'make', 'all', "'t", 'see', 'her', 'hear', 'find', 'buy', 'bake'}
num: 50
词: was 预测值: {',', 'wasn', 'Sue', 'to', 'what', 'takes', 'Bob', 'and', 'the', 'didn', 'ran', 'he', 'wanted', 'they', 'parents', 'day', 'art', 'in', 'told', 'would', 'fired', 'could', 'his', 'began', 'had', 'took', 'is', 'not', 'decided', 'kept', 'found', 'started', 'it', 'rang', 'met', 'she', 'said', "'s", 'made', 'realized', '.', 'were', 'came', 'into', 'bought', 'got', 'noticed', 'went', 'opened', 'her', 'arrived', 'saw', 'that', 'insisted', 'but'}
num: 51
词: ask 预测值: {'take', 'do', 'get', 'go', 'have', 'the', 'be', 'miss', 'see', 'afford', 'more'}
num: 52
词: about 预测值: {',', 'to', 'mom', 'Bob', 'and', 'meal', 'the', 'he', 'so', 'if', 'terrible', 'his', 'very', 'it', 'anyone', 'a', 'sure', 'out', 'she', '.', 'of', 'wrong', 'her', 'that'}
num: 53
词: He 预测值: {'He', 'he'}
num: 54
词: walked 预测值: {',', 'to', 'Bob', 'and', 'wanted', 'they', 'told', 'could', 'began', 'had', 'both', 'kept', 'decided', 'met', 'complained', 'grabbed', 'she', "'s", 'realized', 'were', 'bought', 'got', 'laughed', 'went', 'was'}
num: 55
词: while 预测值: {',', 'to', 'month', 'over', 'Bob', 'and', 'he', 'that', 'long', 'him', '.'}
num: 56
词: when 预测值: {'to', 'what', 'and', 'about', 'the', 'up', 'he', 'year', 'why', 'at', 'it', 'a', "'s", 'for', '.', 'of', 'ring', 'bought', 'her', 'that'}
num: 57
词: it 预测值: {',', 'Sue', 'to', 'bad', 'away', 'Bob', 'and', 'about', 'the', 'up', '5', 'none', 'he', 'against', 'so', 'him', 'work', 'one', 'care', 'his', 'fun', 'not', 'this', 'any', 'a', 'lots', 'out', 'she', 'getting', 'money', '.', 'how', 'much', 'hard', 'bought', 'left', 'her', 'went', 'anything', 'home', 'that', 'yes'}
num: 58
词: started 预测值: {'took', 'all', 'asked', 'were', 'told', 'bought', 'and', 'got', 'went', 'they', 'was', 'he', 'began', 'see', 'pulled', '.'}
num: 59
词: When 预测值: {'He'}
num: 60
词: looked 预测值: {'asked', 'told', 'were', "'t", 'found', 'would', 'bought', 'got', 'noticed', 'always', 'could', 'went', 'was', 'had', 'night', '.'}
num: 61
词: him 预测值: {'Sue', 'to', 'Bob', 'up', 'the', 'everyone', 'he', 'them', 'be', 'his', 'it', 'being', 'a', 'out', 'surgery', '.', 'how', 'much', 'all', 'wrong', 'good', 'her', 'was'}
num: 62
词: he 预测值: {',', 'Sue', 'to', 'what', 'dog', 'walking', 'ever', 'Bob', 'and', 'about', 'from', 'the', 'didn', 'still', 'they', 'get', 'if', 'in', 'terrible', 'asked', 'work', 'his', 'grandmother', 'at', 'it', 'desk', 'a', 'He', 'grabbed', 'out', 'she', 'for', '.', 'how', 'next', 'of', 'hard', 'were', 'yes', "'t", 'into', 'bought', 'went', 'her', 'was', 'expensive', 'that', 'but'}
num: 63
词: at 预测值: {',', 'to', 'walking', 'away', 'falling', 'happy', 'and', 'agreed', 'about', 'up', 'allowed', 'the', 'fighting', 'surprised', 'he', 'with', 'so', 'better', 'in', 'offer', 'by', 'during', 'bus', 'his', 'upset', 'planning', 'down', 'grades', 'on', 'both', 'run', 'not', 'it', 'a', 'she', "'s", 'more', 'for', 'as', '.', 'well', 'all', 'of', 'were', 'going', 'came', 'around', 'everywhere', 'her', 'was', 'else', 'friend', 'able', 'home', 'studying', 'dating', 'that'}
num: 64
词: her 预测值: {',', 'now', 'Sue', 'to', 'what', 'ever', 'Bob', 'and', 'about', 'something', 'the', 'up', 'horses', 'he', 'television', 'they', 'get', 'so', 'him', 'day', 'fast', 'if', 'in', 'work', 'be', 'enough', 'his', 'friends', 'town', 'sent', 'began', 'on', 'very', 'not', 'it', 'anyone', 'go', 'a', 'out', 'she', 'more', 'for', '.', 'how', 'of', 'weight', 'early', 'see', 'into', 'bought', 'time', 'seeing', 'went', 'was', 'anything', 'home', 'buy', 'that'}
num: 65
词: friend 预测值: {'dog', 'neighbors', 'mom', 'car', 'laptop', 'up', 'mother', 'own', 'dad', 'parents', 'new', 'birthday', 'house', 'one', 'body', 'friends', 'favorite', 'job', 'boyfriend', 'small', 'wife', 'for', '.', 'next', 'lot', 'few', 'child', 'girl', 'large', 'clothes'}
num: 66
词: help 预测值: {'to', 'the', 'get', 'new', 'give', 'in', 'have', 'be', 'friends', 'family', 'wait', 'want', 'pay', 'go', 'a', 'money', 'make', '.', 'of', 'her', 'see', 'buy'}
num: 67
词: clean 预测值: {'of', 'come', 'fresh', 'see', 'happy', 'Bob', 'go', 'the', 'up', 'be', 'her', 'a', 'have', 'find', 'enter', 'money', 'new'}
num: 68
词: But 预测值: {'He'}
num: 69
词: knew 预测值: {'got', 'noticed', 'could', 'turns', 'went', 'was', 'wanted', 'only', 'had', 'needed', '.'}
num: 70
词: bored 预测值: {'missing', 'moving', 'nervous', 'tall', 'happy', 'and', 'a', 'went', 'so', '.'}
num: 71
词: buys 预测值: {'was'}
num: 72
词: phone 预测值: {'dog', 'bike', 'sweater', 'mother', 'own', 'number', 'day', 'pair', 'power', 'new', 'house', 'life', 'bus', 'bite', 'grandmother', 'job', 'book', 'same', 'store', 'road', 'wife', 'porch', 'ground', 'lot', 'whole', 'friend', 'street'}
num: 73
词: cried 预测值: {'was', 'bought', 'could'}
num: 74
词: afterwards 预测值: {',', 'to', 'not', 'and', 'the', 'he', '.'}
num: 75
词: good 预测值: {'great', 'bad', 'happy', 'the', 'he', 'so', 'fast', 'new', 'better', 'terrible', 'his', 'friends', 'that', 'game', 'week', 'it', 'a', 'sure', 'race', 'sick', '.', 'well', 'lot', 'hard', 'going', 'nervous', 'extremely', 'food', 'cup', 'big', 'hospital', 'expensive', 'long', 'but'}
num: 76
词: to 预测值: {'allowed', 'up', 'didn', 'mother', 'own', 'get', 'him', 'cream', 'enough', 'attention', 'young', 'on', 'grades', 'dancing', 'special', 'for', '.', 'of', 'early', 'everyday', 'was', 'dating', 'mom', 'Bob', 'and', 'with', 'if', 'too', 'friends', 'down', 'very', 'at', 'it', 'a', 'past', 'around', 'time', 'into', 'across', 'but', ',', 'over', 'loose', 'dog', 'away', 'drove', 'bike', 'they', 'complete', 'one', 'fun', 'minutes', 'had', 'times', 'working', 'been', 'money', 'much', 'good', 'about', 'the', 'before', 'he', 'class', 'day', 'in', 'his', 'cereal', 'promised', 'out', 'she', 'door', 'together', 'waiting', 'how', 'all', 'her', 'else', 'home', 'that'}
num: 77
词: share 预测值: {'see', 'take', 'go'}
num: 78
词: candy 预测值: {'game', 'lot', 'whole', 'book', 'other', 'life', 'the', 'store', 'own', 'fire', 'ball', '.'}
num: 79
词: class 预测值: {'dog', 'the', 'movie', 'mother', 'own', 'essay', 'parents', 'number', 'day', 'school', 'house', 'other', 'teacher', 'country', 'his', 'friends', 'backyard', 'room', 'road', 'wife', 'leash', 'music', '.', 'addition', 'her', 'went', 'was', 'hospital', 'find', 'friend', 'long', 'fire'}
num: 80
词: Unfortunately 预测值: {'He'}
num: 81
词: has 预测值: {'told', 'decided', 'grateful', 'would', 'offered', 'looks', 'went', 'was', 'hired', 'had', 'realized'}
num: 82
词: no 预测值: {'now', 'Sue', 'to', 'Bob', 'the', 'he', 'so', 'have', 'his', 'not', 'it', 'a', 'been', 'thrilled', 'she', 'went', 'her', 'was', 'that', 'yes'}
num: 83
词: idea 预测值: {'A', 'time', 'storm', 'money', '.'}
num: 84
词: take 预测值: {'to', 'the', 'play', 'quit', 'get', 'him', 'be', 'come', 'go', 'a', 'stay', 'make', 'see', 'bought', 'always', 'was', 'hear', 'help', 'find', 'buy'}
num: 85
词: dog 预测值: {'mom', 'the', 'he', 'mother', 'own', 'class', 'day', 'ball', 'new', 'house', 'other', 'truck', 'morning', 'teacher', 'decision', 'vet', 'bus', 'first', 'friends', 'bite', 'second', 'large', 'job', 'game', 'train', 'a', 'store', 'wife', 'music', 'headphones', 'puppy', 'kitten', 'money', '.', 'perfect', 'man', 'whole', 'lot', 'black', 'city', 'alarm', 'food', 'friend', 'date'}
num: 86
词: been 预测值: {'to', 'gone', 'taken', 'seen', 'a', 'liked', 'promised'}
num: 87
词: going 预测值: {'to', 'the', 'he', 'so', 'cheap', 'moving', 'too', 'his', 'upset', 'had', 'skating', 'both', 'very', 'not', 'at', 'it', 'paying', 'this', 'go', 'out', 'waiting', '.', 'taking', 'all', 'nervous', 'time', 'her', 'able', 'falling', 'ask'}
num: 88
词: around 预测值: {',', 'afraid', 'him', 'to', 'in', 'of', 'away', 'into', 'everywhere', 'and', 'up', 'a', 'her', 'home', 'down', '.'}
num: 89
词: long 预测值: {'great', 'month', 'years', 'he', 'fast', 'ball', 'cold', 'new', 'his', 'possible', 'walk', 'very', 'it', 'paying', 'a', 'small', 'difficult', 'she', '.', 'lot', 'hard', 'few', 'cup', 'large', 'cruise'}
num: 90
词: took 预测值: {'Sue', 'Bob', 'looked', 'ran', 'wanted', 'he', 'fell', 'asked', 'told', 'could', 'his', 'began', 'had', 'decided', 'rode', 'met', 'said', "'s", '.', 'were', 'bought', 'left', 'noticed', 'went', 'was', 'saw'}
num: 91
词: many 预测值: {'no', 'Sue', 'to', 'it', 'work', 'enough', 'the', 'a', 'her', 'most', 'his', 'he', 'she', 'expensive', 'long', 'on', 'hours'}
num: 92
词: from 预测值: {',', 'him', 'to', 'off', 'into', 'and', 'got', 'the', 'up', 'her', 'a', 'out', 'walked', '.'}
num: 93
词: music 预测值: {'whole', 'dog', 'rest', 'morning', 'car', 'Bob', 'go', 'the', 'a', 'store', 'team', 'his', 'phone', 'race', 'favorite', 'test', 'highway', '.'}
num: 94
词: teacher 预测值: {'dog', 'mom', 'car', 'mother', 'day', 'new', 'power', 'first', 'family', 'best', 'favorite', 'test', 'backyard', '.', 'lot', 'cake', 'kids', 'classmates', 'friend', 'that'}
num: 95
词: trying 预测值: {'to', 'in', 'missing', 'nervous', 'her', 'fighting', 'find', 'so', '.'}
num: 96
词: not 预测值: {'to', 'happy', 'and', 'the', 'didn', 'he', 'so', 'leaving', 'too', 'be', 'his', 'upset', 'both', 'very', 'gone', 'missing', 'it', 'this', 'go', 'a', 'been', 'she', 'safe', 'for', '.', 'make', 'all', 'going', 'nervous', "'t", 'went', 'her', 'say'}
num: 97
词: hit 预测值: {',', 'found', 'go', 'a', 'went', 'was', 'grabbed', 'he', 'able', 'home', 'saw', 'get'}
num: 98
词: buy 预测值: {'find', 'to', 'take', 'not', 'do', 'go', 'the', 'be', 'leave', 'look', 'see', 'get', 'make'}
num: 99
词: one 预测值: {',', 'to', 'dog', 'loudly', 'and', 'laptop', 'the', 'up', 'he', 'with', 'parents', 'day', 'pair', 'truck', 'dress', 'enough', 'first', 'pie', 'his', 'upset', 'best', 'began', 'idea', 'had', 'game', 'it', 'matter', 'any', 'a', 'out', 'she', 'more', '.', 'of', 'nervous', 'kind', 'went', 'her', 'few', 'girl', '$'}
num: 100
词: himself 预测值: {'ever', 'when', 'this', 'the', 'a', 'play', 'his', 'anything', '.'}