import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# BaseDataloader / BaseModel come from the project's own modules; their imports are not shown in the original excerpt.


class Zh2EnDataloader(BaseDataloader):
    def __init__(self, src_filename, trg_filename, src_vocab, trg_vocab,
                 batch_size, shuffle, logger):
        super().__init__()
        self.src_filename = src_filename
        self.trg_filename = trg_filename
        self.src_vocab = src_vocab
        self.trg_vocab = trg_vocab
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.logger = logger
        self.src_lines, self.trg_lines = self._read_data()
    def __len__(self):
        return len(self.src_lines)
    def __getitem__(self, index):
        # each item is one pre-chunked batch of token lists
        src_data = self.src_lines[index]
        trg_data = self.trg_lines[index]
        max_src_len = 0
        max_trg_len = 0
        src_batch_id = []
        trg_batch_id = []
        for src_tokens, trg_tokens in zip(src_data, trg_data):
            max_src_len = max(max_src_len, len(src_tokens))
            max_trg_len = max(max_trg_len, len(trg_tokens))
            src_batch_id.append([self.src_vocab.word2id.get(word, self.src_vocab.word2id['<unk>'])
                                 for word in src_tokens])
            trg_batch_id.append([self.trg_vocab.word2id.get(word, self.trg_vocab.word2id['<unk>'])
                                 for word in trg_tokens])
        # pad every sequence in the batch to the batch maximum with <pad>
        src = torch.LongTensor(self.batch_size, max_src_len).fill_(self.src_vocab.word2id['<pad>'])
        trg = torch.LongTensor(self.batch_size, max_trg_len).fill_(self.trg_vocab.word2id['<pad>'])
        for i in range(self.batch_size):
            src[i, :len(src_batch_id[i])] = torch.LongTensor(src_batch_id[i])
            trg[i, :len(trg_batch_id[i])] = torch.LongTensor(trg_batch_id[i])
        return src, trg
    def _read_data(self):
        self.logger.debug("-----------read data-----------")
        with open(self.src_filename, 'r', encoding='utf-8') as f:
            src_lines = np.array(f.readlines())
        with open(self.trg_filename, 'r', encoding='utf-8') as f:
            trg_lines = np.array(f.readlines())
        assert len(src_lines) == len(trg_lines)
        if self.shuffle:
            idx = np.random.permutation(len(src_lines))
            src_lines = src_lines[idx]
            trg_lines = trg_lines[idx]
        self.logger.debug("{} and {} has data {}".format(
            self.src_filename, self.trg_filename, len(src_lines)))
        return self._preprocess_data(src_lines, trg_lines)
    def _preprocess_data(self, src_lines, trg_lines):
        self.logger.debug("-----------preprocess data-----------")
        # lines are pre-tokenized and tab-separated; add <sos>/<eos> markers
        src_lines = [['<sos>'] + line.strip().split('\t') + ['<eos>'] for line in src_lines]
        trg_lines = [['<sos>'] + line.strip().split('\t') + ['<eos>'] for line in trg_lines]
        # chunk the corpus into batches of batch_size sentences
        src_lines = [src_lines[i:i + self.batch_size] for i in range(0, len(src_lines), self.batch_size)]
        trg_lines = [trg_lines[i:i + self.batch_size] for i in range(0, len(trg_lines), self.batch_size)]
        return src_lines, trg_lines
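The Encoder and Decoder below use WordEmbeddings and PositionEmbeddings, which are not defined in this excerpt. A minimal sketch of what they could look like, assuming learned (trainable) positional embeddings indexed by position id and a plain nn.Module base (the project may use its own BaseModel instead):

import torch.nn as nn

class WordEmbeddings(nn.Module):
    # token ids [batch, seq_len] -> vectors [batch, seq_len, h_dim]
    def __init__(self, vocab_size, h_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, h_dim)

    def forward(self, token_ids):
        return self.embedding(token_ids)


class PositionEmbeddings(nn.Module):
    # position ids [batch, seq_len] -> vectors [batch, seq_len, h_dim], learned up to max_seq_len positions
    def __init__(self, max_seq_len, h_dim):
        super().__init__()
        self.embedding = nn.Embedding(max_seq_len, h_dim)

    def forward(self, pos):
        return self.embedding(pos)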
class Encoder(BaseModel):
    def __init__(self, vocab_size, h_dim, pf_dim, n_heads, n_layers, dropout, device, max_seq_len=200):
        super().__init__()
        self.n_layers = n_layers
        self.h_dim = h_dim
        self.device = device
        self.word_embeddings = WordEmbeddings(vocab_size, h_dim)
        self.pe = PositionEmbeddings(max_seq_len, h_dim)
        self.layers = nn.ModuleList()
        for i in range(n_layers):
            self.layers.append(EncoderLayer(h_dim, n_heads, pf_dim, dropout, device))
        self.dropout = nn.Dropout(dropout)
        self.scale = torch.sqrt(torch.FloatTensor([h_dim])).to(device)
    def forward(self, src, src_mask):
        output = self.word_embeddings(src) * self.scale
        src_len = src.shape[1]
        pos = torch.arange(0, src_len).unsqueeze(0).repeat(src.shape[0], 1).to(self.device)
        output = self.dropout(output + self.pe(pos))
        # output = self.pe(output)
        for i in range(self.n_layers):
            output = self.layers[i](output, src_mask)
        return output
class EncoderLayer(BaseModel):
    def __init__(self, h_dim, n_heads, pf_dim, dropout, device):
        super().__init__()
        self.attention = MultiHeadAttentionLayer(h_dim, n_heads, dropout, device)
        self.attention_layer_norm = nn.LayerNorm(h_dim)
        self.ff_layer_norm = nn.LayerNorm(h_dim)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(h_dim, pf_dim, dropout)
        self.attention_dropout = nn.Dropout(dropout)
        self.ff_dropout = nn.Dropout(dropout)
    def forward(self, src, src_mask):
        att_output = self.attention(src, src, src, src_mask)
        # residual connection + layer norm around self-attention
        output = self.attention_layer_norm(src + self.attention_dropout(att_output))
        # residual connection + layer norm around the position-wise feedforward
        ff_output = self.positionwise_feedforward(output)
        output = self.ff_layer_norm(output + self.ff_dropout(ff_output))
        return output
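MultiHeadAttentionLayer is referenced by EncoderLayer and DecoderLayer but is not defined in this excerpt either. A minimal sketch of a standard scaled dot-product multi-head attention module, assuming the (h_dim, n_heads, dropout, device) constructor and the single-tensor return value the layers above expect; this is an assumption about the project's implementation, not the original code:

import torch
import torch.nn as nn

class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, h_dim, n_heads, dropout, device):
        super().__init__()
        assert h_dim % n_heads == 0
        self.h_dim = h_dim
        self.n_heads = n_heads
        self.head_dim = h_dim // n_heads
        self.fc_q = nn.Linear(h_dim, h_dim)
        self.fc_k = nn.Linear(h_dim, h_dim)
        self.fc_v = nn.Linear(h_dim, h_dim)
        self.fc_o = nn.Linear(h_dim, h_dim)
        self.dropout = nn.Dropout(dropout)
        self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)

    def forward(self, query, key, value, mask=None):
        batch_size = query.shape[0]
        # project, then split into heads: [batch, n_heads, seq_len, head_dim]
        Q = self.fc_q(query).view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        K = self.fc_k(key).view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        V = self.fc_v(value).view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        # scaled dot-product attention scores: [batch, n_heads, q_len, k_len]
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
        if mask is not None:
            energy = energy.masked_fill(mask == 0, -1e10)
        attention = torch.softmax(energy, dim=-1)
        # weighted sum of values, then merge heads back to [batch, q_len, h_dim]
        x = torch.matmul(self.dropout(attention), V)
        x = x.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.h_dim)
        return self.fc_o(x)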
class Decoder(BaseModel):
    def __init__(self, vocab_size, h_dim, pf_dim, n_heads, n_layers, dropout, device, max_seq_len=200):
        super().__init__()
        self.n_layers = n_layers
        self.h_dim = h_dim
        self.device = device
        self.word_embeddings = WordEmbeddings(vocab_size, h_dim)
        # self.pe = PositionEncoder(h_dim, device, dropout=dropout)
        self.pe = PositionEmbeddings(max_seq_len, h_dim)
        self.layers = nn.ModuleList()
        self.dropout = nn.Dropout(dropout)
        self.scale = torch.sqrt(torch.FloatTensor([h_dim])).to(device)
        for i in range(n_layers):
            self.layers.append(DecoderLayer(h_dim, pf_dim, n_heads, dropout, device))
    def forward(self, target, encoder_output, src_mask, target_mask):
        output = self.word_embeddings(target) * self.scale
        tar_len = target.shape[1]
        pos = torch.arange(0, tar_len).unsqueeze(0).repeat(target.shape[0], 1).to(self.device)
        # add positional embeddings and dropout, mirroring the encoder
        output = self.dropout(output + self.pe(pos))
        for i in range(self.n_layers):
            output = self.layers[i](output, encoder_output, src_mask, target_mask)
        return output
class DecoderLayer(BaseModel):
    def __init__(self, h_dim, pf_dim, n_heads, dropout, device):
        super().__init__()
        self.self_attention = MultiHeadAttentionLayer(h_dim, n_heads, dropout, device)
        self.attention = MultiHeadAttentionLayer(h_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(h_dim, pf_dim, dropout)
        self.self_attention_layer_norm = nn.LayerNorm(h_dim)
        self.attention_layer_norm = nn.LayerNorm(h_dim)
        self.ff_layer_norm = nn.LayerNorm(h_dim)
        self.self_attention_dropout = nn.Dropout(dropout)
        self.attention_dropout = nn.Dropout(dropout)
        self.ff_dropout = nn.Dropout(dropout)
    def forward(self, target, encoder_output, src_mask, target_mask):
        # masked self-attention over the target sequence
        self_attention_output = self.self_attention(target, target, target, target_mask)
        output = self.self_attention_layer_norm(target + self.self_attention_dropout(self_attention_output))
        # encoder-decoder (cross) attention over the encoder output
        attention_output = self.attention(output, encoder_output, encoder_output, src_mask)
        output = self.attention_layer_norm(output + self.attention_dropout(attention_output))
        ff_output = self.positionwise_feedforward(output)
        output = self.ff_layer_norm(output + self.ff_dropout(ff_output))
        return output
class PositionwiseFeedforwardLayer(BaseModel):
    def __init__(self, h_dim, pf_dim, dropout):
        super().__init__()
        self.fc_1 = nn.Linear(h_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, h_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, inputs):
        inputs = torch.relu(self.fc_1(inputs))
        inputs = self.dropout(inputs)
        inputs = self.fc_2(inputs)
        return inputs
class Transformer(BaseModel):
    def __init__(self, src_vocab_size, target_vocab_size, h_dim,
                 enc_pf_dim, dec_pf_dim, enc_n_layers, dec_n_layers,
                 enc_n_heads, dec_n_heads, enc_dropout, dec_dropout, device, **kwargs):
        super().__init__()
        self.encoder = Encoder(src_vocab_size, h_dim, enc_pf_dim, enc_n_heads,
                               enc_n_layers, enc_dropout, device)
        self.decoder = Decoder(target_vocab_size, h_dim, dec_pf_dim, dec_n_heads,
                               dec_n_layers, dec_dropout, device)
        self.fc = nn.Linear(h_dim, target_vocab_size)

    def forward(self, src, target, src_mask, target_mask):
        encoder_output = self.encoder(src, src_mask)
        output = self.decoder(target, encoder_output, src_mask, target_mask)
        output = self.fc(output)
        return output
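The trainer methods and translate_sentence below call make_src_mask and make_trg_mask, which are also missing from the excerpt. A plausible sketch, assuming the usual convention of a <pad> mask for the source and a combined <pad> + causal (look-ahead) mask for the target, shaped to broadcast against the [batch, n_heads, query_len, key_len] attention scores:

import torch

def make_src_mask(src, src_vocab, device):
    # src: [batch_size, src_len]; True where the token is not <pad>
    pad_id = src_vocab.word2id['<pad>']
    src_mask = (src != pad_id).unsqueeze(1).unsqueeze(2)   # [batch, 1, 1, src_len]
    return src_mask.to(device)


def make_trg_mask(trg, trg_vocab, device):
    # trg: [batch_size, trg_len]; combine the <pad> mask with a lower-triangular causal mask
    pad_id = trg_vocab.word2id['<pad>']
    trg_pad_mask = (trg != pad_id).unsqueeze(1).unsqueeze(2)               # [batch, 1, 1, trg_len]
    trg_len = trg.shape[1]
    trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device=trg.device)).bool()
    trg_mask = trg_pad_mask & trg_sub_mask                                 # [batch, 1, trg_len, trg_len]
    return trg_mask.to(device)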
# The two methods below belong to the project's Trainer class (its definition is not part of this
# excerpt); they assume self.model, self.optimizer, self.criterion, self.device, self.data_loader,
# self.valid_data_loader, self.log_step and self.logger are set up elsewhere.
def _train_epoch(self, epoch):
    self.model.train()
    total_loss = 0
    for idx, (src, trg) in enumerate(self.data_loader):
        src = src.to(self.device)
        trg = trg.to(self.device)
        src_mask = make_src_mask(src, self.data_loader.src_vocab, self.device)
        trg_mask = make_trg_mask(trg[:, :-1], self.data_loader.trg_vocab, self.device)
        self.optimizer.zero_grad()
        output = self.model(src, trg[:, :-1], src_mask, trg_mask)
        # output = [batch_size, target_len-1, target_vocab_size]
        # trg    = <sos>, token1, token2, token3, ...
        # output =        token1, token2, token3, ..., <eos>
        output_dim = output.shape[-1]
        output = output.contiguous().view(-1, output_dim)
        # output = [batch_size * (target_len-1), target_vocab_size]
        trg = trg[:, 1:].contiguous().view(-1)
        # trg = [batch_size * (target_len-1)]
        loss = self.criterion(output, trg)
        loss.backward()
        # tunable parameter: the clipping value 1 can be changed to other values to experiment
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
        self.optimizer.step()
        total_loss += loss.item()
        if idx % self.log_step == 0:
            self.logger.info('Train Epoch: {}, {}/{} ({:.0f}%), Loss: {:.6f}'.format(
                epoch,
                idx,
                len(self.data_loader),
                idx * 100 / len(self.data_loader),
                loss.item()))
def _valid_epoch(self):
    self.model.eval()
    val_loss = 0
    pred = []
    labels = []
    with torch.no_grad():
        for idx, (src, trg) in enumerate(self.valid_data_loader):
            src = src.to(self.device)
            trg = trg.to(self.device)
            src_mask = make_src_mask(src, self.valid_data_loader.src_vocab, self.device)
            trg_mask = make_trg_mask(trg[:, :-1], self.data_loader.trg_vocab, self.device)
            output = self.model(src, trg[:, :-1], src_mask, trg_mask)
            output = F.log_softmax(output, dim=-1)
            output_dim = output.shape[-1]
            output = output.contiguous().view(-1, output_dim)
            # output = [batch_size * (target_len-1), target_vocab_size]
            trg = trg[:, 1:].contiguous().view(-1)
            val_loss += self.criterion(output, trg)
    return val_loss / len(self.valid_data_loader)
def translate_sentence(sentence, model, device, zh_vocab, en_vocab, zh_tokenizer, max_len=100):
    model.eval()
    tokens = zh_tokenizer.tokenizer(sentence)
    print(tokens)
    tokens = ['<sos>'] + tokens + ['<eos>']
    tokens = [zh_vocab.word2id[word] for word in tokens]
    src_tensor = torch.LongTensor(tokens).unsqueeze(0).to(device)
    src_mask = make_src_mask(src_tensor, zh_vocab, device)
    with torch.no_grad():
        enc_src = model.encoder(src_tensor, src_mask)
    trg = [en_vocab.word2id['<sos>']]
    for i in range(max_len):
        trg_tensor = torch.LongTensor(trg).unsqueeze(0).to(device)
        trg_mask = make_trg_mask(trg_tensor, en_vocab, device)
        with torch.no_grad():
            output = model.decoder(trg_tensor, enc_src, src_mask, trg_mask)
            output = model.fc(output)
        # greedy decoding: take the most probable token at the last position
        pred_token = output.argmax(2)[:, -1].item()
        trg.append(pred_token)
        if pred_token == en_vocab.word2id['<eos>']:
            break
    trg_tokens = [en_vocab.id2word[idx] for idx in trg]
    return trg_tokens

Based on the code above, complete the experiment and give the full code.
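Finally, a minimal sketch of how these pieces might be wired together for the experiment. The vocabulary objects (zh_vocab, en_vocab exposing word2id / id2word), the tokenizer, the file names and the hyperparameters below are placeholders chosen for illustration, not values taken from the original code:

import logging
import torch
import torch.nn as nn

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('zh2en')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# zh_vocab / en_vocab are assumed to be prebuilt vocabulary objects with word2id / id2word dicts
train_loader = Zh2EnDataloader('train.zh', 'train.en', zh_vocab, en_vocab,
                               batch_size=64, shuffle=True, logger=logger)

model = Transformer(src_vocab_size=len(zh_vocab.word2id),
                    target_vocab_size=len(en_vocab.word2id),
                    h_dim=256, enc_pf_dim=512, dec_pf_dim=512,
                    enc_n_layers=3, dec_n_layers=3,
                    enc_n_heads=8, dec_n_heads=8,
                    enc_dropout=0.1, dec_dropout=0.1, device=device).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=en_vocab.word2id['<pad>'])

# after training (for example via the Trainer methods above):
# print(translate_sentence('今天天气很好。', model, device, zh_vocab, en_vocab, zh_tokenizer))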