tensor.permute(*dims)

This article shows how to use PyTorch's permute method to rearrange the dimensions of a tensor, with an example that reorders a 3-D tensor from shape (2, 3, 5) to (5, 2, 3).

Rearranging tensor dimensions

>>> x = torch.randn(2, 3, 5)
>>> x.size()
torch.Size([2, 3, 5])
>>> x.permute(2, 0, 1).size()   # old dim 2 moves to the front, followed by dims 0 and 1
torch.Size([5, 2, 3])
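
Two follow-up points, sketched as a continuation of the session above (x as defined there): permute returns a view over the same storage, so the result is usually non-contiguous and must be made contiguous (or passed through .reshape()) before calling .view(); and a common practical use is reordering image batches between NCHW and NHWC layouts.

>>> y = x.permute(2, 0, 1)
>>> y.is_contiguous()                     # permute returns a view; the underlying memory is not rearranged
False
>>> y.contiguous().view(5, -1).size()     # .view() needs a contiguous tensor; .reshape() also works
torch.Size([5, 6])
>>> torch.equal(x.permute(0, 2, 1), x.transpose(1, 2))   # swapping exactly two dims: transpose is equivalent
True
>>> img = torch.randn(4, 3, 32, 32)       # a batch of images in NCHW layout
>>> img.permute(0, 2, 3, 1).size()        # NCHW -> NHWC
torch.Size([4, 32, 32, 3])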

 

