论文
该论文的研究成果在本项目的实现过程中起到了至关重要的作用。以下是该论文的详细信息：
文章链接: VISION PERMUTATOR: A PERMUTABLE MLP-LIKE ARCHITECTURE FOR VISUAL RECOGNITION
模块
创新点
本项目在多个方面进行了创新和改进，以下是项目的主要创新点：
代码
代码链接: https://github.com/houqb/VisionPermutator/blob/main
模块
本模块涉及核心算法和模型训练。
import torch.nn as nn
class VisionPermutator(nn.Module):
""" Vision Permutator
视觉排列器,是一个用于图像处理的神经网络模型
"""
def __init__(self, layers, img_size=224, patch_size=4, in_chans=3, num_classes=1000,
embed_dims=None, transitions=None, segment_dim=None, mlp_ratios=None, skip_lam=1.0,
qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.,
norm_layer=nn.LayerNorm, mlp_fn=WeightedPermuteMLP):
super().__init__()
self.num_classes = num_classes
# 将输入图像分割成小块,并进行初步的嵌入
self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dims[0])
network = []
# 构建整个网络的架构
for i in range(len(layers)):
# 添加基础模块到网络中
stage = basic_blocks(embed_dims[i], i, layers, segment_dim[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
qk_scale=qk_scale, attn_drop=attn_drop_rate, drop_path_rate=drop_path_rate, norm_layer=norm_layer, skip_lam=skip_lam,
mlp_fn=mlp_fn)
network.append(stage)
if i >= len(layers) - 1:
break
if transitions[i] or embed_dims[i] != embed_dims[i+1]:
# 如果需要转换,添加下采样层
patch_size = 2