[Releases · MzeroMiko/VMamba · GitHub](https://github.com/MzeroMiko/VMamba/releases)
安装torchvision等:
尝试版本1:Windows 上 python=3.10, torch=1.13.1 成功
pip install torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117
尝试版本2:Windows 上 python=3.10, torch=2.1.0
pip install torchvision==0.16.0+cu118 torchaudio==2.1.0 --extra-index-url https://download.pytorch.org/whl/cu118
模型pipeline:
熟悉模型结构:
import os
from functools import partial
from typing import Callable
import torch
from torch import nn
from torch.utils import checkpoint
from mmengine.model import BaseModule
from mmdet.registry import MODELS as MODELS_MMDET
from mmseg.registry import MODELS as MODELS_MMSEG
def import_abspy(name="models", path="classification/"):
    """Import module ``name`` from the directory ``path`` and return it.

    ``path`` is converted to an absolute path and placed at the front of
    ``sys.path`` only for the duration of the import, so the search path is
    left as it was found whether the import succeeds or fails.

    Args:
        name: Module or package name handed to ``importlib.import_module``.
        path: Directory containing the module; may be relative.

    Returns:
        The imported module object.

    Raises:
        AssertionError: If ``path`` is not an existing directory.
        ImportError: Propagated from ``importlib.import_module`` when the
            module cannot be imported.
    """
    import sys
    import importlib

    path = os.path.abspath(path)
    assert os.path.isdir(path), "not a directory: {}".format(path)
    sys.path.insert(0, path)
    try:
        module = importlib.import_module(name)
    finally:
        # Remove by value instead of pop(0): the imported module may itself
        # have touched sys.path, so index 0 is not guaranteed to be our entry.
        try:
            sys.path.remove(path)
        except ValueError:
            # The entry was already removed by the imported code.
            pass
    # Report which module was loaded.
    print("detection imported module: {}".format(module.__name__))
    return module
def main():
    """Build an ``MM_VSSM`` backbone with default arguments and print it.

    The ``models`` package is loaded from the ``../classification/`` directory
    located relative to this file, ``Backbone_VSSM`` is taken from its
    ``vmamba`` submodule, and a subclass that also derives from mmengine's
    ``BaseModule`` is instantiated and dumped to stdout.
    """
    # Resolve the classification directory from this file's own location.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    classification_dir = os.path.join(this_dir, "../classification/")
    models_pkg = import_abspy("models", classification_dir)
    Backbone_VSSM = models_pkg.vmamba.Backbone_VSSM

    class MM_VSSM(BaseModule, Backbone_VSSM):
        def __init__(self, *args, **kwargs):
            # Each base initialiser is invoked explicitly, BaseModule first.
            BaseModule.__init__(self)
            Backbone_VSSM.__init__(self, *args, **kwargs)

    # Smoke-test construction of MM_VSSM with its default arguments.
    model = MM_VSSM()

    # Print the class name to confirm the import and construction worked.
    print("Model class:", type(model).__name__)
    print("Model:", model)
    print(vars(model))


if __name__ == "__main__":
    main()
print("Model:", model)
Model: MM_VSSM(
(patch_embed): Sequential(
(0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
(1): Permute()
(2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
)
(layers): ModuleList(
(0): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=96, out_features=384, bias=False)
(act): SiLU()
(conv2d): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192)
(out_act): Identity()
(out_proj): Linear(in_features=192, out_features=96, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0)
(norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=384, out_features=96, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=96, out_features=384, bias=False)
(act): SiLU()
(conv2d): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192)
(out_act): Identity()
(out_proj): Linear(in_features=192, out_features=96, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0071428571827709675)
(norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=384, out_features=96, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
)
)
(1): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=192, out_features=768, bias=False)
(act): SiLU()
(conv2d): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384)
(out_act): Identity()
(out_proj): Linear(in_features=384, out_features=192, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.014285714365541935)
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=192, out_features=768, bias=False)
(act): SiLU()
(conv2d): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384)
(out_act): Identity()
(out_proj): Linear(in_features=384, out_features=192, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.02142857201397419)
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
)
)
(2): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwi