Converting a Caffe model to a PyTorch model

Basic conversion workflow:

1) Build the network structure in PyTorch;

2) Load the stored pretrained model with caffe (pycaffe) so that its parameters can be read (a sketch of steps 2 and 3 follows this list);

3) Iterate over the parameters of the loaded caffe model;

4) Handle and convert the keys (layer names) that need special treatment;

5) For the renamed layers (keys), copy the parameters over through numpy arrays;

6) Compare the output features of the corresponding layers to verify the conversion (a sketch of steps 4 to 6 follows the network definition below).
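
A minimal sketch of steps 2) and 3), assuming pycaffe is installed and using placeholder file names for the prototxt/caffemodel:

```python
import caffe

# Load the pretrained caffe model in TEST mode (file names are placeholders).
caffe.set_mode_cpu()
net = caffe.Net('se_resnet50.prototxt', 'se_resnet50.caffemodel', caffe.TEST)

# net.params maps layer name -> list of blobs:
#   Convolution: [weight, (bias)]     BatchNorm: [mean, var, scale_factor]
#   Scale:       [gamma, beta]        PReLU:     [slope]
for name, blobs in net.params.items():
    print(name, [b.data.shape for b in blobs])
```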

The following takes SE_Resnet50 as an example and converts the caffe model to a PyTorch one.

1) Create the PyTorch network structure:


```python
from torch.nn import (Module, Conv2d, BatchNorm2d, PReLU,
                      AdaptiveAvgPool2d, Sigmoid)


class Resnet50(Module):
    def __init__(self, embedding_size = 512, class_num=0):
        super(Resnet50, self).__init__()
        self.conv0 = Conv2d(3, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1, 1), bias=False)
        self.bn0 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        self.relu0 = PReLU(64)

        self.stage1_unit1_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        self.stage1_unit1_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1, 1), bias=False)
        self.stage1_unit1_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)

        self.stage1_unit1_relu1 = PReLU(64)
        self.stage1_unit1_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1, 1), bias=False)
        self.stage1_unit1_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)

        # self.stage1_unit1_bn3_scale
        self.stage1_unit1_se_pool1 = AdaptiveAvgPool2d(1)
        # self.stage1_unit1_se_pool1 = AvgPool2d(3, stride=1)
        self.stage1_unit1_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage1_unit1_se_relu1 = PReLU(4)
        self.stage1_unit1_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage1_unit1_se_sigmoid = Sigmoid()

        self.stage1_unit1_conv1sc = Conv2d(64, out_channels=64, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
        self.stage1_unit1_sc = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage1_unit1_sc_scale #relu
        # self._plus0 =  #axpy

        self.stage1_unit2_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage1_unit2_bn1_scale
        self.stage1_unit2_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage1_unit2_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage1_unit2_bn2_scale
        self.stage1_unit2_relu1 = PReLU(64)
        self.stage1_unit2_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage1_unit2_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage1_unit2_bn3_scale
        self.stage1_unit2_se_pool1 = AdaptiveAvgPool2d(1)
        self.stage1_unit2_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage1_unit2_se_relu1 = PReLU(4)
        self.stage1_unit2_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage1_unit2_se_sigmoid = Sigmoid()

        # self._plus1  #axpy
        self.stage1_unit3_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage1_unit3_bn1_scale
        self.stage1_unit3_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage1_unit3_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage1_unit3_bn2_scale
        self.stage1_unit3_relu1 = PReLU(64)
        self.stage1_unit3_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage1_unit3_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage1_unit3_bn3_scale
        self.stage1_unit3_se_pool1 = AdaptiveAvgPool2d(1)
        self.stage1_unit3_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage1_unit3_se_relu1 = PReLU(4)
        self.stage1_unit3_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage1_unit3_se_sigmoid = Sigmoid()
        # self._plus2 #Axpy


        self.stage2_unit1_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
        # self.stage2_unit1_bn1_scale
        self.stage2_unit1_conv1 = Conv2d(64, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage2_unit1_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit1_bn2_scale
        self.stage2_unit1_relu1 = PReLU(128)
        self.stage2_unit1_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1,1), bias=False)
        self.stage2_unit1_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit1_bn3_scale
        self.stage2_unit1_se_pool1 = AdaptiveAvgPool2d(1)
        self.stage2_unit1_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit1_se_relu1 = PReLU(8)
        self.stage2_unit1_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit1_se_sigmoid = Sigmoid()

        self.stage2_unit1_conv1sc = Conv2d(64, out_channels=128, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
        self.stage2_unit1_sc = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit1_sc_scale
        # self._plus3   #axpy

        self.stage2_unit2_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit2_bn1_scale
        self.stage2_unit2_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage2_unit2_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit2_bn2_scale
        self.stage2_unit2_relu1 = PReLU(128)
        self.stage2_unit2_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage2_unit2_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit2_bn3_scale
        self.stage2_unit2_se_pool1 = AdaptiveAvgPool2d(1)
        self.stage2_unit2_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit2_se_relu1 = PReLU(8)
        self.stage2_unit2_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit2_se_sigmoid = Sigmoid()
        # self._plus4

        self.stage2_unit3_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit3_bn1_scale
        self.stage2_unit3_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage2_unit3_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit3_bn2_scale
        self.stage2_unit3_relu1 = PReLU(128)
        self.stage2_unit3_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage2_unit3_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit3_bn3_scale
        self.stage2_unit3_se_pool1 = AdaptiveAvgPool2d(1)
        self.stage2_unit3_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit3_se_relu1 = PReLU(8)
        self.stage2_unit3_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit3_se_sigmoid = Sigmoid()
        # self._plus5

        self.stage2_unit4_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit4_bn1_scale
        self.stage2_unit4_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage2_unit4_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit4_bn2_scale
        self.stage2_unit4_relu1 = PReLU(128)
        self.stage2_unit4_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage2_unit4_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage2_unit4_bn3_scale
        self.stage2_unit4_se_pool1 = AdaptiveAvgPool2d(1)
        self.stage2_unit4_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit4_se_relu1 = PReLU(8)
        self.stage2_unit4_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        self.stage2_unit4_se_sigmoid = Sigmoid()
        # self._plus6

        self.stage3_unit1_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
        # self.stage3_unit1_bn1_scale
        self.stage3_unit1_conv1 = Conv2d(128, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
        self.stage3_unit1_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
        # self.stage3_unit1_bn2_scale
        self.stage3_unit1_relu1 = PReLU(256)
        self.stage3_unit1_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1,1), bias=False)
        self.stage3_unit1_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
        # self.stage3_unit1_bn3_scale
        self.stage3_unit1_se_pool1 = AdaptiveAvgPool2d(1)
        self.stage3_unit1_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
        # ... the remaining stage3/stage4 units and the final embedding layers follow the same pattern ...
```
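
The commented-out `_plus` placeholders in the listing correspond to caffe's Axpy layer (from the SENet repository), which computes `a*x + y`: the per-channel SE scale times the residual branch, plus the shortcut. Since the forward pass is not part of the listing, here is a hedged sketch of how one unit's layers could be wired; the actual connectivity should be checked against the prototxt:

```python
# Inside class Resnet50 (sketch only; names match the attributes defined above).
def _stage1_unit1_forward(self, x):
    # Shortcut branch: 1x1 stride-2 conv + batch norm.
    shortcut = self.stage1_unit1_sc(self.stage1_unit1_conv1sc(x))

    # Residual branch: bn -> conv -> bn -> prelu -> conv -> bn.
    out = self.stage1_unit1_bn1(x)
    out = self.stage1_unit1_conv1(out)
    out = self.stage1_unit1_bn2(out)
    out = self.stage1_unit1_relu1(out)
    out = self.stage1_unit1_conv2(out)
    out = self.stage1_unit1_bn3(out)

    # SE branch: global average pool -> squeeze conv -> PReLU -> excite conv -> sigmoid.
    w = self.stage1_unit1_se_pool1(out)
    w = self.stage1_unit1_se_conv1(w)
    w = self.stage1_unit1_se_relu1(w)
    w = self.stage1_unit1_se_conv2(w)
    w = self.stage1_unit1_se_sigmoid(w)

    # Axpy: per-channel scale times the residual branch, plus the shortcut.
    return out * w + shortcut
```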
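
For steps 4) to 6), the caffe blobs are copied into the PyTorch `state_dict` through numpy, reusing the `net` object loaded in the earlier sketch. The caffe layer names below (`bn0`, `bn0_scale`, `relu0`), the `data`/`fc1` blob names and the 112x112 input size are assumptions that depend on the actual prototxt; the main point is how caffe's BatchNorm + Scale pair maps onto a single `BatchNorm2d`, and how PReLU slopes are copied:

```python
import numpy as np
import torch

model = Resnet50()
state = model.state_dict()

def copy_conv(caffe_name, torch_name):
    blobs = net.params[caffe_name]
    state[torch_name + '.weight'] = torch.from_numpy(blobs[0].data)
    if len(blobs) > 1:                      # only for conv layers with bias=True
        state[torch_name + '.bias'] = torch.from_numpy(blobs[1].data)

def copy_bn(caffe_bn, caffe_scale, torch_name):
    # caffe splits batch norm into a BatchNorm layer (mean, var, scale_factor)
    # and a Scale layer (gamma, beta); pytorch's BatchNorm2d holds all of them.
    mean, var, factor = (b.data for b in net.params[caffe_bn])
    factor = 0.0 if factor[0] == 0 else 1.0 / factor[0]
    state[torch_name + '.running_mean'] = torch.from_numpy(mean * factor)
    state[torch_name + '.running_var'] = torch.from_numpy(var * factor)
    gamma, beta = (b.data for b in net.params[caffe_scale])
    state[torch_name + '.weight'] = torch.from_numpy(gamma)
    state[torch_name + '.bias'] = torch.from_numpy(beta)

def copy_prelu(caffe_name, torch_name):
    state[torch_name + '.weight'] = torch.from_numpy(net.params[caffe_name][0].data)

# Step 5: map each caffe layer name to the attribute name used in Resnet50.
copy_conv('conv0', 'conv0')
copy_bn('bn0', 'bn0_scale', 'bn0')
copy_prelu('relu0', 'relu0')
# ... repeat for every stageX_unitY_* layer ...

model.load_state_dict(state)
model.eval()
```

Step 6 then feeds the same input through both networks and compares the outputs (any intermediate blob can be compared the same way through `net.blobs[name].data`):

```python
# Step 6: compare features between caffe and pytorch for the same input.
x = np.random.rand(1, 3, 112, 112).astype(np.float32)   # input shape is an assumption

net.blobs['data'].reshape(*x.shape)
net.blobs['data'].data[...] = x
caffe_feat = net.forward()['fc1']                        # output blob name is an assumption

with torch.no_grad():
    torch_feat = model(torch.from_numpy(x)).numpy()

print('max abs diff:', np.abs(caffe_feat - torch_feat).max())
```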