转换基本流程:
1)创建pytorch的网络结构模型;
2)利用caffe加载其保存的预训练模型,以便读取caffe模型的参数;
3)遍历caffe加载的模型参数;
4)对一些指定的key值,需要进行相应的处理和转换;
5)对修改后的层名(key值),通过numpy数组进行转换来实现参数的加载;
6)对相应层的参数(feature)进行比较;
以下以SE_Resnet50为例,将caffe上的模型转换到pytorch上;
1)创建pytorch的网络结构模型:
class Resnet50(Module):
def __init__(self, embedding_size = 512, class_num=0):
super(Resnet50, self).__init__()
self.conv0 = Conv2d(3, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1, 1), bias=False)
self.bn0 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
self.relu0 = PReLU(64)
self.stage1_unit1_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)#stage1_unit1_bn1
self.stage1_unit1_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1, 1), bias=False)
self.stage1_unit1_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
self.stage1_unit1_relu1 = PReLU(64)
self.stage1_unit1_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1, 1), bias=False)
self.stage1_unit1_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit1_bn3_scale
self.stage1_unit1_se_pool1 = AdaptiveAvgPool2d(1)
# self.stage1_unit1_se_pool1 = AvgPool2d(3, stride=1)
self.stage1_unit1_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit1_se_relu1 = PReLU(4)
self.stage1_unit1_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit1_se_sigmoid = Sigmoid()
self.stage1_unit1_conv1sc = Conv2d(64, out_channels=64, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
self.stage1_unit1_sc = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit1_sc_scale #relu
# self._plus0 = #axpy
self.stage1_unit2_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit2_bn1_scale
self.stage1_unit2_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit2_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit2_bn2_scale
self.stage1_unit2_relu1 = PReLU(64)
self.stage1_unit2_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit2_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit2_bn3_scale
self.stage1_unit2_se_pool1 = AdaptiveAvgPool2d(1)
self.stage1_unit2_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit2_se_relu1 = PReLU(4)
self.stage1_unit2_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit2_se_sigmoid = Sigmoid()
# self._plus1 #axpy
self.stage1_unit3_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit3_bn1_scale
self.stage1_unit3_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit3_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit3_bn2_scale
self.stage1_unit3_relu1 = PReLU(64)
self.stage1_unit3_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit3_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit3_bn3_scale
self.stage1_unit3_se_pool1 = AdaptiveAvgPool2d(1)
self.stage1_unit3_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit3_se_relu1 = PReLU(4)
self.stage1_unit3_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit3_se_sigmoid = Sigmoid()
# self._plus2 #Axpy
self.stage2_unit1_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage2_unit1_bn1_scale
self.stage2_unit1_conv1 = Conv2d(64, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit1_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit1_bn2_scale
self.stage2_unit1_relu1 = PReLU(128)
self.stage2_unit1_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1,1), bias=False)
self.stage2_unit1_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit1_bn3_scale
self.stage2_unit1_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit1_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit1_se_relu1 = PReLU(8)
self.stage2_unit1_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit1_se_sigmoid = Sigmoid()
self.stage2_unit1_conv1sc = Conv2d(64, out_channels=128, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
self.stage2_unit1_sc = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit1_sc_scale
# self._plus3 #axpy
self.stage2_unit2_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit2_bn1_scale
self.stage2_unit2_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit2_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit2_bn2_scale
self.stage2_unit2_relu1 = PReLU(128)
self.stage2_unit2_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit2_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit2_bn3_scale
self.stage2_unit2_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit2_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit2_se_relu1 = PReLU(8)
self.stage2_unit2_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit2_se_sigmoid = Sigmoid()
# self._plus4
self.stage2_unit3_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit3_bn1_scale
self.stage2_unit3_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit3_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit3_bn2_scale
self.stage2_unit3_relu1 = PReLU(128)
self.stage2_unit3_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit3_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit3_bn3_scale
self.stage2_unit3_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit3_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit3_se_relu1 = PReLU(8)
self.stage2_unit3_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit3_se_sigmoid = Sigmoid()
# self._plus5
self.stage2_unit4_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit4_bn1_scale
self.stage2_unit4_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit4_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit4_bn2_scale
self.stage2_unit4_relu1 = PReLU(128)
self.stage2_unit4_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit4_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit4_bn3_scale
self.stage2_unit4_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit4_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit4_se_relu1 = PReLU(8)
self.stage2_unit4_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit4_se_sigmoid = Sigmoid()
# self._plus6
self.stage3_unit1_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage3_unit1_bn1_scale
self.stage3_unit1_conv1 = Conv2d(128, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit1_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit1_bn2_scale
self.stage3_unit1_relu1 = PReLU(256)
self.stage3_unit1_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1,1), bias=False)
self.stage3_unit1_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit1_bn3_scale
self.stage3_unit1_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit1_se_conv1 = Conv2d(2