keras处理已保存模型中的自定义层（或其他自定义对象）

最新推荐文章于 2025-06-19 09:24:23 发布

原创最新推荐文章于 2025-06-19 09:24:23 发布 · 6.1k 阅读

20 ·

CC 4.0 BY-SA版权

keras&tensorflow学习记录专栏收录该内容

6 篇文章

订阅专栏

本文详细讲解了在Keras中如何正确保存和加载包含自定义层及自定义损失函数的模型，强调了自定义对象的必要性和正确设置自定义层参数的重要性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

这里针对model.save(filepath)所保存的模型的加载

官网中文文档:

如果要加载的模型包含自定义层或其他自定义类或函数，则可以通过 custom_objects 参数将它们传递给加载机制：

from keras.models import load_model

# Suppose the saved model contains an instance of the AttentionLayer class:
# map the name to the class so the loader can resolve it.
custom_objects = {'AttentionLayer': AttentionLayer}
model = load_model('my_model.h5', custom_objects=custom_objects)

这里需要注意如果损失函数是自定义的，也要加入custom_objects。

或者，你可以使用自定义对象作用域：

from keras.utils import CustomObjectScope

# Alternative to passing custom_objects: load inside a custom-object scope
# that knows the AttentionLayer name.
attention_scope = CustomObjectScope({'AttentionLayer': AttentionLayer})
with attention_scope:
    model = load_model('my_model.h5')

如:一段代码如下:

# Regroup the features into rows of 128 values each.
x = layers.Reshape((-1, 128))(x)
capsule = Capsule(num_capsule=10,
                  dim_capsule=16,
                  routings=3,
                  share_weights=True)(x)
# Reduce each capsule to the Euclidean norm taken over axis 2.
output = layers.Lambda(lambda z: K.sqrt(K.sum(K.square(z), 2)))(capsule)
model = Model(inputs=input_image, outputs=output)

# Margin loss is used
model.compile(optimizer='adam', loss=margin_loss, metrics=['accuracy'])

Capsule为一个自定义的层，margin_loss为一个自定义的损失函数，Capsule.h5为所保存的模型，则加载模型时应该:

# Both the custom layer and the custom loss must be named for the loader.
model = load_model(
    'Capsule.h5',
    custom_objects=dict(Capsule=Capsule, margin_loss=margin_loss),
)
或者
# Same load, with the custom layer and loss supplied through a scope.
capsule_objects = {'Capsule': Capsule, 'margin_loss': margin_loss}
with CustomObjectScope(capsule_objects):
    model = load_model('Capsule.h5')

还有一点需要注意：保存模型时所用的Python版本和加载模型时所用的Python版本要保持一致。这是因为上面的Lambda层在保存时会把匿名函数序列化为Python字节码，而不同Python版本的字节码并不兼容；版本不一致时可能会报如下错误:

SystemError: unknown opcode

另外还有一点：自定义层的__init__的参数应该都带有默认值（更稳妥的做法是为自定义层实现get_config()，把这些构造参数写入保存的模型配置中），否则可能会报错误:

TypeError: __init__() missing 2 required positional arguments: 'num_capsule' and 'dim_capsule'

例子来自

https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn_capsule.py

我在这个例子的后面加了一句:

# Save the model to Capsule.h5 so that it can be reloaded with load_model().
model.save('Capsule.h5')

保存了模型为Capsule.h5

当使用model = load_model('Capsule.h5',custom_objects={'Capsule': Capsule,'margin_loss':margin_loss})加载模型时报错:TypeError: __init__() missing 2 required positional arguments: 'num_capsule' and 'dim_capsule'

在https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn_capsule.py中关于Capsule层定义如下:

class Capsule(layers.Layer):
    """Capsule layer constructor as it appears in the upstream example.

    Only ``__init__`` is shown in this excerpt. ``num_capsule`` and
    ``dim_capsule`` are required positional parameters here, so calling the
    constructor without them raises ``TypeError``.
    """

    def __init__(self,
                 num_capsule,
                 dim_capsule,
                 routings=3,
                 share_weights=True,
                 activation='squash',
                 **kwargs):
        # Remaining keyword arguments are forwarded to the base Layer.
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.share_weights = share_weights
        # 'squash' selects the squash function referenced by this module; any
        # other identifier is resolved through keras.activations.get().
        if activation == 'squash':
            self.activation = squash
        else:
            self.activation = activations.get(activation)

可以看到num_capsule和dim_capsule并没有默认值，而该层又没有实现get_config()把它们保存到模型文件中，加载时Keras无法提供这两个参数，所以才会报错

我将其修改为:

class Capsule(layers.Layer):
    """Capsule layer whose constructor can be called without arguments.

    Used in the model as ``capsule = Capsule(10, 16, 3, True)(x)``; per the
    author's note, ``x`` has shape (m, 100, 128) at that point.

    The defaults for ``num_capsule`` (10) and ``dim_capsule`` (16) match the
    values used in that call.
    NOTE(review): the defaults only avoid the missing-argument TypeError at
    load time; values other than the defaults are presumably not restored
    unless the layer also implements get_config() - confirm.
    """

    def __init__(self,
                 num_capsule=10,
                 dim_capsule=16,
                 routings=3,
                 share_weights=True,
                 activation='squash',
                 **kwargs):
        super(Capsule, self).__init__(**kwargs)
        # 'squash' selects the squash function; any other identifier is
        # resolved through keras.activations.get().
        self.activation = (squash if activation == 'squash'
                           else activations.get(activation))
        self.share_weights = share_weights
        self.routings = routings
        self.dim_capsule = dim_capsule
        self.num_capsule = num_capsule

完整测试代码（测试代码由https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn_capsule.py改编，运行环境python3.7.3+tensorflow1.13.1+keras2.2.4）

代码如下:

from __future__ import print_function
from keras.models import load_model
from keras import activations
from keras import backend as K
from keras import layers
from keras import utils
from keras.datasets import cifar10
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import CustomObjectScope

def squash(x, axis=-1):
    """Rescale ``x`` along ``axis`` by ``sqrt(n) / (0.5 + n)``.

    ``n`` is the sum of squares of ``x`` over ``axis`` (dimension kept) plus
    ``K.epsilon()``.

    Args:
        x: backend tensor to rescale.
        axis: axis over which the squared norm is taken; defaults to the last.

    Returns:
        A tensor with the same shape as ``x``.
    """
    sq_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    factor = K.sqrt(sq_norm) / (0.5 + sq_norm)
    return factor * x


def margin_loss(y_true, y_pred):
    """Margin loss summed over the last axis.

    Positions where ``y_true`` is 1 are penalised when ``y_pred`` falls below
    ``1 - margin``; the remaining positions are penalised, weighted by
    ``lamb``, when ``y_pred`` rises above ``margin``.

    Args:
        y_true: target tensor (one-hot in the accompanying script).
        y_pred: predicted scores, same shape as ``y_true``.

    Returns:
        The per-sample loss tensor.
    """
    lamb, margin = 0.5, 0.1
    present_term = y_true * K.square(K.relu(1 - margin - y_pred))
    absent_term = lamb * (1 - y_true) * K.square(K.relu(y_pred - margin))
    return K.sum(present_term + absent_term, axis=-1)


class Capsule(layers.Layer):
    """Capsule layer with iterative routing between input and output capsules.

    Input is indexed as (batch, input_num_capsule, input_dim_capsule); the
    output shape reported to Keras is (None, num_capsule, dim_capsule).

    The layer implements ``get_config`` so that its constructor arguments are
    written into a saved model and restored by ``load_model``. Without it the
    loader can only call ``Capsule()`` with the defaults below.

    Args:
        num_capsule: number of output capsules.
        dim_capsule: length of each output capsule vector.
        routings: number of routing iterations performed in ``call``.
        share_weights: if True a single kernel is applied to every input
            capsule (``K.conv1d``); otherwise each input capsule position has
            its own kernel (``K.local_conv1d``).
        activation: ``'squash'`` selects the module-level ``squash`` function;
            any other identifier is resolved by ``keras.activations.get``.
        **kwargs: forwarded to ``layers.Layer``.
    """

    def __init__(self,
                 num_capsule=10,
                 dim_capsule=16,
                 routings=3,
                 share_weights=True,
                 activation='squash',
                 **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.share_weights = share_weights
        if activation == 'squash':
            self.activation = squash
        else:
            self.activation = activations.get(activation)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        The defaults in ``__init__`` are kept, so model files saved before
        this method existed still load.
        """
        if self.activation is squash:
            # squash is not a built-in Keras activation; store the keyword
            # that __init__ recognises instead of the function object.
            activation_id = 'squash'
        else:
            activation_id = activations.serialize(self.activation)
        config = super(Capsule, self).get_config()
        config.update({
            'num_capsule': self.num_capsule,
            'dim_capsule': self.dim_capsule,
            'routings': self.routings,
            'share_weights': self.share_weights,
            'activation': activation_id,
        })
        return config

    def build(self, input_shape):
        """Create the kernel; its first dimension depends on weight sharing."""
        input_dim_capsule = input_shape[-1]
        # Shared weights use one kernel row; otherwise one row per input
        # capsule position.
        kernel_rows = 1 if self.share_weights else input_shape[-2]
        self.kernel = self.add_weight(
            name='capsule_kernel',
            shape=(kernel_rows, input_dim_capsule,
                   self.num_capsule * self.dim_capsule),
            initializer='glorot_uniform',
            trainable=True)

    def call(self, inputs, **kwargs):
        """Apply the kernel, then run ``self.routings`` routing iterations."""
        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        # Split the last axis into (num_capsule, dim_capsule) ...
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        # ... and reorder to
        # (batch, num_capsule, input_num_capsule, dim_capsule).
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        # Routing logits start at zero:
        # (batch, num_capsule, input_num_capsule).
        b = K.zeros_like(hat_inputs[:, :, :, 0])

        for i in range(self.routings):
            # Normalise the logits across the output-capsule axis.
            c = K.softmax(b, 1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                # Update the logits from the current outputs; skipped on the
                # final iteration because b is not used again.
                b = K.batch_dot(o, hat_inputs, [2, 3])
                # NOTE(review): Theano-only adjustment kept unchanged; its
                # purpose is not evident from this file.
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)
        return o

    def compute_output_shape(self, input_shape):
        """Report the output shape as (None, num_capsule, dim_capsule)."""
        return None, self.num_capsule, self.dim_capsule


num_classes = 10

# Shapes noted by the author: x_train (50000, 32, 32, 3),
# x_test (10000, 32, 32, 3); after one-hot encoding y_train is (50000, 10)
# and y_test is (10000, 10).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast the images to float32 and divide by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# One-hot encode the labels.
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

# Load the saved model, supplying the custom layer and loss by name.
# The same objects could instead be passed as
# load_model('Capsule.h5', custom_objects={...}).
saved_objects = {'Capsule': Capsule, 'margin_loss': margin_loss}
with CustomObjectScope(saved_objects):
    model = load_model('Capsule.h5')
model.evaluate(x_test, y_test)

输出: