TensorFlow 1.15.0 does not provide tf.keras.activations.gelu, so the GELU activation has to be defined manually.
The following code defines the gelu activation function and wraps it as a custom layer:
import tensorflow as tf
from tensorflow.keras.layers import Layer

def gelu_(X):
    # tanh approximation of GELU; 0.7978845608028654 = sqrt(2 / pi).
    # Use tf.tanh / tf.pow instead of math.tanh / math.pow, which fail on tensors.
    return 0.5 * X * (1.0 + tf.tanh(0.7978845608028654 * (X + 0.044715 * tf.pow(X, 3))))
class GELU(Layer):
    '''
    Gaussian Error Linear Unit (GELU), an alternative to ReLU

    Y = GELU()(X)
    ----------
    Hendrycks, D. and Gimpel, K., 2016. Gaussian error linear units (gelus).
    arXiv preprint arXiv:1606.08415.

    Usage: use it as a tf.keras Layer
    '''
    def __init__(self, trainable=False, **kwargs):
        super(GELU, self).__init__(**kwargs)
        self.supports_masking = True
        self.trainable = trainable

    def build(self, input_shape):
        super(GELU, self).build(input_shape)

    def call(self, inputs, mask=None):
        # Element-wise GELU; output shape equals input shape
        return gelu_(inputs)

    def get_config(self):
        config = {'trainable': self.trainable}
        base_config = super(GELU, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape
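As a quick check, here is a minimal usage sketch of the GELU layer inside a tf.keras model on TensorFlow 1.15. The input dimension (784) and layer widths are illustrative assumptions, not part of the original post:

from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

inputs = Input(shape=(784,))        # hypothetical flattened input
x = Dense(128)(inputs)              # linear projection, no built-in activation
x = GELU()(x)                       # apply the custom GELU layer defined above
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)
model.summary()

Because GELU subclasses Layer and implements get_config, a model built this way can also be saved and reloaded with custom_objects={'GELU': GELU}.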
Note:
If the custom layer raises an error, you can instead define a plain gelu function and use it directly as an activation:
def gelu(X):
    # Same tanh approximation, usable directly as an activation function
    return 0.5 * X * (1.0 + tf.tanh(0.7978845608028654 * (X + 0.044715 * tf.pow(X, 3))))
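A plain function like this can be passed as the activation argument of a Keras layer, for example (the layer width of 128 is an arbitrary choice for illustration):

from tensorflow.keras.layers import Dense

# Pass the function object directly; Keras will call it on the layer's output
hidden = Dense(128, activation=gelu)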