import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, Lambda, Concatenate
# Select the GELU activation function.
def set_gelu(activation_type):
    if activation_type == 'tanh':
        # tanh-approximated GELU variant
        return lambda x: tf.nn.gelu(x, approximate=True)
    else:
        return tf.nn.gelu  # default: exact GELU
# Custom TextCNN feature extractor: three parallel Conv1D branches with
# kernel sizes 3/4/5, each globally max-pooled, then concatenated.
def textcnn(inputs, kernel_initializer):
    cnn1 = Conv1D(
        256,
        3,
        strides=1,
        padding='same',
        activation='relu',
        kernel_initializer=kernel_initializer
    )(inputs)  # shape=[batch_size, maxlen, 256] ('same' padding keeps the sequence length)
    cnn1 = GlobalMaxPooling1D()(cnn1)  # shape=[batch_size, 256]
    cnn2 = Conv1D(
        256,
        4,
        strides=1,
        padding='same',
        activation='relu',
        kernel_initializer=kernel_initializer
    )(inputs)
    cnn2 = GlobalMaxPooling1D()(cnn2)
    cnn3 = Conv1D(
        256,
        5,
        strides=1,
        padding='same',
        activation='relu',  # ReLU, as in the other two branches
        kernel_initializer=kernel_initializer
    )(inputs)
    cnn3 = GlobalMaxPooling1D()(cnn3)
    output = Concatenate(axis=-1)([cnn1, cnn2, cnn3])  # shape=[batch_size, 768]
    output = Dropout(0.2)(output)
    return output
# Build the BERT + TextCNN classification model.
def build_bert_model(model_path, class_nums):
    # Load the local BERT model and tokenizer via the transformers library.
    tokenizer = BertTokenizer.from_pretrained(model_path)
    bert_model = TFBertModel.from_pretrained(model_path, output_hidden_states=True)

    # Model inputs: variable-length token IDs and attention mask.
    input_ids = layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    attention_mask = layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")

    # Earlier single-layer variant, kept for reference:
    # bert_output = bert_model(input_ids, attention_mask=attention_mask)
    # all_token_embedding = bert_output[0]  # [batch_size, maxlen, 768]
    # cls_features = Lambda(lambda x: x[:, 0])(bert_output[0])  # [CLS] token features
    # print("all_token_embedding:", all_token_embedding.shape)

    # Multi-layer [CLS] features from BERT.
    hidden_states = bert_model(input_ids, attention_mask=attention_mask).hidden_states
    print("all_hidden_states:", len(hidden_states))  # embedding output + 12 layers = 13 for bert-base
    # [CLS] vector from each of the last 4 layers: 4 tensors of shape [batch_size, 768]
    last_4_cls = [Lambda(lambda x: x[:, 0])(layer) for layer in hidden_states[-4:]]
    # Concatenate the multi-layer [CLS] vectors.
    cls_features = Concatenate(axis=-1)(last_4_cls)  # shape=[batch_size, 768*4]
    cls_features = Dense(768, activation='gelu')(cls_features)  # fuse and reduce dimensionality

    # CNN feature extraction over the last layer's token embeddings.
    kernel_initializer = tf.keras.initializers.GlorotUniform()
    token_embeddings = hidden_states[-1]  # last-layer output, [batch_size, maxlen, 768]
    cnn_features = textcnn(token_embeddings, kernel_initializer)

    # Concatenate the [CLS] features with the CNN features.
    concat_features = Concatenate(axis=-1)([cls_features, cnn_features])

    # Fully connected layer.
    dense = Dense(
        units=512,
        activation='gelu',
        kernel_initializer=kernel_initializer,
        kernel_regularizer=tf.keras.regularizers.l2(1e-4)
    )(concat_features)

    # Output layer.
    output = Dense(
        units=class_nums,
        activation='softmax',
        kernel_initializer=kernel_initializer,
        dtype=tf.float32
    )(dense)

    # Define the model.
    model = models.Model(inputs=[input_ids, attention_mask], outputs=output)
    return model
# Main program
if __name__ == '__main__':
    model_path = './bert-base-chinese'  # path to the local pretrained checkpoint
    class_nums = 13  # number of target classes

    # Build the BERT + CNN model
    model = build_bert_model(model_path, class_nums)
    model.summary()
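    # A minimal usage sketch (not in the original code; the sample sentences
    # are placeholders): tokenize two sentences with the same local checkpoint
    # and run a forward pass, to show how the encodings feed the model's
    # "input_ids" / "attention_mask" inputs.
    tokenizer = BertTokenizer.from_pretrained(model_path)
    encodings = tokenizer(
        ["这是一个测试句子", "另一个例子"],  # placeholder example sentences
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors='tf'
    )
    probs = model({
        "input_ids": encodings["input_ids"],
        "attention_mask": encodings["attention_mask"]
    })
    print(probs.shape)  # expected: (2, class_nums)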
How can I add a self-attention layer after the CNN in this model, so that it focuses on the key local features?
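One way to do it (a sketch, not the only option): apply self-attention to each Conv1D branch's sequence output before the global max pooling, so the pooling operates on attention-reweighted local features. The sketch below uses tf.keras.layers.MultiHeadAttention with a residual connection and layer normalization; num_heads=4 and key_dim=64 are assumed starting hyperparameters, and textcnn_with_attention is a new helper name, not part of the original code.

from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Add

def textcnn_with_attention(inputs, kernel_initializer):
    pooled = []
    for kernel_size in (3, 4, 5):
        conv = Conv1D(
            256,
            kernel_size,
            strides=1,
            padding='same',
            activation='relu',
            kernel_initializer=kernel_initializer
        )(inputs)  # shape=[batch_size, maxlen, 256]
        # Self-attention over the convolved sequence: every position attends to
        # all others, re-weighting the most informative local n-gram features.
        attn = MultiHeadAttention(num_heads=4, key_dim=64)(conv, conv)  # assumed hyperparameters
        # Residual connection + layer norm keep the original conv features and
        # stabilize training (Transformer-style post-norm block).
        conv = LayerNormalization()(Add()([conv, attn]))
        pooled.append(GlobalMaxPooling1D()(conv))  # shape=[batch_size, 256]
    output = Concatenate(axis=-1)(pooled)  # shape=[batch_size, 768]
    output = Dropout(0.2)(output)
    return output

To wire it in, swap the CNN call inside build_bert_model: cnn_features = textcnn_with_attention(token_embeddings, kernel_initializer). Everything downstream (the concatenation with the multi-layer [CLS] features and the dense head) stays unchanged. Attending after each convolution, rather than once on token_embeddings, keeps the attention focused on the n-gram features the question is about.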