# coding:utf-8
"""
@author: liu
@File: get_tfbertmainlayer_demo.py
@CreateTime: 2021/7/26
"""
import numpy as np
import tensorflow as tf
from transformers import TFBertModel, TFBertMainLayer, BertConfig, BertTokenizer
bert_config = BertConfig.from_pretrained("../data/bert_base_chinese")
print(bert_config)
model = TFBertModel.from_pretrained("../data/bert_base_chinese", return_dict=True)
tokenizer = BertTokenizer.from_pretrained("../data/bert_base_chinese")
text = "我要去上海"
inputs = tokenizer(text, return_tensors="tf")
print(inputs)
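# `inputs` is a dict of tf.Tensors with keys input_ids, token_type_ids and attention_mask,
# each of shape [1, seq_len]; the tokenizer adds [CLS] and [SEP] around the text.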
# Optional inspection helpers:
# print(model.bert.embeddings.trainable_weights[0][:10])
# for module in model.submodules:
#     print(module.name)
# An earlier attempt built a fresh TFBertMainLayer(bert_config, name="bert") and tried to load
# only the embedding weights into it; this script instead reuses the sub-layers of the already
# loaded `model.bert`.
# `trainable_weights` is a list of variables, so print each variable's shape
# (the list itself has no .shape attribute).
layer_weight_shapes = [w.shape for w in model.bert.encoder.layer[0].trainable_weights]
print(layer_weight_shapes)
print("原始的输出")
# `last_hidden_state` is the sequence output (not the pooled output), so name it accordingly.
original_seq_output = model(inputs).last_hidden_state[0]
# print(original_seq_output[:5], original_seq_output[-5:])
print(tf.reduce_mean(original_seq_output))
# print(model.bert.embeddings)
print("获取经过分离之后的模型输出")
embedding_layer = model.bert.embeddings  # TFBertEmbeddings sub-layer of TFBertMainLayer
bert_main_layer = model.bert.encoder  # the TFBertEncoder sub-layer (transformer stack), despite the name
input_ids = inputs["input_ids"]
token_type_ids = inputs["token_type_ids"]
attention_mask = inputs["attention_mask"]
training = False
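# Reproduce TFBertMainLayer's forward pass by hand: run the embeddings, expand the padding mask
# into the additive attention mask the encoder expects, then call the encoder stack. The
# positional argument layouts of the two calls below follow the transformers release this demo
# was written against and may differ in newer versions.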
embedding_output = embedding_layer(input_ids, None, token_type_ids, None, training=training)
# Broadcast the [batch, seq] padding mask to [batch, 1, 1, seq] and make it additive:
# 0.0 for real tokens, -10000.0 for padding, so masked positions vanish after the softmax.
extended_attention_mask = attention_mask[:, tf.newaxis, tf.newaxis, :]
extended_attention_mask = tf.cast(extended_attention_mask, embedding_output.dtype)
extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
head_mask = [None] * bert_config.num_hidden_layers  # no attention-head masking in any layer
encoder_outputs = bert_main_layer(embedding_output, extended_attention_mask, head_mask, None, None, return_dict=True, training=training)
print(encoder_outputs.keys())
seq_output = encoder_outputs.last_hidden_state
print(tf.reduce_mean(seq_output))
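# Sanity check (added sketch): since the split pipeline reuses the very same sub-layers and
# weights, its hidden states should match the original model's output.
print("manual forward pass matches the original model:",
      np.allclose(original_seq_output.numpy(), seq_output[0].numpy(), atol=1e-5))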
def get_initializer(initializer_range: float = 0.02) -> tf.initializers.TruncatedNormal:
"""
Creates a :obj:`tf.initializers.TruncatedNormal` with the given range.
Args:
initializer_range (`float`, defaults to 0.02): Standard deviation of the initializer range.
Returns:
:obj:`tf.initializers.TruncatedNormal`: The truncated normal initializer.
"""
return tf.keras.initializers.TruncatedNormal(stddev=initializer_range)
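# Note (added): get_initializer above and TFBertPooler below mirror the corresponding pieces of
# transformers' TF BERT implementation, so a pooling head can be rebuilt outside the library.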
class TFBertPooler(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
config.hidden_size,
kernel_initializer=get_initializer(config.initializer_range),
activation="tanh",
name="dense",
)
def call(self, hidden_states):
# We "pool" the model by simply taking the hidden state corresponding
# to the first token.
first_token_tensor = hidden_states[:, 0]
pooled_output = self.dense(first_token_tensor)
return pooled_output
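# Note (added): a freshly constructed TFBertPooler starts from randomly initialized weights; to
# keep the pretrained pooler weights one could instead reuse the already loaded sub-layer, e.g.
# pretrained_pooler = model.bert.pooler  # assuming the model was built with its pooling layer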
class Mymodel(tf.keras.Model):
    def __init__(self, config):
        super(Mymodel, self).__init__()
        self.config = config
        self.embedding_layer = model.bert.embeddings  # reuse the pretrained embedding sub-layer
        self.bert_main_layer = model.bert.encoder  # reuse the pretrained encoder stack
        self.pooler = TFBertPooler(config, name="pooler")  # newly initialized pooling head

    def call(self, inputs, training=None, mask=None):
        input_ids = inputs["input_ids"]
        token_type_ids = inputs["token_type_ids"]
        attention_mask = inputs["attention_mask"]
        # Same manual forward pass as above, but using the sub-layers stored on the instance.
        embedding_output = self.embedding_layer(input_ids, None, token_type_ids, None, training=training)
        extended_attention_mask = attention_mask[:, tf.newaxis, tf.newaxis, :]
        extended_attention_mask = tf.cast(extended_attention_mask, embedding_output.dtype)
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
        head_mask = [None] * self.config.num_hidden_layers
        encoder_outputs = self.bert_main_layer(embedding_output, extended_attention_mask, head_mask, None, None,
                                               return_dict=True, training=training)
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output)
        return sequence_output, pooled_output
my_model = Mymodel(bert_config)
sequence_output, pooled_output = my_model(inputs, training=False)
print(tf.reduce_mean(sequence_output))
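# Sanity check (added sketch): the wrapper's sequence output should match the original
# TFBertModel's last_hidden_state for the same inputs.
print("Mymodel output matches the original model:",
      np.allclose(sequence_output.numpy(), model(inputs).last_hidden_state.numpy(), atol=1e-5))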