In text generation, the ground-truth labels are token indices, shape = [n_batch,] (here n_batch counts every decoding step in the batch, i.e. batch_size × max_len_char).
The model output is a probability distribution over the vocabulary, shape = [n_batch, num_decoder_tokens].
When computing the loss, the padded positions of each sequence should be masked out.
import tensorflow as tf
from tensorflow.keras import backend as K

def categorical_CE_generation(max_len_char, n_decoder_tokens, idx_pad=0):
    def mask_categorical_crossentropy(y_true, y_pred):
        epsilon = K.epsilon()
        # Clip probabilities away from 0 and 1 so the log below cannot produce NaN/Inf.
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        y_true = tf.cast(tf.reshape(y_true, [-1, max_len_char]), tf.int32)
        y_pred = tf.reshape(y_pred, [-1, max_len_char, n_decoder_tokens])
        # y_pred holds probabilities, not logits. Since softmax(log(p)) == p for a
        # normalized distribution, passing log(y_pred) as the logits yields the
        # ordinary cross-entropy -log p_true at each position.
        loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y_true, logits=tf.log(y_pred))
        # Drop every position whose label is the padding index.
        mask = tf.constant(idx_pad, dtype=tf.int32)
        loss_ = tf.boolean_mask(tensor=loss_, mask=tf.not_equal(y_true, mask))
        # Sum the losses of the non-pad positions and average over the batch.
        loss_CE = tf.reduce_sum(loss_) / tf.cast(tf.shape(y_true)[0], dtype=tf.float32)
        return loss_CE
    return mask_categorical_crossentropy
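For context, the outer function is a factory meant to be passed to model.compile. Below is a minimal usage sketch, assuming TF 1.x with the Keras API; the toy decoder and the names inp, model, and the layer sizes are hypothetical, not part of the original code:

from tensorflow.keras import layers, models

max_len_char, n_decoder_tokens = 20, 5000  # hypothetical sizes

# Toy decoder: embeds token ids and predicts a distribution per time step.
inp = layers.Input(shape=(max_len_char,))
x = layers.Embedding(n_decoder_tokens, 128)(inp)
x = layers.LSTM(256, return_sequences=True)(x)
out = layers.TimeDistributed(layers.Dense(n_decoder_tokens, activation='softmax'))(x)

model = models.Model(inp, out)
# categorical_CE_generation returns the loss function itself, so call it once here;
# positions labeled idx_pad=0 are then ignored by the loss.
model.compile(optimizer='adam',
              loss=categorical_CE_generation(max_len_char, n_decoder_tokens, idx_pad=0))

Since the loss reshapes y_true internally, the integer targets are typically fed with a trailing singleton axis, e.g. shape (batch, max_len_char, 1), so their rank matches the model output.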
Example:
sess = tf.Session()
y_label_onehot = tf.convert_to_tensor([