-tf.reduce_sum(one_hot_labels * log_probs, axis=-1)是什么损失函数?

本文解析了BERT代码中用Tensorflow1.x实现的loss function,重点讲解了负对数似然(NLL)的概念,并结合实例说明了如何计算和应用在模型训练中。
    # Per-example log-probabilities via a numerically stable log-softmax
    # over the last (class) axis.
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    # One-hot encode the integer labels; assumes labels lie in
    # [0, num_labels) — TODO confirm in the calling code.
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    # Negative log-likelihood: the one-hot mask selects -log p(true class)
    # for each example (the sum has exactly one non-zero term).
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    # Mean NLL over the batch — equivalent to categorical cross-entropy.
    loss = tf.reduce_mean(per_example_loss)

由于BERT代码当初是用Tensorflow1.x发布的,阅读代码时没太看懂这个loss function,经查证,是:
负对数似然(negative log-likelihood)

（原文此处有一张配图，抓取时图片未能保留。）

# LeNet-5 on CIFAR-10 with TensorFlow/Keras: train, evaluate, and visualize
# accuracy/loss curves, the learning-rate schedule, a confusion matrix,
# misclassified samples, and a per-class classification report.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, LambdaCallback
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

matplotlib.rc("font", family="FangSong")  # CJK-capable font for plot labels
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import os

# Report whether TensorFlow can see a GPU.
print("GPU 可用数量:", len(tf.config.list_physical_devices('GPU')))
if tf.config.list_physical_devices('GPU'):
    print("GPU 已启用!")
else:
    print("⚠️ 未检测到 GPU,将使用 CPU 训练")

# Cache downloaded datasets next to the working directory.
os.environ['KERAS_HOME'] = os.path.join(os.getcwd(), 'datasets')

# Load CIFAR-10, scale pixels to [0, 1], and one-hot encode the labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Classic LeNet topology adapted to 32x32x3 inputs and 10 classes.
model = models.Sequential()
# C1: 6 conv filters of 5x5.
model.add(layers.Conv2D(6, kernel_size=(5, 5), activation='tanh',
                        input_shape=(32, 32, 3)))
# S2: average pooling.
model.add(layers.AveragePooling2D(pool_size=(2, 2), strides=2))
# C3: 16 conv filters of 5x5.
model.add(layers.Conv2D(16, kernel_size=(5, 5), activation='tanh'))
# S4: average pooling.
model.add(layers.AveragePooling2D(pool_size=(2, 2), strides=2))
# C5: fully-connected-style convolution.
model.add(layers.Conv2D(120, kernel_size=(5, 5), activation='tanh'))
model.add(layers.Flatten())
# F6: dense layer, then the softmax output layer.
model.add(layers.Dense(84, activation='tanh'))
model.add(layers.Dense(10, activation='softmax'))

# Categorical cross-entropy matches the one-hot encoded targets above.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

# Halve the learning rate when validation loss plateaus; stop early (and
# restore the best weights) if it stalls for longer.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3,
                              min_lr=1e-7, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                               restore_best_weights=True, verbose=1)

# Record the effective learning rate after every epoch for plotting later.
lr_list = []
lr_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: lr_list.append(
        model.optimizer.learning_rate.numpy()))

print("开始训练...")
history = model.fit(x_train, y_train,
                    epochs=50,
                    batch_size=64,
                    validation_data=(x_test, y_test),
                    callbacks=[reduce_lr, early_stopping, lr_callback],
                    verbose=1)

# Final held-out evaluation.
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"\n测试准确率: {test_acc:.4f}")
print(f"测试损失: {test_loss:.4f}")

# Training curves: accuracy, loss, and the logged learning-rate schedule.
plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.plot(history.history['accuracy'], label='训练准确率')
plt.plot(history.history['val_accuracy'], label='验证准确率')
plt.title('模型准确率')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 3, 2)
plt.plot(history.history['loss'], label='训练损失')
plt.plot(history.history['val_loss'], label='验证损失')
plt.title('模型损失')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 3, 3)
plt.plot(lr_list, label='学习率', color='purple')
plt.title('学习率变化')
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.yscale('log')  # log scale makes the halving steps visible
plt.legend()

plt.tight_layout()
plt.show()

# Confusion matrix over the test set.
y_pred_probs = model.predict(x_test)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
cm = confusion_matrix(y_true_classes, y_pred_classes)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=[f'类{i}' for i in range(10)],
            yticklabels=[f'类{i}' for i in range(10)])
plt.title('混淆矩阵')
plt.xlabel('预测标签')
plt.ylabel('真实标签')
plt.show()

# Show up to 12 misclassified test images with true vs. predicted labels.
error_indices = np.where(y_pred_classes != y_true_classes)[0]
print(f"共 {len(error_indices)} 个样本被错误分类")
num_show = min(12, len(error_indices))
plt.figure(figsize=(15, 6))
for i in range(num_show):
    idx = error_indices[i]
    plt.subplot(2, 6, i + 1)
    plt.imshow(x_test[idx])
    plt.title(f'真:{y_true_classes[idx]}, 预:{y_pred_classes[idx]}')
    plt.axis('off')
plt.suptitle("错误分类的样本示例")
plt.tight_layout()
plt.show()

# Per-class precision/recall/F1 summary.
print("分类报告:")
print(classification_report(y_true_classes, y_pred_classes,
                            target_names=[f'类 {i}' for i in range(10)]))
改为Pytorch
最新发布
11-19
评论 1
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值