7.encoder与layout以及converter

深入理解Logback的Layout与Encoder

一、前言

上一篇文章介绍过, 在OutputStreamAppender中可以注入Encoder和Layout。Layout 主要用于格式化日志文本(如 PatternLayout),适用于控制台日志输出;Encoder 负责将日志编码(通常是字符或二进制),适用于文件、远程日志等场景。

下面是OutputStreamAppender中Encoder和Layout的注入方法

/**
 * Injects the encoder that serializes logging events before they are
 * written to the underlying output stream.
 */
public void setEncoder(Encoder<E> encoder) {
   
   
    this.encoder = encoder;
}

/**
 * Kept for backward compatibility only: this appender no longer accepts a
 * Layout directly, so the supplied layout is wrapped in a
 * LayoutWrappingEncoder which is then installed as this appender's encoder.
 */
public void setLayout(Layout<E> layout) {
   
   
    // warn the user that configuring a <layout> is deprecated in favour of <encoder>
    addWarn("This appender no longer admits a layout as a sub-component, set an encoder instead.");
    addWarn("To ensure compatibility, wrapping your layout in LayoutWrappingEncoder.");
    addWarn("See also " + CODES_URL + "#layoutInsteadOfEncoder for details");
    // wrap the layout so the rest of the appender only ever deals with an Encoder
    LayoutWrappingEncoder<E> lwe = new LayoutWrappingEncoder<E>();
    lwe.setLayout(layout);
    lwe.setContext(context);
    this.encoder = lwe;
}

可以看出设置的layout实际也是被包装到encoder中了(LayoutWrappingEncoder), 整体看也就是只需一个encoder就行了

二、源码解读

Layout

public interface Layout<E> extends ContextAware, LifeCycle {
   
   

    /**
     * Transforms an event into a String; the event is usually a logging event object.
     */
    String doLayout(E event);

    /**
     * Returns the header content of the file; may be null.
     */
    String getFileHeader();

    /**
     * Returns the header of the formatted (presentation) output of logging events; may be null.
     */
    String getPresentationHeader();

    /**
     * Returns the footer of the formatted (presentation) output of logging events; may be null.
     */
    String getPresentationFooter();

    /**
     * Returns the footer content of the file; may be null.
     */
    String getFileFooter();

    /**
     * Returns the content type of the log output.
     */
    String getContentType();

}

LayoutBase

abstract public class LayoutBase<E> extends ContextAwareBase implements Layout<E> {
   
   
    // lifecycle flag toggled by start()/stop()
    protected boolean started;

    // header/footer written around the whole file
    String fileHeader;
    String fileFooter;
    // header/footer written around the formatted (presentation) output
    String presentationHeader;
    String presentationFooter;
    
    public String getContentType() {
   
   
        // plain text by default; subclasses may override
        return "text/plain";
    }
    // ... some code omitted
}

这个类比较简单, 定义了头部、尾部、格式化头部、格式化尾部以及生命周期的start字段, 默认日志内容是文本格式

PatternLayoutBase

abstract public class PatternLayoutBase<E> extends LayoutBase<E> {
   
   
    /**
     * 日志格式转换器链
     */
    Converter<E> head;
    
    /**
     * 日志格式
     */
    String pattern;
    
    /** 获取默认的转换器 */
    protected abstract Map<String, Supplier<DynamicConverter>> getDefaultConverterSupplierMap();
    
    /**
     * 获取有效的转换器
     */
    public Map<String, Supplier<DynamicConverter>> getEffectiveConverterMap
dorm_face_recognition_gui.py代码如下: import pickle import sys import os import cv2 import numpy as np import torch from PyQt5.QtWidgets import QListWidget, QProgressDialog from facenet_pytorch import MTCNN, InceptionResnetV1 from PIL import Image from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QFileDialog, QComboBox, QSlider, QMessageBox, QTextEdit, QGroupBox, QScrollArea, QDialog, QDialogButtonBox, QTableWidget, QTableWidgetItem, QHeaderView, QGridLayout) from PyQt5.QtCore import Qt, QTimer from PyQt5.QtGui import QImage, QPixmap, QIcon, QFont, QColor import joblib import logging import json from datetime import datetime 在 dorm_face_recognition_gui.py 顶部添加导入 from face_recognition import FaceRecognition 配置日志 logging.basicConfig(level=logging.INFO, format=‘%(asctime)s - %(levelname)s - %(message)s’) logger = logging.getLogger(name) class FeedbackDialog(QDialog): “”“反馈对话框”“” def __init__(self, parent=None, last_results=None, dorm_members=None): super().__init__(parent) self.setWindowTitle("识别错误反馈") self.setFixedSize(500, 400) self.last_results = last_results or [] self.dorm_members = dorm_members or [] self.init_ui() def init_ui(self): layout = QVBoxLayout(self) # 添加当前识别结果 result_label = QLabel("当前识别结果:") layout.addWidget(result_label) # 使用表格显示结果 self.results_table = QTableWidget() self.results_table.setColumnCount(4) self.results_table.setHorizontalHeaderLabels(["ID", "识别结果", "置信度", "位置和大小"]) self.results_table.setSelectionBehavior(QTableWidget.SelectRows) self.results_table.setEditTriggers(QTableWidget.NoEditTriggers) # 填充表格数据 self.results_table.setRowCount(len(self.last_results)) for i, result in enumerate(self.last_results): x, y, w, h = result["box"] self.results_table.setItem(i, 0, QTableWidgetItem(str(i + 1))) self.results_table.setItem(i, 1, QTableWidgetItem(result["label"])) self.results_table.setItem(i, 2, QTableWidgetItem(f"{result['confidence']:.2f}")) self.results_table.setItem(i, 3, 
QTableWidgetItem(f"({x}, {y}) - {w}x{h}")) # 设置表格样式 self.results_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) self.results_table.verticalHeader().setVisible(False) layout.addWidget(self.results_table) # 添加正确身份选择 correct_layout = QGridLayout() correct_label = QLabel("正确身份:") correct_layout.addWidget(correct_label, 0, 0) self.correct_combo = QComboBox() self.correct_combo.addItem("选择正确身份", None) for member in self.dorm_members: self.correct_combo.addItem(member, member) self.correct_combo.addItem("陌生人", "stranger") self.correct_combo.addItem("不在列表中", "unknown") correct_layout.addWidget(self.correct_combo, 0, 1) # 添加备注 note_label = QLabel("备注:") correct_layout.addWidget(note_label, 1, 0) self.note_text = QTextEdit() self.note_text.setPlaceholderText("可添加额外说明...") self.note_text.setMaximumHeight(60) correct_layout.addWidget(self.note_text, 1, 1) layout.addLayout(correct_layout) # 添加按钮 button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) button_box.accepted.connect(self.accept) button_box.rejected.connect(self.reject) layout.addWidget(button_box) def get_selected_result(self): """获取选择的识别结果""" selected_row = self.results_table.currentRow() if selected_row >= 0 and selected_row < len(self.last_results): return self.last_results[selected_row] return None def get_feedback_data(self): """获取反馈数据""" selected_result = self.get_selected_result() if not selected_result: return None return { "timestamp": datetime.now().isoformat(), "original_label": selected_result["label"], "correct_label": self.correct_combo.currentData(), "confidence": selected_result["confidence"], "box": selected_result["box"], # 保存完整的框信息 "note": self.note_text.toPlainText().strip() } class FaceRecognitionSystem(QMainWindow): def init(self): super().init() self.setWindowTitle(“寝室人脸识别系统”) self.setGeometry(100, 100, 1200, 800) # 初始化变量 self.model_loaded = False self.camera_active = False self.video_capture = None self.timer = QTimer() self.current_image = None 
self.last_results = [] # 存储上次识别结果 self.dorm_members = [] # 寝室成员列表 # 创建主界面 self.main_widget = QWidget() self.setCentralWidget(self.main_widget) self.layout = QHBoxLayout(self.main_widget) # 左侧控制面板 - 占40%宽度 self.control_panel = QWidget() self.control_layout = QVBoxLayout(self.control_panel) self.control_layout.setAlignment(Qt.AlignTop) self.control_panel.setMaximumWidth(400) self.layout.addWidget(self.control_panel, 40) # 40%宽度 # 右侧图像显示区域 - 占60%宽度 self.image_panel = QWidget() self.image_layout = QVBoxLayout(self.image_panel) self.image_label = QLabel() self.image_label.setAlignment(Qt.AlignCenter) self.image_label.setMinimumSize(800, 600) self.image_label.setStyleSheet("background-color: #333; border: 1px solid #555;") self.image_layout.addWidget(self.image_label) self.layout.addWidget(self.image_panel, 60) # 60%宽度 # 状态栏 self.status_bar = self.statusBar() self.status_bar.showMessage("系统初始化中...") # 初始化人脸识别器 - 关键修复 self.face_recognition = FaceRecognition() # 初始化UI组件 self.init_ui() # 添加工具栏(必须在UI初始化后) self.toolbar = self.addToolBar('工具栏') # 添加反馈按钮 self.add_feedback_button() # 初始化模型 self.init_models() def init_ui(self): """初始化用户界面组件""" # 标题 title_label = QLabel("寝室人脸识别系统") title_label.setFont(QFont("Arial", 18, QFont.Bold)) title_label.setAlignment(Qt.AlignCenter) title_label.setStyleSheet("color: #2c3e50; padding: 10px;") self.control_layout.addWidget(title_label) # 模型加载 model_group = QGroupBox("模型设置") model_layout = QVBoxLayout(model_group) self.load_model_btn = QPushButton("加载模型") self.load_model_btn.setIcon(QIcon.fromTheme("document-open")) self.load_model_btn.setStyleSheet("background-color: #3498db;") self.load_model_btn.clicked.connect(self.load_model) model_layout.addWidget(self.load_model_btn) self.model_status = QLabel("模型状态: 未加载") model_layout.addWidget(self.model_status) self.control_layout.addWidget(model_group) # 在模型设置部分添加重新训练按钮 self.retrain_btn = QPushButton("重新训练模型") self.retrain_btn.setIcon(QIcon.fromTheme("view-refresh")) 
self.retrain_btn.setStyleSheet("background-color: #f39c12;") self.retrain_btn.clicked.connect(self.retrain_model) self.retrain_btn.setEnabled(False) # 初始不可用 model_layout.addWidget(self.retrain_btn) # 识别设置 settings_group = QGroupBox("识别设置") settings_layout = QVBoxLayout(settings_group) # 置信度阈值 threshold_layout = QHBoxLayout() threshold_label = QLabel("置信度阈值:") threshold_layout.addWidget(threshold_label) self.threshold_slider = QSlider(Qt.Horizontal) self.threshold_slider.setRange(0, 100) self.threshold_slider.setValue(70) self.threshold_slider.valueChanged.connect(self.update_threshold) threshold_layout.addWidget(self.threshold_slider) self.threshold_value = QLabel("0.70") threshold_layout.addWidget(self.threshold_value) settings_layout.addLayout(threshold_layout) # 显示选项 display_layout = QHBoxLayout() display_label = QLabel("显示模式:") display_layout.addWidget(display_label) self.display_combo = QComboBox() self.display_combo.addItems(["原始图像", "检测框", "识别结果"]) self.display_combo.setCurrentIndex(2) display_layout.addWidget(self.display_combo) settings_layout.addLayout(display_layout) self.control_layout.addWidget(settings_group) # 识别功能 recognition_group = QGroupBox("识别功能") recognition_layout = QVBoxLayout(recognition_group) # 图片识别 self.image_recognition_btn = QPushButton("图片识别") self.image_recognition_btn.setIcon(QIcon.fromTheme("image-x-generic")) self.image_recognition_btn.setStyleSheet("background-color: #9b59b6;") self.image_recognition_btn.clicked.connect(self.open_image) self.image_recognition_btn.setEnabled(False) recognition_layout.addWidget(self.image_recognition_btn) # 摄像头识别 self.camera_recognition_btn = QPushButton("启动摄像头识别") self.camera_recognition_btn.setIcon(QIcon.fromTheme("camera-web")) self.camera_recognition_btn.setStyleSheet("background-color: #e74c3c;") self.camera_recognition_btn.clicked.connect(self.toggle_camera) self.camera_recognition_btn.setEnabled(False) recognition_layout.addWidget(self.camera_recognition_btn) 
self.control_layout.addWidget(recognition_group) # 结果展示区域 - 使用QTextEdit替代QLabel results_group = QGroupBox("识别结果") results_layout = QVBoxLayout(results_group) self.results_text = QTextEdit() self.results_text.setReadOnly(True) self.results_text.setFont(QFont("Microsoft YaHei", 12)) # 使用支持中文的字体 self.results_text.setStyleSheet("background-color: #f8f9fa; border: 1px solid #ddd; padding: 10px;") self.results_text.setPlaceholderText("识别结果将显示在这里") # 添加滚动区域 scroll_area = QScrollArea() scroll_area.setWidgetResizable(True) scroll_area.setWidget(self.results_text) results_layout.addWidget(scroll_area) self.control_layout.addWidget(results_group, 1) # 占据剩余空间 # 系统信息 info_group = QGroupBox("系统信息") info_layout = QVBoxLayout(info_group) self.device_label = QLabel(f"计算设备: {'GPU' if torch.cuda.is_available() else 'CPU'}") info_layout.addWidget(self.device_label) self.model_info = QLabel("加载模型以显示信息") info_layout.addWidget(self.model_info) self.control_layout.addWidget(info_group) # 退出按钮 exit_btn = QPushButton("退出系统") exit_btn.setIcon(QIcon.fromTheme("application-exit")) exit_btn.clicked.connect(self.close) exit_btn.setStyleSheet("background-color: #ff6b6b; color: white;") self.control_layout.addWidget(exit_btn) def add_feedback_button(self): """添加反馈按钮到界面""" # 创建反馈按钮 self.feedback_button = QPushButton("提供反馈", self) self.feedback_button.setFixedSize(120, 40) # 设置固定大小 self.feedback_button.setStyleSheet( "QPushButton {" " background-color: #4CAF50;" " color: white;" " border-radius: 5px;" " font-weight: bold;" "}" "QPushButton:hover {" " background-color: #45a049;" "}" ) # 连接按钮点击事件 self.feedback_button.clicked.connect(self.open_feedback_dialog) # 添加到工具栏 self.toolbar.addWidget(self.feedback_button) def open_feedback_dialog(self): """打开反馈对话框""" if not self.last_results: QMessageBox.warning(self, "无法反馈", "没有可反馈的识别结果") return dialog = FeedbackDialog( self, last_results=self.last_results, dorm_members=self.dorm_members ) if dialog.exec_() == QDialog.Accepted: feedback_data = 
dialog.get_feedback_data() if feedback_data: # 修复:调用 FaceRecognition 实例的 save_feedback 方法 selected_result = dialog.get_selected_result() if selected_result: # 获取检测框 detected_box = [ selected_result["box"][0], selected_result["box"][1], selected_result["box"][0] + selected_result["box"][2], selected_result["box"][1] + selected_result["box"][3] ] # 调用保存反馈方法 self.face_recognition.save_feedback( self.current_image, detected_box, feedback_data["original_label"], feedback_data["correct_label"] ) QMessageBox.information(self, "反馈提交", "感谢您的反馈!数据已保存用于改进模型") else: QMessageBox.warning(self, "反馈错误", "未选择要反馈的人脸结果") def init_models(self): """初始化模型组件""" # 设置设备 self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.device_label.setText(f"计算设备: {'GPU' if torch.cuda.is_available() else 'CPU'}") # 初始化人脸检测器 try: self.detector = MTCNN( keep_all=True, post_process=False, device=self.device ) self.status_bar.showMessage("MTCNN 检测器初始化完成") logger.info("MTCNN 检测器初始化完成") except Exception as e: self.status_bar.showMessage(f"MTCNN 初始化失败: {str(e)}") logger.error(f"MTCNN 初始化失败: {str(e)}") return # 初始化人脸特征提取器 try: self.embedder = InceptionResnetV1( pretrained='vggface2', classify=False, device=self.device ).eval() self.status_bar.showMessage("FaceNet 特征提取器初始化完成") logger.info("FaceNet 特征提取器初始化完成") except Exception as e: self.status_bar.showMessage(f"FaceNet 初始化失败: {str(e)}") logger.error(f"FaceNet 初始化失败: {str(e)}") def load_model(self): """加载预训练的SVM分类器""" options = QFileDialog.Options() file_path, _ = QFileDialog.getOpenFileName( self, "选择模型文件", "", "模型文件 (*.pkl);;所有文件 (*)", options=options ) if file_path: try: # 加载模型 model_data = joblib.load(file_path) self.classifier = model_data['classifier'] self.label_encoder = model_data['label_encoder'] self.dorm_members = model_data['dorm_members'] # 启用重新训练按钮 self.retrain_btn.setEnabled(True) # 更新UI状态 self.model_loaded = True self.model_status.setText("模型状态: 已加载") self.model_info.setText(f"寝室成员: {', '.join(self.dorm_members)}") 
self.image_recognition_btn.setEnabled(True) self.camera_recognition_btn.setEnabled(True) # 状态栏消息 self.status_bar.showMessage(f"模型加载成功: {os.path.basename(file_path)}") # 显示成功消息 QMessageBox.information( self, "模型加载", f"模型加载成功!\n识别成员: {len(self.dorm_members)}人\n置信度阈值: {self.threshold_slider.value() / 100:.2f}" ) except Exception as e: QMessageBox.critical(self, "加载错误", f"模型加载失败: {str(e)}") self.status_bar.showMessage(f"模型加载失败: {str(e)}") def update_threshold(self, value): """更新置信度阈值""" threshold = value / 100 self.threshold_value.setText(f"{threshold:.2f}") self.status_bar.showMessage(f"置信度阈值更新为: {threshold:.2f}") def open_image(self): """打开图片文件进行识别""" if not self.model_loaded: QMessageBox.warning(self, "警告", "请先加载模型!") return options = QFileDialog.Options() file_path, _ = QFileDialog.getOpenFileName( self, "选择识别图片", "", "图片文件 (*.jpg *.jpeg *.png);;所有文件 (*)", options=options ) if file_path: # 读取图片 image = cv2.imread(file_path) if image is None: QMessageBox.critical(self, "错误", "无法读取图片文件!") return # 保存当前图片 self.current_image = image.copy() # 进行识别 self.recognize_faces(image) def toggle_camera(self): """切换摄像头状态""" if not self.model_loaded: QMessageBox.warning(self, "警告", "请先加载模型!") return if not self.camera_active: # 尝试打开摄像头 self.video_capture = cv2.VideoCapture(0) if not self.video_capture.isOpened(): QMessageBox.critical(self, "错误", "无法打开摄像头!") return # 启动摄像头 self.camera_active = True self.camera_recognition_btn.setText("停止摄像头识别") self.camera_recognition_btn.setIcon(QIcon.fromTheme("media-playback-stop")) self.timer.timeout.connect(self.process_camera_frame) self.timer.start(30) # 约33 FPS self.status_bar.showMessage("摄像头已启动") else: # 停止摄像头 self.camera_active = False self.camera_recognition_btn.setText("启动摄像头识别") self.camera_recognition_btn.setIcon(QIcon.fromTheme("camera-web")) self.timer.stop() if self.video_capture: self.video_capture.release() self.status_bar.showMessage("摄像头已停止") def process_camera_frame(self): """处理摄像头帧""" ret, frame = self.video_capture.read() if 
ret: # 保存当前帧 self.current_image = frame.copy() # 进行识别 self.recognize_faces(frame) def retrain_model(self): """使用反馈数据重新训练模型""" # 获取所有反馈数据 feedback_dir = os.path.join(os.getcwd(), "data", "feedback_data") # 修复1:支持多种文件扩展名 feedback_files = [] for f in os.listdir(feedback_dir): filepath = os.path.join(feedback_dir, f) if os.path.isfile(filepath) and (f.endswith('.pkl') or f.endswith('.json')): feedback_files.append(f) # 修复2:添加目录存在性检查 if not os.path.exists(feedback_dir): QMessageBox.warning(self, "目录不存在", f"反馈数据目录不存在: {feedback_dir}") return if not feedback_files: QMessageBox.information(self, "无反馈数据", "没有找到反馈数据,无法重新训练") return # 确认对话框 reply = QMessageBox.question( self, '确认重新训练', f"将使用 {len(feedback_files)} 条反馈数据重新训练模型。此操作可能需要几分钟时间,确定继续吗?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No ) if reply != QMessageBox.Yes: return try: # 创建进度对话框 progress = QProgressDialog("正在重新训练模型...", "取消", 0, len(feedback_files), self) progress.setWindowTitle("模型重新训练") progress.setWindowModality(Qt.WindowModal) progress.setMinimumDuration(0) progress.setValue(0) # 收集所有反馈数据 feedback_data = [] for i, filename in enumerate(feedback_files): filepath = os.path.join(feedback_dir, filename) # 修复3:根据文件扩展名使用不同的加载方式 if filename.endswith('.pkl'): with open(filepath, 'rb') as f: # 二进制模式读取 data = pickle.load(f) elif filename.endswith('.json'): with open(filepath, 'r', encoding='utf-8') as f: data = json.load(f) else: continue # 跳过不支持的文件类型 feedback_data.append(data) progress.setValue(i + 1) QApplication.processEvents() # 保持UI响应 if progress.wasCanceled(): return progress.setValue(len(feedback_files)) # 重新训练模型 self.status_bar.showMessage("正在重新训练模型...") # 修复4:添加详细的日志记录 logger.info(f"开始重新训练,使用 {len(feedback_data)} 条反馈数据") # 调用重新训练方法 success = self.face_recognition.retrain_with_feedback(feedback_data) if success: # 更新UI状态 self.model_status.setText("模型状态: 已重新训练") self.dorm_members = self.face_recognition.dorm_members self.model_info.setText(f"寝室成员: {', '.join(self.dorm_members)}") # 保存更新后的模型 model_path = 
os.path.join("models", "updated_model.pkl") self.face_recognition.save_updated_model(model_path) QMessageBox.information(self, "训练完成", "模型已成功使用反馈数据重新训练!") else: QMessageBox.warning(self, "训练失败", "重新训练过程中出现问题") except Exception as e: logger.error(f"重新训练失败: {str(e)}") QMessageBox.critical(self, "训练错误", f"重新训练模型时出错: {str(e)}") def recognize_faces(self, image): """识别人脸并在图像上标注结果""" # 清空上次结果 self.last_results = [] # 转换为 PIL 图像 pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) # 检测人脸 boxes, probs, _ = self.detector.detect(pil_image, landmarks=True) # 获取显示选项 display_mode = self.display_combo.currentIndex() # 准备显示图像 display_image = image.copy() # 如果没有检测到人脸 if boxes is None: if display_mode == 2: # 识别结果模式 cv2.putText(display_image, "未检测到人脸", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) self.results_text.setText("未检测到人脸") else: # 提取每个人脸 faces = [] for box in boxes: x1, y1, x2, y2 = box face = pil_image.crop((x1, y1, x2, y2)) faces.append(face) # 提取特征 embeddings = [] if faces and self.model_loaded: # 批量处理所有人脸 face_tensors = [self.preprocess_face(face) for face in faces] if face_tensors: face_tensors = torch.stack(face_tensors).to(self.device) with torch.no_grad(): embeddings = self.embedder(face_tensors).cpu().numpy() # 处理每个人脸 for i, (box, prob) in enumerate(zip(boxes, probs)): x1, y1, x2, y2 = box w, h = x2 - x1, y2 - y1 # 在图像上绘制结果 if display_mode == 0: # 原始图像 # 不绘制任何内容 pass elif display_mode == 1: # 检测框 # 绘制人脸框 cv2.rectangle(display_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2) elif display_mode == 2: # 识别结果 # 绘制人脸框 color = (0, 255, 0) # 绿色 # 如果有嵌入向量,则进行识别 if i < len(embeddings): # 预测 probabilities = self.classifier.predict_proba([embeddings[i]])[0] max_prob = np.max(probabilities) pred_class = self.classifier.predict([embeddings[i]])[0] pred_label = self.label_encoder.inverse_transform([pred_class])[0] # 获取置信度阈值 threshold = self.threshold_slider.value() / 100 # 判断是否为陌生人 if max_prob < threshold or pred_label == 'stranger': label = 
"陌生人" color = (0, 0, 255) # 红色 else: label = pred_label color = (0, 255, 0) # 绿色 # 保存结果用于文本显示 - 修复:保存完整的框信息 result = { "box": [int(x1), int(y1), int(x2 - x1), int(y2 - y1)], # [x, y, width, height] "label": label, "confidence": max_prob } self.last_results.append(result) # 绘制标签 cv2.rectangle(display_image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2) cv2.putText(display_image, f"{label} ({max_prob:.2f})", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2) else: # 无法识别的处理 cv2.rectangle(display_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 165, 255), 2) cv2.putText(display_image, "处理中...", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 165, 255), 2) # 更新结果文本 self.update_results_text() # 在图像上显示FPS(摄像头模式下) if self.camera_active: fps = self.timer.interval() if fps > 0: cv2.putText(display_image, f"FPS: {1000 / fps:.1f}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2) # 显示图像 self.display_image(display_image) def update_results_text(self): """更新结果文本区域""" if not self.last_results: self.results_text.setText("未识别到任何人脸") return # 构建结果文本 result_text = "<h3>识别结果:</h3>" for i, result in enumerate(self.last_results, 1): x, y, w, h = result["box"] label = result["label"] confidence = result["confidence"] # 处理中文显示问题 if label in self.dorm_members: result_text += ( f"<p><b>人脸 #{i}:</b> " f"<span style='color:green;'>寝室成员 - {label}</span><br>" f"位置: ({x}, {y}), 大小: {w}x{h}, 置信度: {confidence:.2f}</p>" ) else: result_text += ( f"<p><b>人脸 #{i}:</b> " f"<span style='color:red;'>陌生人</span><br>" f"位置: ({x}, {y}), 大小: {w}x{h}, 置信度: {confidence:.2f}</p>" ) self.results_text.setHtml(result_text) def preprocess_face(self, face_img): """预处理人脸图像""" # 调整大小 face_img = face_img.resize((160, 160)) # 转换为张量并归一化 face_img = np.array(face_img).astype(np.float32) / 255.0 face_img = (face_img - 0.5) / 0.5 # 归一化到[-1, 1] face_img = torch.tensor(face_img).permute(2, 0, 1) # HWC to CHW return face_img def display_image(self, image): """在QLabel中显示图像""" # 
将OpenCV图像转换为Qt格式 height, width, channel = image.shape bytes_per_line = 3 * width q_img = QImage(image.data, width, height, bytes_per_line, QImage.Format_RGB888).rgbSwapped() # 缩放图像以适应标签 pixmap = QPixmap.fromImage(q_img) self.image_label.setPixmap(pixmap.scaled( self.image_label.width(), self.image_label.height(), Qt.KeepAspectRatio, Qt.SmoothTransformation )) def closeEvent(self, event): """关闭事件处理""" if self.camera_active: self.timer.stop() if self.video_capture: self.video_capture.release() # 确认退出 reply = QMessageBox.question( self, '确认退出', "确定要退出系统吗?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No ) if reply == QMessageBox.Yes: event.accept() else: event.ignore() if name == “main”: app = QApplication(sys.argv) # 设置全局异常处理 def handle_exception(exc_type, exc_value, exc_traceback): """全局异常处理""" import traceback error_msg = "".join(traceback.format_exception(exc_type, exc_value, exc_traceback)) print(f"未捕获的异常:\n{error_msg}") # 记录到文件 with open("error.log", "a") as f: f.write(f"\n\n{datetime.now()}:\n{error_msg}") # 显示给用户 QMessageBox.critical(None, "系统错误", f"发生未处理的异常:\n{str(exc_value)}") sys.exit(1) sys.excepthook = handle_exception window = FaceRecognitionSystem() window.show() sys.exit(app.exec_()) face_model.py代码如下:import os os.environ[‘TF_CPP_MIN_LOG_LEVEL’] = ‘3’ # 禁用 TensorFlow 日志(如果仍有依赖) import cv2 import numpy as np import time import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import Dataset, DataLoader from torchvision import transforms from sklearn.svm import SVC from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score import joblib import logging import sys import glob from facenet_pytorch import MTCNN, InceptionResnetV1 from PIL import Image import gc 配置日志 logging.basicConfig(level=logging.INFO, format=‘%(asctime)s - %(levelname)s - %(message)s’) logger = logging.getLogger(name) def check_gpu_environment(): “”“检查 GPU 环境”“” print(“=” * 
60) print(“GPU 环境检查”) print(“=” * 60) # 检查 CUDA 是否可用 print(f"PyTorch 版本: {torch.__version__}") print(f"CUDA 可用: {torch.cuda.is_available()}") if torch.cuda.is_available(): print(f"GPU 数量: {torch.cuda.device_count()}") for i in range(torch.cuda.device_count()): print(f"GPU {i}: {torch.cuda.get_device_name(i)}") print(f" 显存总量: {torch.cuda.get_device_properties(i).total_memory / 1024 ** 3:.2f} GB") print("=" * 60) class FaceDataset(Dataset): “”“人脸数据集类”“” def __init__(self, data_dir, min_samples=10, transform=None): self.data_dir = data_dir self.transform = transform self.faces = [] self.labels = [] self.label_map = {} self.dorm_members = [] self._load_dataset(min_samples) def _load_dataset(self, min_samples): """加载数据集""" # 遍历每个成员文件夹 for member_dir in os.listdir(self.data_dir): member_path = os.path.join(self.data_dir, member_dir) if not os.path.isdir(member_path): continue # 记录寝室成员 self.dorm_members.append(member_dir) self.label_map[member_dir] = len(self.label_map) # 遍历成员的所有照片 member_faces = [] for img_file in os.listdir(member_path): img_path = os.path.join(member_path, img_file) try: # 使用 PIL 加载图像 img = Image.open(img_path).convert('RGB') member_faces.append(img) except Exception as e: logger.warning(f"无法加载图像 {img_path}: {str(e)}") # 确保每个成员有足够样本 if len(member_faces) < min_samples: logger.warning(f"{member_dir} 只有 {len(member_faces)} 个有效样本,至少需要 {min_samples} 个") continue # 添加成员数据 self.faces.extend(member_faces) self.labels.extend([self.label_map[member_dir]] * len(member_faces)) # 添加陌生人样本 stranger_faces = self._generate_stranger_samples(len(self.faces) // 4) self.faces.extend(stranger_faces) self.labels.extend([len(self.label_map)] * len(stranger_faces)) self.label_map['stranger'] = len(self.label_map) logger.info(f"数据集加载完成: {len(self.faces)} 个样本, {len(self.dorm_members)} 个成员") def _generate_stranger_samples(self, num_samples): """生成陌生人样本""" stranger_faces = [] # 使用公开数据集的人脸作为陌生人 # 这里使用 LFW 数据集作为示例(实际项目中应使用真实数据) for _ in range(num_samples): # 
生成随机噪声图像(实际应用中应使用真实陌生人照片) random_face = Image.fromarray(np.uint8(np.random.rand(160, 160, 3) * 255)) stranger_faces.append(random_face) return stranger_faces def __len__(self): return len(self.faces) def __getitem__(self, idx): face = self.faces[idx] label = self.labels[idx] if self.transform: face = self.transform(face) return face, label class DormFaceRecognizer: “”“寝室人脸识别系统 (PyTorch 实现)”“” def __init__(self, threshold=0.7, device=None): # 设置设备 self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu') logger.info(f"使用设备: {self.device}") # 初始化人脸检测器 self.detector = MTCNN( keep_all=True, post_process=False, device=self.device ) logger.info("MTCNN 检测器初始化完成") # 初始化人脸特征提取器 self.embedder = InceptionResnetV1( pretrained='vggface2', classify=False, device=self.device ).eval() # 设置为评估模式 logger.info("FaceNet 特征提取器初始化完成") # 初始化其他组件 self.classifier = None self.label_encoder = None self.threshold = threshold self.dorm_members = [] # 数据预处理 self.transform = transforms.Compose([ transforms.Resize((160, 160)), transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) ]) def create_dataset(self, data_dir, min_samples=10, batch_size=32, num_workers=4): """创建数据集""" dataset = FaceDataset( data_dir, min_samples=min_samples, transform=self.transform ) # 保存成员信息 self.dorm_members = dataset.dorm_members self.label_encoder = LabelEncoder().fit( list(dataset.label_map.keys()) + ['stranger'] ) # 创建数据加载器 dataloader = DataLoader( dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True ) return dataset, dataloader def extract_features(self, dataloader): """提取人脸特征向量""" embeddings = [] labels = [] logger.info("开始提取特征...") start_time = time.time() with torch.no_grad(): for batch_idx, (faces, batch_labels) in enumerate(dataloader): # 移动到设备 faces = faces.to(self.device) # 提取特征 batch_embeddings = self.embedder(faces) # 保存结果 embeddings.append(batch_embeddings.cpu().numpy()) labels.append(batch_labels.numpy()) # 每10个批次打印一次进度 if 
(batch_idx + 1) % 10 == 0: elapsed = time.time() - start_time logger.info(f"已处理 {batch_idx + 1}/{len(dataloader)} 批次, 耗时: {elapsed:.2f}秒") # 合并结果 embeddings = np.vstack(embeddings) labels = np.hstack(labels) logger.info(f"特征提取完成: {embeddings.shape[0]} 个样本, 耗时: {time.time() - start_time:.2f}秒") return embeddings, labels def train_classifier(self, embeddings, labels): """训练 SVM 分类器""" logger.info("开始训练分类器...") start_time = time.time() # 划分训练集和测试集 X_train, X_test, y_train, y_test = train_test_split( embeddings, labels, test_size=0.2, random_state=42 ) # 创建并训练 SVM 分类器 self.classifier = SVC(kernel='linear', probability=True, C=1.0) self.classifier.fit(X_train, y_train) # 评估模型 y_pred = self.classifier.predict(X_test) accuracy = accuracy_score(y_test, y_pred) logger.info(f"分类器训练完成, 准确率: {accuracy:.4f}, 耗时: {time.time() - start_time:.2f}秒") return accuracy def recognize_face(self, image): """识别单张图像中的人脸""" # 转换为 PIL 图像 if isinstance(image, np.ndarray): image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) # 检测人脸 boxes, probs, landmarks = self.detector.detect(image, landmarks=True) recognitions = [] if boxes is not None: # 提取每个人脸 faces = [] for box in boxes: x1, y1, x2, y2 = box face = image.crop((x1, y1, x2, y2)) faces.append(face) # 预处理人脸 face_tensors = torch.stack([self.transform(face) for face in faces]).to(self.device) # 提取特征 with torch.no_grad(): embeddings = self.embedder(face_tensors).cpu().numpy() # 预测 probabilities = self.classifier.predict_proba(embeddings) pred_classes = self.classifier.predict(embeddings) for i, (box, prob) in enumerate(zip(boxes, probs)): max_prob = np.max(probabilities[i]) pred_label = self.label_encoder.inverse_transform([pred_classes[i]])[0] # 判断是否为陌生人 if max_prob < self.threshold or pred_label == 'stranger': recognitions.append(("陌生人", max_prob, box)) else: recognitions.append((pred_label, max_prob, box)) return recognitions def save_model(self, file_path): """保存模型""" model_data = { 'classifier': self.classifier, 'label_encoder': 
self.label_encoder, 'threshold': self.threshold, 'dorm_members': self.dorm_members } joblib.dump(model_data, file_path) logger.info(f"模型已保存至: {file_path}") def load_model(self, file_path): """加载模型""" model_data = joblib.load(file_path) self.classifier = model_data['classifier'] self.label_encoder = model_data['label_encoder'] self.threshold = model_data['threshold'] self.dorm_members = model_data['dorm_members'] logger.info(f"模型已加载,寝室成员: {', '.join(self.dorm_members)}") def main(): “”“主函数”“” print(f"[{time.strftime(‘%H:%M:%S’)}] 程序启动") # 检查 GPU 环境 check_gpu_environment() # 检查并创建必要的目录 os.makedirs('data/dorm_faces', exist_ok=True) # 初始化识别器 try: recognizer = DormFaceRecognizer(threshold=0.6) logger.info("人脸识别器初始化成功") except Exception as e: logger.error(f"初始化失败: {str(e)}") print("程序将在10秒后退出...") time.sleep(10) return # 数据集路径 data_dir = "data/dorm_faces" # 检查数据集是否存在 if not os.path.exists(data_dir) or not os.listdir(data_dir): logger.warning(f"数据集目录 '{data_dir}' 不存在或为空") print("请创建以下结构的目录:") print("dorm_faces/") print("├── 成员1/") print("│ ├── 照片1.jpg") print("│ ├── 照片2.jpg") print("│ └── ...") print("├── 成员2/") print("│ └── ...") print("└── ...") print("\n程序将在10秒后退出...") time.sleep(10) return # 步骤1: 创建数据集 try: dataset, dataloader = recognizer.create_dataset( data_dir, min_samples=10, batch_size=64, num_workers=4 ) except Exception as e: logger.error(f"数据集创建失败: {str(e)}") return # 步骤2: 提取特征 try: embeddings, labels = recognizer.extract_features(dataloader) except Exception as e: logger.error(f"特征提取失败: {str(e)}") return # 步骤3: 训练分类器 try: accuracy = recognizer.train_classifier(embeddings, labels) except Exception as e: logger.error(f"分类器训练失败: {str(e)}") return # 保存模型 model_path = "models/dorm_face_model_pytorch.pkl" try: recognizer.save_model(model_path) except Exception as e: logger.error(f"模型保存失败: {str(e)}") # 测试识别 test_image_path = "test_photo.jpg" if not os.path.exists(test_image_path): logger.warning(f"测试图片 '{test_image_path}' 不存在,跳过识别测试") else: logger.info(f"正在测试识别: 
{test_image_path}") try: test_image = cv2.imread(test_image_path) if test_image is None: logger.error(f"无法读取图片: {test_image_path}") else: recognitions = recognizer.recognize_face(test_image) if not recognitions: logger.info("未检测到人脸") else: # 在图像上绘制结果 for name, confidence, box in recognitions: x1, y1, x2, y2 = box label = f"{name} ({confidence:.2f})" color = (0, 255, 0) if name != "陌生人" else (0, 0, 255) # 绘制矩形框 cv2.rectangle(test_image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2) # 绘制标签 cv2.putText(test_image, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) # 显示结果 cv2.imshow("人脸识别结果", test_image) cv2.waitKey(0) cv2.destroyAllWindows() # 保存结果图像 result_path = "recognition_result_pytorch.jpg" cv2.imwrite(result_path, test_image) logger.info(f"识别结果已保存至: {result_path}") except Exception as e: logger.error(f"人脸识别失败: {str(e)}") logger.info("程序执行完成") if name == “main”: main() face_recognition.py代码如下:import json import cv2 import numpy as np import torch import insightface from insightface.app import FaceAnalysis from facenet_pytorch import InceptionResnetV1 from PIL import Image import joblib import os import pickle from datetime import datetime import random import torch.nn as nn import torch.optim as optim from sklearn.preprocessing import LabelEncoder from sklearn.svm import SVC from torch.utils.data import Dataset, DataLoader class FaceRecognition: def init(self, device=None): self.device = device or torch.device(‘cuda’ if torch.cuda.is_available() else ‘cpu’) self.model_loaded = False self.training_data = {} # 初始化 training_data 属性 self.dorm_members = [] # 初始化 dorm_members 属性 self.label_encoder = LabelEncoder() # 初始化标签编码器 self.init_models() def init_models(self): """初始化人脸识别模型""" try: # 初始化ArcFace模型 - 使用正确的方法 self.arcface_model = FaceAnalysis(providers=['CPUExecutionProvider']) self.arcface_model.prepare(ctx_id=0, det_size=(640, 640)) # 初始化FaceNet模型作为备选 self.facenet_model = InceptionResnetV1( pretrained='vggface2', classify=False, 
device=self.device ).eval() # 状态标记 self.models_initialized = True print("模型初始化完成") except Exception as e: print(f"模型初始化失败: {str(e)}") self.models_initialized = False def load_classifier(self, model_path): """加载分类器模型""" try: model_data = joblib.load(model_path) self.classifier = model_data['classifier'] self.label_encoder = model_data['label_encoder'] self.dorm_members = model_data['dorm_members'] # 确保加载training_data self.training_data = model_data.get('training_data', {}) self.model_loaded = True print(f"分类器加载成功,成员: {', '.join(self.dorm_members)}") print(f"训练数据包含 {len(self.training_data)} 个类别") return True except Exception as e: print(f"分类器加载失败: {str(e)}") self.model_loaded = False return False def extract_features(self, face_img): """使用ArcFace提取人脸特征""" try: if face_img.size == 0: print("错误:空的人脸图像") return None # 将图像从BGR转换为RGB rgb_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) faces = self.arcface_model.get(rgb_img) if faces: return faces[0].embedding print("未检测到人脸特征") return None except Exception as e: print(f"特征提取失败: {str(e)}") return None def extract_features_facenet(self, face_img): """使用FaceNet提取人脸特征(备选)""" try: # 转换为PIL图像并预处理 face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)) face_tensor = self.preprocess_face(face_pil).to(self.device) with torch.no_grad(): features = self.facenet_model(face_tensor.unsqueeze(0)).cpu().numpy()[0] return features except Exception as e: print(f"FaceNet特征提取失败: {str(e)}") return None def preprocess_face(self, face_img): """预处理人脸图像""" # 调整大小 face_img = face_img.resize((160, 160)) # 转换为张量并归一化 face_img = np.array(face_img).astype(np.float32) / 255.0 face_img = (face_img - 0.5) / 0.5 # 归一化到[-1, 1] face_img = torch.tensor(face_img).permute(2, 0, 1) # HWC to CHW return face_img def retrain_with_feedback(self, feedback_data): """使用反馈数据重新训练模型""" # 检查是否有原始训练数据 if not self.training_data: print("错误:没有可用的原始训练数据") return False # 收集原始训练数据 original_features = [] original_labels = [] # 收集特征和标签 for member, embeddings in 
self.training_data.items(): for emb in embeddings: original_features.append(emb) original_labels.append(member) # 收集反馈数据 feedback_features = [] feedback_labels = [] for feedback in feedback_data: # 获取正确标签 correct_label = feedback.get("correct_label") if not correct_label or correct_label == "unknown": continue # 获取原始图像和人脸位置 image_path = feedback.get("image_path", "") if not image_path or not os.path.exists(image_path): print(f"图像路径无效: {image_path}") continue box = feedback.get("box", []) if len(box) != 4: print(f"无效的人脸框: {box}") continue # 处理图像 image = cv2.imread(image_path) if image is None: print(f"无法读取图像: {image_path}") continue # 裁剪人脸区域 x1, y1, x2, y2 = map(int, box) face_img = image[y1:y2, x1:x2] if face_img.size == 0: print(f"裁剪后的人脸图像为空: {image_path}") continue # 提取特征 embedding = self.extract_features(face_img) if embedding is None: print(f"无法提取特征: {image_path}") continue # 添加到训练数据 feedback_features.append(embedding) feedback_labels.append(correct_label) print(f"添加反馈数据: {correct_label} - {image_path}") # 检查是否有有效的反馈数据 if not feedback_features: print("错误:没有有效的反馈数据") return False # 合并数据 all_features = np.vstack([original_features, feedback_features]) all_labels = original_labels + feedback_labels # 重新训练分类器 self.classifier = SVC(kernel='linear', probability=True) self.classifier.fit(all_features, all_labels) # 更新标签编码器 self.label_encoder = LabelEncoder() self.label_encoder.fit(all_labels) # 更新寝室成员列表 self.dorm_members = list(self.label_encoder.classes_) # 更新训练数据 self.training_data = {} for label, feature in zip(all_labels, all_features): if label not in self.training_data: self.training_data[label] = [] self.training_data[label].append(feature) print(f"重新训练完成! 
新模型包含 {len(self.dorm_members)} 个成员") return True def recognize(self, image, threshold=0.7): """识别人脸""" if not self.model_loaded or not self.models_initialized: return [], image.copy() # 使用ArcFace检测人脸 rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) faces = self.arcface_model.get(rgb_img) results = [] display_img = image.copy() if faces: for face in faces: # 获取人脸框 x1, y1, x2, y2 = face.bbox.astype(int) # 提取特征 embedding = face.embedding # 预测 probabilities = self.classifier.predict_proba([embedding])[0] max_prob = np.max(probabilities) pred_class = self.classifier.predict([embedding])[0] pred_label = self.label_encoder.inverse_transform([pred_class])[0] # 判断是否为陌生人 if max_prob < threshold or pred_label == 'stranger': label = "陌生人" color = (0, 0, 255) # 红色 else: label = pred_label color = (0, 255, 0) # 绿色 # 保存结果 results.append({ "box": [x1, y1, x2, y2], "label": label, "confidence": max_prob }) # 在图像上绘制结果 cv2.rectangle(display_img, (x1, y1), (x2, y2), color, 2) cv2.putText(display_img, f"{label} ({max_prob:.2f})", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2) return results, display_img def save_feedback(self, image, detected_box, incorrect_label, correct_label): """保存用户反馈数据 - 改进为保存图像路径而非完整图像""" feedback_dir = "data/feedback_data" os.makedirs(feedback_dir, exist_ok=True) # 创建唯一文件名 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # 保存人脸图像 face_img_dir = os.path.join(feedback_dir, "faces") os.makedirs(face_img_dir, exist_ok=True) face_img_path = os.path.join(face_img_dir, f"face_{timestamp}.jpg") # 裁剪并保存人脸区域 x1, y1, x2, y2 = map(int, detected_box) # 修复1:确保裁剪区域有效 if y2 > y1 and x2 > x1: face_img = image[y1:y2, x1:x2] if face_img.size > 0: cv2.imwrite(face_img_path, face_img) else: logger.warning(f"裁剪的人脸区域无效: {detected_box}") face_img_path = None else: logger.warning(f"无效的检测框: {detected_box}") face_img_path = None # 保存反馈元数据 filename = f"feedback_{timestamp}.json" # 修复2:使用JSON格式 filepath = os.path.join(feedback_dir, filename) # 准备数据 feedback_data = { 
"image_path": face_img_path, # 保存路径而非完整图像 "detected_box": detected_box, "incorrect_label": incorrect_label, "correct_label": correct_label, "timestamp": timestamp } # 修复3:使用JSON保存便于阅读和调试 with open(filepath, 'w', encoding='utf-8') as f: json.dump(feedback_data, f, ensure_ascii=False, indent=2) return True def save_updated_model(self, output_path): """保存更新后的模型""" model_data = { 'classifier': self.classifier, 'label_encoder': self.label_encoder, 'dorm_members': self.dorm_members, 'training_data': self.training_data # 包含训练数据 } joblib.dump(model_data, output_path) print(f"更新后的模型已保存到: {output_path}") class TripletFaceDataset(Dataset): “”“三元组人脸数据集”“” def __init__(self, embeddings, labels): self.embeddings = embeddings self.labels = labels self.label_to_indices = {} # 创建标签到索引的映射 for idx, label in enumerate(labels): if label not in self.label_to_indices: self.label_to_indices[label] = [] self.label_to_indices[label].append(idx) def __getitem__(self, index): anchor_label = self.labels[index] # 随机选择正样本 positive_idx = index while positive_idx == index: positive_idx = random.choice(self.label_to_indices[anchor_label]) # 随机选择负样本 negative_label = random.choice([l for l in set(self.labels) if l != anchor_label]) negative_idx = random.choice(self.label_to_indices[negative_label]) return ( self.embeddings[index], self.embeddings[positive_idx], self.embeddings[negative_idx] ) def __len__(self): return len(self.embeddings) class TripletLoss(nn.Module): “”“三元组损失函数”“” def __init__(self, margin=1.0): super(TripletLoss, self).__init__() self.margin = margin def forward(self, anchor, positive, negative): distance_positive = (anchor - positive).pow(2).sum(1) distance_negative = (anchor - negative).pow(2).sum(1) losses = torch.relu(distance_positive - distance_negative + self.margin) return losses.mean() def train_triplet_model(embeddings, labels, epochs=100): “”“训练三元组模型”“” dataset = TripletFaceDataset(embeddings, labels) dataloader = DataLoader(dataset, batch_size=32, shuffle=True) model = 
nn.Sequential( nn.Linear(embeddings.shape[1], 256), nn.ReLU(), nn.Linear(256, 128) ) criterion = TripletLoss(margin=0.5) optimizer = optim.Adam(model.parameters(), lr=0.001) for epoch in range(epochs): total_loss = 0.0 for anchor, positive, negative in dataloader: optimizer.zero_grad() anchor_embed = model(anchor) positive_embed = model(positive) negative_embed = model(negative) loss = criterion(anchor_embed, positive_embed, negative_embed) loss.backward() optimizer.step() total_loss += loss.item() print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(dataloader):.4f}") return model main.py代码如下:import sys from dorm_face_recognition_gui import FaceRecognitionSystem from PyQt5.QtWidgets import QApplication if name == “main”: # 设置中文编码支持 if sys.platform == “win32”: import ctypes ctypes.windll.shell32.SetCurrentProcessExplicitAppUserModelID(“dorm.face.recognition”) app = QApplication(sys.argv) app.setStyle("Fusion") # 使用Fusion风格 # 设置应用样式 app.setStyleSheet(""" QMainWindow { background-color: #ecf0f1; } QGroupBox { border: 1px solid #bdc3c7; border-radius: 8px; margin-top: 20px; padding: 10px; font-weight: bold; background-color: #ffffff; } QGroupBox::title { subcontrol-origin: margin; subcontrol-position: top center; padding: 0 5px; } QPushButton { background-color: #3498db; color: white; border: none; padding: 10px 15px; font-size: 14px; margin: 5px; border-radius: 5px; } QPushButton:hover { background-color: #2980b9; } QPushButton:pressed { background-color: #1c6ea4; } QPushButton:disabled { background-color: #bdc3c7; } QLabel { font-size: 14px; padding: 3px; } QComboBox, QSlider { padding: 4px; background-color: #ffffff; } QTextEdit { font-family: "Microsoft YaHei"; font-size: 12px; } """) window = FaceRecognitionSystem() window.show() sys.exit(app.exec_()) ui.py代码如下:from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QFileDialog, QComboBox, QSlider, QMessageBox, QTextEdit, QGroupBox, QScrollArea, 
QDialog, QListWidget) from PyQt5.QtCore import Qt, QTimer from PyQt5.QtGui import QImage, QPixmap, QIcon, QFont from face_recognition import FaceRecognition class FaceRecognitionSystem(QMainWindow): def init(self): super().init() # … 原有初始化代码 … # 初始化人脸识别器 self.face_recognition = FaceRecognition() # 添加反馈按钮 self.add_feedback_button() def add_feedback_button(self): """添加反馈按钮到界面""" self.feedback_btn = QPushButton("反馈识别错误") self.feedback_btn.setIcon(QIcon.fromTheme("dialog-warning")) self.feedback_btn.setStyleSheet("background-color: #f39c12;") self.feedback_btn.clicked.connect(self.handle_feedback) # 找到识别功能组并添加按钮 for i in range(self.control_layout.count()): widget = self.control_layout.itemAt(i).widget() if isinstance(widget, QGroupBox) and widget.title() == "识别功能": layout = widget.layout() layout.addWidget(self.feedback_btn) break def handle_feedback(self): """处理用户反馈""" if not hasattr(self, 'last_results') or not self.last_results: QMessageBox.warning(self, "警告", "没有可反馈的识别结果") return # 创建反馈对话框 dialog = QDialog(self) dialog.setWindowTitle("识别错误反馈") dialog.setFixedSize(400, 300) layout = QVBoxLayout(dialog) # 添加当前识别结果 result_label = QLabel("当前识别结果:") layout.addWidget(result_label) self.feedback_list = QListWidget() for i, result in enumerate(self.last_results, 1): label = result["label"] confidence = result["confidence"] self.feedback_list.addItem(f"人脸 #{i}: {label} (置信度: {confidence:.2f})") layout.addWidget(self.feedback_list) # 添加正确身份选择 correct_label = QLabel("正确身份:") layout.addWidget(correct_label) self.correct_combo = QComboBox() self.correct_combo.addItems(["选择正确身份"] + self.face_recognition.dorm_members + ["陌生人", "不在列表中"]) layout.addWidget(self.correct_combo) # 添加按钮 btn_layout = QHBoxLayout() submit_btn = QPushButton("提交反馈") submit_btn.clicked.connect(lambda: self.submit_feedback(dialog)) btn_layout.addWidget(submit_btn) cancel_btn = QPushButton("取消") cancel_btn.clicked.connect(dialog.reject) btn_layout.addWidget(cancel_btn) layout.addLayout(btn_layout) 
dialog.exec_() def submit_feedback(self, dialog): """提交反馈并更新模型""" selected_index = self.feedback_list.currentRow() if selected_index < 0: QMessageBox.warning(self, "警告", "请选择一个识别结果") return result = self.last_results[selected_index] correct_identity = self.correct_combo.currentText() if correct_identity == "选择正确身份": QMessageBox.warning(self, "警告", "请选择正确身份") return # 保存反馈数据 self.face_recognition.save_feedback( self.current_image.copy(), result["box"], result["label"], correct_identity ) QMessageBox.information(self, "反馈提交", "感谢您的反馈!数据已保存用于改进模型") dialog.accept() def recognize_faces(self, image): """识别人脸并在图像上标注结果""" # 使用人脸识别器进行识别 self.last_results, display_image = self.face_recognition.recognize( image, threshold=self.threshold_slider.value() / 100 ) # 更新结果文本 self.update_results_text() # 显示图像 self.display_image(display_image) def update_results_text(self): """更新结果文本区域""" if not self.last_results: self.results_text.setText("未识别到任何人脸") return # 构建结果文本 result_text = "<h3>识别结果:</h3>" for i, result in enumerate(self.last_results, 1): x1, y1, x2, y2 = result["box"] label = result["label"] confidence = result["confidence"] # 处理中文显示问题 if label in self.face_recognition.dorm_members: result_text += ( f"<p><b>人脸 #{i}:</b> " f"<span style='color:green;'>寝室成员 - {label}</span><br>" f"位置: ({x1}, {y1}), 置信度: {confidence:.2f}</p>" ) else: result_text += ( f"<p><b>人脸 #{i}:</b> " f"<span style='color:red;'>陌生人</span><br>" f"位置: ({x1}, {y1}), 置信度: {confidence:.2f}</p>" ) self.results_text.setHtml(result_text) # ... 其余原有方法 ... 需要把重新训练模型部分和反馈部分全部删除
06-26
import torch import torch.nn as nn import torch.nn.functional as F class PatchEmbed(nn.Module): def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): super().__init__() self.img_size = img_size self.patch_size = patch_size self.grid = img_size // patch_size self.num_patches = self.grid * self.grid self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) def forward(self, x): # x: (B, C, H, W) -> (B, N, D), where N = H*W/patch_size^2, D=embed_dim x = self.proj(x) x = x.flatten(2).transpose(1, 2) return x class Encoder(nn.Module): def __init__(self, embed_dim=768, depth=6, num_heads=12): super().__init__() # 注意:通常会在这里使用 Pre-LayerNorm,但为了简洁,保持默认 layer = nn.TransformerEncoderLayer(embed_dim, num_heads, dim_feedforward=embed_dim*4, batch_first=True) # 建议加上 batch_first=True self.encoder = nn.TransformerEncoder(layer, depth) def forward(self, x): # x: (B, N, D) return self.encoder(x) class Decoder(nn.Module): def __init__(self, embed_dim=768, decoder_dim=512, depth=4, num_heads=8, patch_dim=16*16*3): super().__init__() # 这个投影层在 MAE 中完成更合适,这里可以移除 # self.proj = nn.Linear(embed_dim, decoder_dim) layer = nn.TransformerDecoderLayer( decoder_dim, num_heads, dim_feedforward=decoder_dim*4, batch_first=True # 建议加上 batch_first=True ) self.decoder = nn.TransformerDecoder(layer, depth) self.head = nn.Linear(decoder_dim, patch_dim) # 从解码器维度映射回像素 def forward(self, tgt, memory): # tgt: (B, N, decoder_dim),解码器的输入序列 # memory: (B, N_visible, embed_dim),编码器的输出 x = self.decoder(tgt=tgt, memory=memory) return self.head(x) class MAE(nn.Module): def __init__(self, img_size=224, patch_size=16, mask_ratio=0.75): super().__init__() self.patch_embed = PatchEmbed(img_size, patch_size) self.mask_ratio = mask_ratio embed_dim = 768 # 关键修改 1: 将 decoder_dim 定义为实例属性 self.decoder_dim = 512 patch_dim = patch_size * patch_size * 3 self.encoder = Encoder(embed_dim) self.decoder = Decoder(embed_dim, self.decoder_dim, patch_dim=patch_dim) self.enc_to_dec_proj = 
nn.Linear(embed_dim, self.decoder_dim) self.mask_token = nn.Parameter(torch.randn(1, 1, self.decoder_dim)) def random_mask(self, x): B, N, C = x.shape len_keep = int(N * (1 - self.mask_ratio)) noise = torch.rand(B, N, device=x.device) ids_shuffle = torch.argsort(noise, dim=1) ids_keep = ids_shuffle[:, :len_keep] ids_mask = ids_shuffle[:, len_keep:] x_keep = torch.gather(x, 1, ids_keep.unsqueeze(-1).repeat(1, 1, C)) return x_keep, ids_keep, ids_mask, ids_shuffle def forward(self, imgs): x = self.patch_embed(imgs) B, N, _ = x.shape x_keep, ids_keep, ids_mask, ids_shuffle = self.random_mask(x) encoded = self.encoder(x_keep) encoded_proj = self.enc_to_dec_proj(encoded) num_mask = ids_mask.shape[1] mask_tokens = self.mask_token.repeat(B, num_mask, 1) dec_tokens = torch.cat([encoded_proj, mask_tokens], dim=1) dec_tokens = torch.gather(dec_tokens, 1, ids_shuffle.unsqueeze(-1).repeat(1, 1, dec_tokens.shape[-1])) # 6. 解码器解码 memory = torch.zeros_like(dec_tokens) # 关键修改 2: 使用 self.decoder_dim 替代 decoder_dim memory.scatter_(1, ids_keep.unsqueeze(-1).repeat(1, 1, self.decoder_dim), encoded_proj) pred = self.decoder(tgt=dec_tokens, memory=memory) # 7. 
计算 Loss pred_masked = pred.gather(1, ids_mask.unsqueeze(-1).repeat(1, 1, pred.shape[-1])) target = x.gather(1, ids_mask.unsqueeze(-1).repeat(1, 1, x.shape[-1])) loss = F.mse_loss(pred_masked, target) return loss, pred_masked, ids_mask, x def unpatchify(patches, img_size=224, patch_size=16): B, N, PP = patches.shape C = 3 patches = patches.reshape(B, N, patch_size, patch_size, C) grid = img_size // patch_size patches = patches.reshape(B, grid, grid, patch_size, patch_size, C) img = patches.permute(0,1,3,2,4,5).reshape(B, C, img_size, img_size) return img def show_img(tensor, title=""): img = tensor.detach().cpu().permute(1,2,0).numpy() img = np.clip(img,0,1) plt.imshow(img) plt.title(title) plt.axis("off")img_path = r"D:\bone_marrow_sample.png" img = Image.open(img_path).convert("RGB").resize((224,224)) img_np = np.array(img)/255.0 img_tensor = torch.tensor(img_np).permute(2,0,1).unsqueeze(0).float() img_tensor.shapemodel = MAE() loss, pred_masked, ids_mask, full_patch = model(img_tensor) print("Loss:", loss.item())# 还原 patch orig_patches = full_patch.clone() mask = torch.ones_like(orig_patches) mask[:, ids_mask[0]] = 0 # masked 部分置 0 masked_img = unpatchify(orig_patches * mask, 224, 16)[0]-------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) Cell In[39], line 1 ----> 1 recon_img = unpatchify(pred_masked, 224, 16)[0] Cell In[35], line 6 4 patches = patches.reshape(B, N, patch_size, patch_size, C) 5 grid = img_size // patch_size ----> 6 patches = patches.reshape(B, grid, grid, patch_size, patch_size, C) 7 img = patches.permute(0,1,3,2,4,5).reshape(B, C, img_size, img_size) 8 return img RuntimeError: shape '[1, 14, 14, 16, 16, 3]' is invalid for input of size 112896
最新发布
11-22
RuntimeError: CUDA error: device-side assert triggered CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1 Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. !pip install transformers datasets torch rouge-score matplotlib import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import Dataset, DataLoader from transformers import BertTokenizerFast import time import numpy as np from datasets import load_dataset from rouge_score import rouge_scorer import matplotlib.pyplot as plt from IPython.display import clear_output # 设备配置 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"使用设备: {device}") # 数据预处理(严格过滤无效样本) class SummaryDataset(Dataset): def __init__(self, dataset_split, tokenizer, max_article_len=384, max_summary_len=96, subset_size=0.01): self.tokenizer = tokenizer self.max_article_len = max_article_len self.max_summary_len = max_summary_len self.subset = dataset_split.select(range(int(len(dataset_split) * subset_size))) # 严格过滤无效样本 self.articles = [] self.summaries = [] self.vocab = set(tokenizer.vocab.keys()) for item in self.subset: article = item['article'].strip() summary = item['highlights'].strip() if len(article) > 20 and len(summary) > 10: article_tokens = tokenizer.tokenize(article) summary_tokens = tokenizer.tokenize(summary) if all(t in self.vocab for t in article_tokens) and all(t in self.vocab for t in summary_tokens): self.articles.append(article) self.summaries.append(summary) self.pad_token_id = tokenizer.pad_token_id self.unk_token_id = tokenizer.unk_token_id def __len__(self): return len(self.articles) def __getitem__(self, idx): src = self.tokenizer( self.articles[idx], max_length=self.max_article_len, truncation=True, padding='max_length', return_tensors='pt', add_special_tokens=True ) tgt = self.tokenizer( self.summaries[idx], 
max_length=self.max_summary_len, truncation=True, padding='max_length', return_tensors='pt', add_special_tokens=True ) tgt_labels = tgt['input_ids'].squeeze() tgt_labels[tgt_labels == self.pad_token_id] = -100 # 忽略填充 tgt_labels[tgt_labels >= len(self.tokenizer.vocab)] = self.unk_token_id # 过滤无效id return { 'input_ids': src['input_ids'].squeeze(), 'attention_mask': src['attention_mask'].squeeze(), 'labels': tgt_labels } # 基础Seq2Seq模型 class BasicEncoder(nn.Module): def __init__(self, vocab_size, emb_dim=128, hidden_dim=256): super().__init__() self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0) self.gru = nn.GRU(emb_dim, hidden_dim, num_layers=2, batch_first=True, bidirectional=True) self.fc_hidden = nn.Linear(hidden_dim * 2, hidden_dim) def forward(self, src): embedded = self.embedding(src) outputs, hidden = self.gru(embedded) # 取第二层双向隐藏状态 forward_hidden = hidden[-2, :, :] # 第二层正向 backward_hidden = hidden[-1, :, :] # 第二层反向 hidden = torch.cat([forward_hidden, backward_hidden], dim=1) # (batch, 2*hidden_dim) hidden = self.fc_hidden(hidden).unsqueeze(0) # (1, batch, hidden_dim) return hidden class BasicDecoder(nn.Module): def __init__(self, vocab_size, emb_dim=128, hidden_dim=256): super().__init__() self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0) self.gru = nn.GRU(emb_dim + hidden_dim, hidden_dim, num_layers=1, batch_first=True) self.fc = nn.Linear(hidden_dim * 2 + emb_dim, vocab_size) def forward(self, input_ids, hidden, context): input_embedded = self.embedding(input_ids.unsqueeze(1)) # (batch, 1, emb_dim) input_combined = torch.cat([input_embedded, context.unsqueeze(1)], dim=2) # (batch, 1, emb_dim+hidden_dim) output, hidden = self.gru(input_combined, hidden) # (batch, 1, hidden_dim) output = output.squeeze(1) # (batch, hidden_dim) combined = torch.cat([output, context, input_embedded.squeeze(1)], dim=1) # (batch, 2*hidden_dim+emb_dim) logits = self.fc(combined) return logits, hidden class BasicSeq2Seq(nn.Module): def __init__(self, 
vocab_size, emb_dim=128, hidden_dim=256): super().__init__() self.encoder = BasicEncoder(vocab_size, emb_dim, hidden_dim) self.decoder = BasicDecoder(vocab_size, emb_dim, hidden_dim) self.device = device self.sos_token_id = 101 # [CLS] self.eos_token_id = 102 # [SEP] self.unk_token_id = 100 # [UNK] def forward(self, src, tgt): hidden = self.encoder(src) context = hidden.squeeze(0) batch_size, tgt_len = tgt.size() outputs = torch.zeros(batch_size, tgt_len, self.decoder.fc.out_features).to(device) input_ids = tgt[:, 0] for t in range(1, tgt_len): logits, hidden = self.decoder(input_ids, hidden, context) outputs[:, t] = logits input_ids = tgt[:, t] return outputs def generate(self, src, max_length=80): src = src.to(device) hidden = self.encoder(src) context = hidden.squeeze(0) # 修正后的生成初始化 generated = torch.full((src.size(0), 1), self.sos_token_id, device=device) # 注意这里的修正 for _ in range(max_length-1): logits, hidden = self.decoder(generated[:, -1], hidden, context) next_token = torch.argmax(logits, dim=1, keepdim=True) # 防止过早生成标点 if generated.size(1) < 5: punctuation = [',', '.', ';', ':', '!', '?', "'", '"', '`', '~'] punct_ids = [self.tokenizer.convert_tokens_to_ids(p) for p in punctuation] if next_token.item() in punct_ids: # 替换为最常见的实词 next_token = torch.tensor([[self.tokenizer.convert_tokens_to_ids('the')]], device=device) generated = torch.cat([generated, next_token], dim=1) if (next_token == self.eos_token_id).all(): break return generated # 注意力Seq2Seq模型 class Attention(nn.Module): def __init__(self, hidden_dim): super().__init__() self.W = nn.Linear(2 * hidden_dim, hidden_dim) self.v = nn.Linear(hidden_dim, 1, bias=False) def forward(self, hidden, encoder_outputs): src_len = encoder_outputs.size(1) hidden = hidden.unsqueeze(1).repeat(1, src_len, 1) # (batch, src_len, hidden_dim) combined = torch.cat([hidden, encoder_outputs], dim=2) # (batch, src_len, 2*hidden_dim) energy = self.v(torch.tanh(self.W(combined))).squeeze(2) # (batch, src_len) return 
torch.softmax(energy, dim=1) class AttnEncoder(nn.Module): def __init__(self, vocab_size, emb_dim=128, hidden_dim=256): super().__init__() self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0) self.lstm = nn.LSTM(emb_dim, hidden_dim, num_layers=2, batch_first=True, bidirectional=True, dropout=0.1) self.fc_hidden = nn.Linear(hidden_dim * 2, hidden_dim) # 双向输出拼接 self.fc_cell = nn.Linear(hidden_dim * 2, hidden_dim) def forward(self, src): embedded = self.embedding(src) outputs, (hidden, cell) = self.lstm(embedded) # outputs: (batch, src_len, 2*hidden_dim) # 取第二层双向隐藏状态 hidden = torch.cat([hidden[-2, :, :], hidden[-1, :, :]], dim=1) # (batch, 2*hidden_dim) cell = torch.cat([cell[-2, :, :], cell[-1, :, :]], dim=1) hidden = self.fc_hidden(hidden).unsqueeze(0) # (1, batch, hidden_dim) cell = self.fc_cell(cell).unsqueeze(0) return outputs, (hidden, cell) class AttnDecoder(nn.Module): def __init__(self, vocab_size, emb_dim=128, hidden_dim=256): super().__init__() self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0) self.attention = Attention(hidden_dim) self.lstm = nn.LSTM(emb_dim + 2 * hidden_dim, hidden_dim, num_layers=1, batch_first=True) self.fc = nn.Linear(hidden_dim + emb_dim, vocab_size) def forward(self, input_ids, hidden, cell, encoder_outputs): input_embedded = self.embedding(input_ids.unsqueeze(1)) # (batch, 1, emb_dim) attn_weights = self.attention(hidden.squeeze(0), encoder_outputs) # (batch, src_len) context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs) # (batch, 1, 2*hidden_dim) lstm_input = torch.cat([input_embedded, context], dim=2) # (batch, 1, emb_dim+2*hidden_dim) output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell)) # output: (batch, 1, hidden_dim) logits = self.fc(torch.cat([output.squeeze(1), input_embedded.squeeze(1)], dim=1)) # (batch, vocab_size) return logits, hidden, cell class AttnSeq2Seq(nn.Module): def __init__(self, vocab_size, emb_dim=128, hidden_dim=256): super().__init__() self.encoder = 
AttnEncoder(vocab_size, emb_dim, hidden_dim) self.decoder = AttnDecoder(vocab_size, emb_dim, hidden_dim) self.device = device self.sos_token_id = 101 # [CLS] self.eos_token_id = 102 # [SEP] self.unk_token_id = 100 # [UNK] def forward(self, src, tgt): encoder_outputs, (hidden, cell) = self.encoder(src) batch_size, tgt_len = tgt.size() outputs = torch.zeros(batch_size, tgt_len, self.decoder.fc.out_features).to(device) input_ids = tgt[:, 0] for t in range(1, tgt_len): logits, hidden, cell = self.decoder(input_ids, hidden, cell, encoder_outputs) outputs[:, t] = logits input_ids = tgt[:, t] return outputs def generate(self, src, max_length=80): encoder_outputs, (hidden, cell) = self.encoder(src) # 修正后的生成初始化 generated = torch.full((src.size(0), 1), self.sos_token_id, device=device) # 注意这里的修正 for _ in range(max_length-1): logits, hidden, cell = self.decoder(generated[:, -1], hidden, cell, encoder_outputs) next_token = torch.argmax(logits, dim=1, keepdim=True) # 防止过早生成标点 if generated.size(1) < 5: punctuation = [',', '.', ';', ':', '!', '?', "'", '"', '`', '~'] punct_ids = [self.tokenizer.convert_tokens_to_ids(p) for p in punctuation] if next_token.item() in punct_ids: # 替换为最常见的实词 next_token = torch.tensor([[self.tokenizer.convert_tokens_to_ids('the')]], device=device) generated = torch.cat([generated, next_token], dim=1) if (next_token == self.eos_token_id).all(): break return generated # Transformer模型 class PositionalEncoding(nn.Module): def __init__(self, d_model, max_len=5000): super().__init__() pe = torch.zeros(max_len, d_model) position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model)) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) self.register_buffer('pe', pe.unsqueeze(0)) def forward(self, x): return x + self.pe[:, :x.size(1)] class TransformerModel(nn.Module): def __init__(self, vocab_size, d_model=128, nhead=8, 
num_layers=3, dim_feedforward=512, max_len=5000): super().__init__() self.embedding = nn.Embedding(vocab_size, d_model, padding_idx=0) self.pos_encoder = PositionalEncoding(d_model, max_len) # 编码器 encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout=0.1) self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers) # 解码器 decoder_layer = nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout=0.1) self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers) self.fc = nn.Linear(d_model, vocab_size) self.d_model = d_model self.sos_token_id = 101 # [CLS] self.eos_token_id = 102 # [SEP] def _generate_square_subsequent_mask(self, sz): mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) return mask def forward(self, src, tgt): src_mask = None tgt_mask = self._generate_square_subsequent_mask(tgt.size(1)).to(device) src_key_padding_mask = (src == 0) tgt_key_padding_mask = (tgt == 0) src = self.embedding(src) * np.sqrt(self.d_model) src = self.pos_encoder(src) tgt = self.embedding(tgt) * np.sqrt(self.d_model) tgt = self.pos_encoder(tgt) memory = self.transformer_encoder(src.transpose(0, 1), src_mask, src_key_padding_mask) output = self.transformer_decoder( tgt.transpose(0, 1), memory, tgt_mask, None, tgt_key_padding_mask, src_key_padding_mask ) output = self.fc(output.transpose(0, 1)) return output def generate(self, src, max_length=80): src_mask = None src_key_padding_mask = (src == 0) src = self.embedding(src) * np.sqrt(self.d_model) src = self.pos_encoder(src) memory = self.transformer_encoder(src.transpose(0, 1), src_mask, src_key_padding_mask) batch_size = src.size(0) generated = torch.full((batch_size, 1), self.sos_token_id, device=device) for i in range(max_length-1): tgt_mask = self._generate_square_subsequent_mask(generated.size(1)).to(device) tgt_key_padding_mask = (generated == 0) tgt = 
self.embedding(generated) * np.sqrt(self.d_model) tgt = self.pos_encoder(tgt) output = self.transformer_decoder( tgt.transpose(0, 1), memory, tgt_mask, None, tgt_key_padding_mask, src_key_padding_mask ) output = self.fc(output.transpose(0, 1)[:, -1, :]) next_token = torch.argmax(output, dim=1, keepdim=True) generated = torch.cat([generated, next_token], dim=1) if (next_token == self.eos_token_id).all(): break return generated # 训练函数 def train_model(model, train_loader, optimizer, criterion, epochs=3): model.train() optimizer = optim.Adam(model.parameters(), lr=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=1, factor=0.5) start_time = time.time() for epoch in range(epochs): total_loss = 0 model.train() for i, batch in enumerate(train_loader): src = batch['input_ids'].to(device) tgt = batch['labels'].to(device) optimizer.zero_grad() outputs = model(src, tgt[:, :-1]) # 检查模型输出有效性 if torch.isnan(outputs).any(): print("警告:模型输出包含NaN,跳过此批次") continue loss = criterion(outputs.reshape(-1, outputs.size(-1)), tgt[:, 1:].reshape(-1)) loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) # 梯度裁剪 optimizer.step() total_loss += loss.item() if (i+1) % 10 == 0: print(f"Epoch {epoch+1}/{epochs} | Batch {i+1}/{len(train_loader)} | Loss: {loss.item():.4f}") avg_loss = total_loss / len(train_loader) scheduler.step(avg_loss) print(f"Epoch {epoch+1} | 平均损失: {avg_loss:.4f}") torch.cuda.empty_cache() total_time = time.time() - start_time print(f"训练完成!总耗时: {total_time:.2f}s ({total_time/60:.2f}分钟)") return model, total_time # 评估函数 def evaluate_model(model, val_loader, tokenizer, num_examples=2): model.eval() scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True) rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []} valid_count = 0 with torch.no_grad(): for i, batch in enumerate(val_loader): src = batch['input_ids'].to(device) tgt = batch['labels'].to(device) generated = model.generate(src) for s, p, t in 
zip(src, generated, tgt): src_txt = tokenizer.decode(s, skip_special_tokens=True) pred_txt = tokenizer.decode(p, skip_special_tokens=True) true_txt = tokenizer.decode(t[t != -100], skip_special_tokens=True) if len(pred_txt.split()) > 3 and len(true_txt.split()) > 3: valid_count += 1 if valid_count <= num_examples: print(f"\n原文: {src_txt[:100]}...") print(f"生成: {pred_txt}") print(f"参考: {true_txt[:80]}...") print("-"*60) if true_txt and pred_txt: scores = scorer.score(true_txt, pred_txt) for key in rouge_scores: rouge_scores[key].append(scores[key].fmeasure) if valid_count > 0: avg_scores = {key: sum(rouge_scores[key])/len(rouge_scores[key]) for key in rouge_scores} print(f"\n评估结果 (基于{valid_count}个样本):") print(f"ROUGE-1: {avg_scores['rouge1']*100:.2f}%") print(f"ROUGE-2: {avg_scores['rouge2']*100:.2f}%") print(f"ROUGE-L: {avg_scores['rougeL']*100:.2f}%") else: print("警告:未生成有效摘要") avg_scores = {key: 0.0 for key in rouge_scores} return avg_scores # 可视化模型性能 def visualize_model_performance(model_names, train_times, rouge_scores): plt.figure(figsize=(15, 6)) # 训练时间对比图 plt.subplot(1, 2, 1) bars = plt.bar(model_names, train_times) plt.title('模型训练时间对比') plt.ylabel('时间 (分钟)') for bar in bars: height = bar.get_height() plt.text(bar.get_x() + bar.get_width()/2., height, f'{height:.1f} min', ha='center', va='bottom') # ROUGE分数对比图 plt.subplot(1, 2, 2) x = np.arange(len(model_names)) width = 0.25 plt.bar(x - width, [scores['rouge1'] for scores in rouge_scores], width, label='ROUGE-1') plt.bar(x, [scores['rouge2'] for scores in rouge_scores], width, label='ROUGE-2') plt.bar(x + width, [scores['rougeL'] for scores in rouge_scores], width, label='ROUGE-L') plt.title('模型ROUGE分数对比') plt.ylabel('F1分数') plt.xticks(x, model_names) plt.legend() plt.tight_layout() plt.savefig('performance_comparison.png') plt.show() print("性能对比图已保存为 performance_comparison.png") # 交互式文本摘要生成 def interactive_summarization(models, tokenizer, model_names, max_length=80): while True: print("\n" + "="*60) 
print("文本摘要交互式测试 (输入 'q' 退出)") print("="*60) input_text = input("请输入要摘要的文本:\n") if input_text.lower() == 'q': break if len(input_text) < 50: print("请输入更长的文本(至少50个字符)") continue # 生成摘要 inputs = tokenizer( input_text, max_length=384, truncation=True, padding='max_length', return_tensors='pt' ).to(device) print("\n生成摘要中...") all_summaries = [] for i, model in enumerate(models): model.eval() with torch.no_grad(): generated = model.generate(inputs["input_ids"]) summary = tokenizer.decode(generated[0], skip_special_tokens=True) all_summaries.append(summary) # 打印结果 print(f"\n{model_names[i]} 摘要:") print("-"*50) print(summary) print("-"*50) print("\n所有模型摘要对比:") for i, (name, summary) in enumerate(zip(model_names, all_summaries)): print(f"{i+1}. {name}: {summary}") # 主程序 print("加载数据集...") dataset = load_dataset("cnn_dailymail", "3.0.0") tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased') vocab_size = len(tokenizer.vocab) # 准备训练数据 print("准备训练数据...") train_ds = SummaryDataset(dataset['train'], tokenizer, subset_size=0.01) # 使用1%的数据 val_ds = SummaryDataset(dataset['validation'], tokenizer, subset_size=0.01) train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, num_workers=0) val_loader = DataLoader(val_ds, batch_size=8, shuffle=False, num_workers=0) # 定义损失函数 criterion = nn.CrossEntropyLoss(ignore_index=-100) # 训练基础Seq2Seq print("\n" + "="*60) print("训练基础Seq2Seq模型") print("="*60) basic_model = BasicSeq2Seq(vocab_size).to(device) trained_basic, basic_time = train_model(basic_model, train_loader, None, criterion, epochs=3) basic_rouge = evaluate_model(trained_basic, val_loader, tokenizer) # 训练注意力Seq2Seq print("\n" + "="*60) print("训练注意力Seq2Seq模型") print("="*60) attn_model = AttnSeq2Seq(vocab_size).to(device) trained_attn, attn_time = train_model(attn_model, train_loader, None, criterion, epochs=3) attn_rouge = evaluate_model(trained_attn, val_loader, tokenizer) # 训练Transformer print("\n" + "="*60) print("训练Transformer模型") print("="*60) transformer_model = 
TransformerModel(vocab_size).to(device) trained_transformer, transformer_time = train_model(transformer_model, train_loader, None, criterion, epochs=3) transformer_rouge = evaluate_model(trained_transformer, val_loader, tokenizer) # 可视化模型性能 print("\n" + "="*60) print("模型性能对比") print("="*60) model_names = ['基础Seq2Seq', '注意力Seq2Seq', 'Transformer'] train_times = [basic_time/60, attn_time/60, transformer_time/60] rouge_scores = [basic_rouge, attn_rouge, transformer_rouge] visualize_model_performance(model_names, train_times, rouge_scores) # 交互式测试 print("\n" + "="*60) print("交互式文本摘要测试") print("="*60) print("提示:输入一段文本,将同时生成三个模型的摘要结果") interactive_summarization( [trained_basic, trained_attn, trained_transformer], tokenizer, model_names ) 修改完错误后发完整代码给我
06-09
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

uncleqiao

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值