np.array_split can split an array into unequal parts; np.split requires the split to be exactly even.
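A minimal sketch of the difference (array and variable names are arbitrary examples):

```python
import numpy as np

a = np.arange(10)

# np.array_split tolerates uneven division: 10 elements into 3 parts -> sizes 4, 3, 3
print(np.array_split(a, 3))   # [array([0,1,2,3]), array([4,5,6]), array([7,8,9])]

# np.split requires an exact division; 3 does not divide 10, so it raises ValueError
try:
    np.split(a, 3)
except ValueError as e:
    print("np.split failed:", e)

# np.split works when the division is exact: 10 elements into 5 parts of 2
print(np.split(a, 5))
```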
