js str.split()_arr.join()

This article covers the String split() method and the Array join() method in JavaScript, including how to use a regular expression as the separator and how to limit the length of the returned array. It also shows that str.split() and arr.join() are inverse operations.
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>js str.split()_arr.join()</title>
</head>
<body>
<script>
    /* Reference: https://www.w3school.com.cn/js/jsref_split.asp
    * Key points:
    * 1. str.split(separator, howMany) -> array of substrings.
    *    Splits a string into an array of strings.
    *    separator: optional. A string or regular expression marking where str is split;
    *    if omitted, the array contains the whole string as its single element.
    *    howMany: optional. The maximum length of the returned array.
    *    If separator is an empty string (""), str is split between every character.
    * 2. str.split() and arr.join() are inverse operations of each other.
    */
    // I. str.split(separator, howMany)
    // 1. Split a sentence into words.
    let str = "How are you doing today?";
    console.log(str.split(" "));
    // (5) ["How", "are", "you", "doing", "today?"]
    // Use a regular expression as the separator.
    console.log(str.split(/\s+/));
    // (5) ["How", "are", "you", "doing", "today?"]
    console.log(str.split(" ", 3));
    // (3) ["How", "are", "you"]

    // 2. Split a word into letters, or a string into characters.
    console.log(str.split(""));
    // (24) ["H", "o", "w", " ", "a", "r", "e", " ", "y", "o", "u", " ", "d", "o", "i", "n", "g", " ", "t", "o", "d", "a", "y", "?"]

    // 3. Other separators.
    console.log("2:3:4:5".split(":"));
    // (4) ["2", "3", "4", "5"]
    console.log("|a|b|c".split("|"));
    // (4) ["", "a", "b", "c"]
    let arr = "张三丰|男|65|19280202".split("|");
    console.log(arr);
    // (4) ["张三丰", "男", "65", "19280202"]
    console.log(typeof arr);
    // object
    // In JS, typeof reports "object" for both object literals {} and arrays [].
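
    // Added note: since typeof cannot distinguish arrays from plain objects,
    // Array.isArray() is the reliable check.
    console.log(Array.isArray(arr));  // true
    console.log(Array.isArray({}));   // false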


    /* II. arr.join(separator) -> string of the joined array elements.
      Joins all elements of the array into one string:
      each element of arr is converted to a string,
      and the strings are concatenated with separator inserted between them.
      separator: optional. The delimiter to use; defaults to ",". */
    // 1.
    console.log(arr.join());
    // 张三丰,男,65,19280202
    console.log(arr.join("+"));
    // 张三丰+男+65+19280202
    console.log(typeof str);
    // string
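
    // Added example: join() converts every element to a string;
    // null and undefined are converted to empty strings.
    console.log([1, null, undefined, 2].join("-"));
    // 1---2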

    arr = [1, 2, 3];
    str = arr.join("+");
    console.log(str);
    // 1+2+3

    arr = str.split("+");
    console.log(arr);
    // (3) ["1", "2", "3"]
</script>
</body>
</html>