# Check whether each image (images) has a matching label (labels), and rename the files at the same time.

import os
# Flat script: lists the training image and label folders and reports which
# image/label file names match once their last three characters are removed.
# NOTE(review): the script appears to be cut off after the last line below;
# the rename step announced in the comments is not present in the visible code.

# Source folders that are listed below, plus "new" folders that are defined
# here but not referenced anywhere in the visible code.
train_images_path_old = r'C:\Users\Administrator\Desktop\mask\train\images'
train_images_path_new = r'C:\Users\Administrator\Desktop\mask\train\images2'
train_labels_path_old = r'C:\Users\Administrator\Desktop\mask\train\labels'
train_labels_path_new = r'C:\Users\Administrator\Desktop\mask\train\labels2'

# Two consecutive chdir calls: only the second one (labels folder) remains in
# effect. The listdir calls below use absolute paths and do not depend on it.
os.chdir(train_images_path_old)
os.chdir(train_labels_path_old)
list_images = os.listdir(train_images_path_old)
list_labels = os.listdir(train_labels_path_old)
images_number = 0 # sequence number appended to the image name
labels_number = 0 # sequence number appended to the name; a single counter would suffice (unused in the visible code)
for images_list_num in range(0, len(list_images)):  # iterate over the images first, then check whether a label corresponds, then give both the same name

    # Compare the current image against every label file (nested scan).
    for labels_list_num in range(0, len(list_labels)):
       images_old_name = list_images[images_list_num]
       labels_old_name = list_labels[labels_list_num]
       # Names are compared with their last three characters stripped.
       # NOTE(review): presumably this drops a three-letter extension such as
       # "jpg"/"txt"; longer extensions would not be handled - confirm.
       if (images_old_name[:-3]==labels_old_name[:-3]):
            print("图片文件与标签已经对应,对应的序号为:%d" %(images_number) )
        # old_name = list_images[images_list_num]
            # NOTE(review): `images` is not defined anywhere in the visible
            # code, so this assignment raises NameError when reached; the line
            # looks truncated.
            images_number = images
# ---------------------------------------------------------------------------
# Two helper scripts, reconstructed from a paste that had lost its line breaks
# and had its quotes replaced by typographic ones:
#   1. sliding-window stitching of wavelet images into new samples;
#   2. creation of corrected RUL labels for already generated windows.
# ---------------------------------------------------------------------------
import os
from contextlib import ExitStack

import numpy as np
import pandas as pd

# Pillow and matplotlib are imported inside the functions that need them so
# that the pure helpers below stay importable without those packages.

# Root of the data set on the author's machine; every path used by the two
# entry points below is derived from it.
_DATASET_ROOT = (
    r"D:\成电——研究生\基于数据驱动的故障诊断研究\数据集汇总"
    r"\phm-ieee-2012-data-challenge-dataset-master"
)


def _configure_chinese_fonts():
    """Enable Chinese glyphs and a proper minus sign in matplotlib figures."""
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # use the SimHei typeface
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly


def _normalize_png_name(name):
    """Return *name* as a string that ends with ``.png``."""
    name = str(name)
    return name if name.endswith('.png') else name + '.png'


def _build_rul_lookup(labels_df):
    """Map every ``filename`` of *labels_df* to its ``rul_normalized`` value.

    File names are normalised to carry a ``.png`` suffix. If a name occurs
    more than once, the last row wins.
    """
    return {
        _normalize_png_name(name): rul
        for name, rul in zip(labels_df['filename'], labels_df['rul_normalized'])
    }


def _next_wavelet_filename(filename):
    """Return the name of the file that follows *filename*, or ``None``.

    The name is split on ``_``; the second field is read as the sequence
    number and the successor is formatted as ``<prefix>_<number+1, 5 digits>_wavelet.png``.
    ``None`` is returned when the name has no second field or that field is
    not an integer.
    """
    parts = filename.split('_')
    if len(parts) < 2:
        return None
    try:
        number = int(parts[1])
    except ValueError:
        return None
    return f"{parts[0]}_{number + 1:05d}_wavelet.png"


def create_sliding_window_samples_corrected(image_folder, labels_file, output_folder, window_size=5, step=1):
    """Create new samples from existing images with a sliding window.

    Every window of ``window_size`` consecutive images is stitched
    horizontally into one image; its label is the ``rul_normalized`` value of
    the image that directly follows the window.

    Args:
        image_folder: Folder that contains the original ``.png`` images.
        labels_file: CSV file with ``filename`` and ``rul_normalized`` columns.
        output_folder: Folder that receives the stitched images and the new
            label CSV (created when missing).
        window_size: Number of images per window (default 5).
        step: Sliding step (default 1).

    Returns:
        pandas.DataFrame with the columns ``filename``, ``rul_normalized``,
        ``source_files`` and ``target_file`` (empty when no window qualified).

    Raises:
        ValueError: If ``window_size`` or ``step`` is smaller than 1.
    """
    if window_size < 1 or step < 1:
        raise ValueError("window_size and step must both be >= 1")

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"创建输出文件夹: {output_folder}")

    labels_df = pd.read_csv(labels_file)
    print(f"读取标签文件: {labels_file}")
    # One dictionary lookup per window instead of filtering the frame each time.
    rul_lookup = _build_rul_lookup(labels_df)

    # Sorted names are taken as the temporal order of the images.
    image_files = sorted(f for f in os.listdir(image_folder) if f.endswith('.png'))

    new_labels = []
    for i in range(0, len(image_files) - window_size, step):
        window_files = image_files[i:i + window_size]
        target_file = image_files[i + window_size]

        # The window AND its target must form one unbroken numeric sequence;
        # otherwise the label would belong to a later time step.
        if not check_consecutive_files(window_files + [target_file]):
            continue

        # Skip windows without a label before doing any image work.
        if target_file not in rul_lookup:
            continue
        target_label = rul_lookup[target_file]

        new_filename = f"window_{i:04d}_to_{i + window_size - 1:04d}.png"
        merged_image = merge_images(image_folder, window_files)
        merged_image.save(os.path.join(output_folder, new_filename))

        new_labels.append({
            'filename': new_filename,
            'rul_normalized': target_label,
            'source_files': ';'.join(window_files),
            'target_file': target_file
        })

        # Report progress every 50 samples and on the final window.
        if (len(new_labels) % 50 == 0) or (i + window_size >= len(image_files) - step):
            print(f"已处理 {len(new_labels)} 个窗口样本")

    new_labels_df = pd.DataFrame(new_labels)
    new_labels_csv_path = os.path.join(output_folder, "sliding_window_rul_labels_corrected.csv")
    new_labels_df.to_csv(new_labels_csv_path, index=False)
    print(f"新标签已保存到: {new_labels_csv_path}")

    return new_labels_df


def check_consecutive_files(filenames):
    """Check whether the sequence numbers in *filenames* are consecutive.

    The sequence number is the second ``_``-separated field of each name
    (e.g. ``acc_00012_wavelet.png`` -> 12).

    Args:
        filenames: List of file names in the order to be checked.

    Returns:
        bool: True when every number equals its predecessor plus one (also
        for an empty or single-element list); False otherwise, including when
        a name has no parsable sequence number.
    """
    numbers = []
    for filename in filenames:
        parts = filename.split('_')
        # A name without a numeric second field cannot be ordered at all.
        if len(parts) < 2:
            return False
        try:
            numbers.append(int(parts[1]))
        except ValueError:
            return False

    return all(later == earlier + 1 for earlier, later in zip(numbers, numbers[1:]))


def merge_images(image_folder, filenames):
    """Stitch several images side by side into one image.

    Args:
        image_folder: Folder that contains the images.
        filenames: Names of the images to stitch, left to right.

    Returns:
        PIL.Image.Image: New RGB image whose width is the sum of the input
        widths and whose height is the largest input height.

    Raises:
        ValueError: If *filenames* is empty.
    """
    from PIL import Image

    if not filenames:
        raise ValueError("filenames must not be empty")

    # ExitStack closes every source file once the pixels have been copied.
    with ExitStack() as stack:
        images = [
            stack.enter_context(Image.open(os.path.join(image_folder, f)))
            for f in filenames
        ]

        widths, heights = zip(*(img.size for img in images))
        merged_image = Image.new('RGB', (sum(widths), max(heights)))

        x_offset = 0
        for img in images:
            merged_image.paste(img, (x_offset, 0))
            x_offset += img.size[0]

    return merged_image


def _run_sliding_window_demo():
    """Entry point of the sliding-window script (Bearing1_1, WT images)."""
    _configure_chinese_fonts()

    original_image_folder = _DATASET_ROOT + r"\WT\Bearing1_1"
    original_labels_file = os.path.join(original_image_folder, "rul_labels.csv")
    output_folder = _DATASET_ROOT + r"\WT_SlidingWindow_Corrected\Bearing1_1"

    new_labels = create_sliding_window_samples_corrected(
        original_image_folder,
        original_labels_file,
        output_folder,
        window_size=5,  # five images are stitched into one sample
        step=1  # slide by one image at a time
    )

    print(f"\n处理完成! 共生成 {len(new_labels)} 个新样本")
    print("\n新标签数据预览:")
    print(new_labels.head())

    # Show the last source file next to the target file for the first samples.
    print("\n验证标签是否正确:")
    for i in range(min(5, len(new_labels))):
        row = new_labels.iloc[i]
        print(
            f"窗口 {i}: 源文件={row['source_files'].split(';')[-1]}, 目标文件={row['target_file']}, 标签={row['rul_normalized']}")


# (Original note: the code above is the sliding-window script; the code below
# is the label-creation script.)


def create_correct_labels_for_windows(bearing_folders, output_base_folder,
                                      windows_base_folder=None,
                                      test_bearings=("Bearing1_3",)):
    """Create corrected label files for already generated window images.

    For every bearing, ``<bearing>_windows/windows_info.csv`` is read, and the
    label of each window is replaced by the ``rul_normalized`` value of the
    file that follows the window's last file.

    Args:
        bearing_folders: List of bearing folders, each holding ``rul_labels.csv``.
        output_base_folder: Folder that receives ``train_labels.csv`` and
            ``test_labels.csv`` (created when missing).
        windows_base_folder: Folder that holds the ``<bearing>_windows``
            sub-folders. Defaults to ``output_base_folder``.
        test_bearings: Bearing names whose windows go to the test set; all
            others go to the training set.

    Returns:
        tuple: ``(train_df, test_df)``; an element is ``None`` when no bearing
        contributed to that set.
    """
    if windows_base_folder is None:
        windows_base_folder = output_base_folder

    if not os.path.exists(output_base_folder):
        os.makedirs(output_base_folder)

    train_labels = []
    test_labels = []

    for bearing_folder in bearing_folders:
        # normpath keeps the name intact when the path ends with a separator.
        bearing_name = os.path.basename(os.path.normpath(bearing_folder))
        windows_info_path = os.path.join(windows_base_folder, f"{bearing_name}_windows", "windows_info.csv")

        if not os.path.exists(windows_info_path):
            print(f"警告: 未找到 {bearing_name} 的窗口信息文件: {windows_info_path}")
            continue
        windows_info = pd.read_csv(windows_info_path)

        original_labels_path = os.path.join(bearing_folder, "rul_labels.csv")
        if not os.path.exists(original_labels_path):
            print(f"警告: 未找到 {bearing_name} 的原始标签文件: {original_labels_path}")
            continue
        file_to_rul = _build_rul_lookup(pd.read_csv(original_labels_path))

        corrected_info = []
        for _, window_row in windows_info.iterrows():
            last_file = str(window_row['window_files']).split(';')[-1]
            next_file = _next_wavelet_filename(last_file)

            if next_file is None:
                print(f"警告: 无法解析文件名 {last_file},跳过窗口 {window_row.get('window_id', '?')}")
                continue
            if next_file not in file_to_rul:
                print(f"警告: 未找到文件 {next_file} 的RUL值,跳过窗口 {window_row.get('window_id', '?')}")
                continue

            corrected_row = window_row.copy()
            corrected_row['rul_normalized'] = file_to_rul[next_file]
            corrected_info.append(corrected_row)

        corrected_df = pd.DataFrame(corrected_info)

        if bearing_name in test_bearings:
            test_labels.append(corrected_df)
        else:
            train_labels.append(corrected_df)

        print(f"完成处理 {bearing_name}, 生成 {len(corrected_df)} 个修正标签")

    train_df = None
    if train_labels:
        train_df = pd.concat(train_labels, ignore_index=True)
        train_output_path = os.path.join(output_base_folder, "train_labels.csv")
        train_df.to_csv(train_output_path, index=False)
        print(f"训练集标签已保存到: {train_output_path}")

    test_df = None
    if test_labels:
        test_df = pd.concat(test_labels, ignore_index=True)
        test_output_path = os.path.join(output_base_folder, "test_labels.csv")
        test_df.to_csv(test_output_path, index=False)
        print(f"测试集标签已保存到: {test_output_path}")

    return train_df, test_df


def _print_rul_statistics(title, labels):
    """Print sample count and RUL range of *labels* under the heading *title*."""
    if labels is None or labels.empty:
        return
    print(f"\n{title}:")
    print(f"样本数量: {len(labels)}")
    print(f"RUL范围: {labels['rul_normalized'].min():.6f} - {labels['rul_normalized'].max():.6f}")


def _run_label_correction_demo():
    """Entry point of the label-creation script (Bearing1_1 .. Bearing1_3)."""
    bearing1_1_folder = _DATASET_ROOT + r"\WT\Bearing1_1"
    bearing1_2_folder = _DATASET_ROOT + r"\WT\Bearing1_2"
    bearing1_3_folder = _DATASET_ROOT + r"\WT\Bearing1_3"

    # Base folder of the window data sets.
    windows_base_folder = _DATASET_ROOT + r"\WT\window_datasets"
    output_folder = _DATASET_ROOT + r"\WT\corrected_labels"

    bearing_folders = [bearing1_1_folder, bearing1_2_folder, bearing1_3_folder]
    # NOTE(review): the original defined windows_base_folder but never used it
    # and searched for windows_info.csv under output_folder instead. Passing
    # it here is presumably what was intended - confirm.
    train_labels, test_labels = create_correct_labels_for_windows(
        bearing_folders, output_folder, windows_base_folder=windows_base_folder)

    # Re-derive the label of the first training window independently.
    # Assumes that this window belongs to Bearing1_1 (first folder processed).
    if train_labels is not None and len(train_labels) > 0:
        first_window = train_labels.iloc[0]
        print("\n验证训练集第一个窗口的标签:")
        print(f"窗口文件: {first_window['window_files']}")
        print(f"修正后的标签值: {first_window['rul_normalized']}")

        last_file = first_window['window_files'].split(';')[-1]
        next_file = _next_wavelet_filename(last_file)

        original_labels = pd.read_csv(os.path.join(bearing1_1_folder, 'rul_labels.csv'))
        next_rul = _build_rul_lookup(original_labels).get(next_file)

        print(f"下一个文件应该是: {next_file}")
        print(f"下一个文件的RUL值: {next_rul}")
        print(f"窗口标签值: {first_window['rul_normalized']}")

        if next_rul is None:
            # Nothing to compare against; the original crashed here.
            print("✗ 未找到下一个文件的RUL值,无法验证标签!")
        elif abs(next_rul - first_window['rul_normalized']) < 1e-6:
            print("✓ 标签正确!")
        else:
            print("✗ 标签不正确!")

    _print_rul_statistics("训练集统计", train_labels)
    _print_rul_statistics("测试集统计", test_labels)


if __name__ == "__main__":
    _run_sliding_window_demo()
    _run_label_correction_demo()

# (Original request that accompanied the pasted code: the code above is the
# label-creation script. Using the code above as a reference, apply the
# sliding-window stitching to the images already generated in
# "D:\成电——研究生\基于数据驱动的故障诊断研究\数据集汇总\phm-ieee-2012-data-challenge-dataset-master\CWT\Bearing1_1"
# and save the result to
# "D:\成电——研究生\基于数据驱动的故障诊断研究\数据集汇总\phm-ieee-2012-data-challenge-dataset-master\CWT\Bearing1_1hdck".
# The code above is for reference only and may be optimised or modified as
# appropriate; provide the complete code.)
10-15
评论 1
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值