import os
import numpy as np
import matplotlib.pyplot as plt
import re
from matplotlib.ticker import MaxNLocator
from scipy.stats import linregress
# Make Chinese text render in figures: prefer fonts that ship CJK glyphs and
# draw the minus sign as an ASCII hyphen instead of the Unicode minus.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'WenQuanYi Micro Hei'],
    'axes.unicode_minus': False,
})
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers numerically.

    The string is split into alternating text and digit runs. Digit runs are
    converted to ``int`` and text runs are lower-cased, so ``"img2"`` sorts
    before ``"img10"`` and the ordering ignores case.

    :param s: string to build the key for (e.g. a file name)
    :return: list alternating ``str`` and ``int`` parts, always starting with
        a (possibly empty) ``str`` part
    """
    parts = re.split(r'(\d+)', s)
    # With a single capture group, re.split places the digit runs at the odd
    # indices. Deciding by position instead of str.isdigit() avoids calling
    # int() on text such as "²", which isdigit() accepts but int() rejects.
    return [int(part) if index % 2 else part.lower()
            for index, part in enumerate(parts)]
def normalize_data(data):
    """Min-max scale a sequence of numbers into the [0, 1] range.

    :param data: sized sequence of numbers
    :return: list of scaled values; an empty list for empty input, and
        ``0.5`` for every element when all values are equal
    """
    if len(data) == 0:
        # min()/max() raise on an empty sequence; there is nothing to scale.
        return []
    lowest = min(data)
    highest = max(data)
    if highest == lowest:
        # Zero range: avoid dividing by zero and map everything to the middle.
        return [0.5] * len(data)
    span = highest - lowest
    return [(value - lowest) / span for value in data]
def _window_is_stable(window, method, threshold, total_std, window_size):
    """Return whether one sliding window passes the selected stability test."""
    if method == 'std':
        # Stable when the window spread is below a fraction of the global spread.
        return np.std(window) < total_std * threshold
    if method == 'cv':
        mean_val = np.mean(window)
        if mean_val > 0:  # the coefficient of variation needs a positive mean
            # Low relative dispersion means stable, so the CV has to stay
            # BELOW the threshold, in line with the 'std' and 'slope' tests.
            return np.std(window) / mean_val < threshold
        return False
    # method == 'slope': stable when the fitted linear trend is nearly flat.
    slope, *_ = linregress(np.arange(len(window)), window)
    return abs(slope) < threshold * total_std / window_size


def _merge_adjacent_intervals(intervals, merge_gap):
    """Merge start-ordered intervals whose gap is at most ``merge_gap``."""
    merged = []
    current_start, current_end = intervals[0]
    for start, end in intervals[1:]:
        if start - current_end <= merge_gap:
            # Close enough (or overlapping): extend the running interval.
            current_end = max(current_end, end)
        else:
            merged.append((current_start, current_end))
            current_start, current_end = start, end
    merged.append((current_start, current_end))
    return merged


def find_stable_intervals(counts, method='std', min_window=300, max_window=2000,
                          threshold=0.5, merge_gap=300, min_length=500,
                          return_stages=False):
    """Detect stable index ranges in a series with a sliding window.

    The series is scanned with 50%-overlapping windows; windows that pass the
    selected stability test are collected, neighbouring windows are merged,
    and merged ranges that are too short are discarded.

    :param counts: list of box counts (the caller passes normalised values)
    :param method: stability test, one of ``'std'`` (window standard
        deviation), ``'cv'`` (coefficient of variation) or ``'slope'``
        (absolute slope of a linear fit)
    :param min_window: lower bound of the window size
    :param max_window: upper bound of the window size
    :param threshold: cut-off for the test; scaled by the overall standard
        deviation for ``'std'`` and ``'slope'``, used directly for ``'cv'``
    :param merge_gap: maximum gap between two intervals that are still merged
    :param min_length: minimum length (both end points included) of a kept
        interval
    :param return_stages: when true, return the tuple
        ``(base_intervals, merged_intervals, final_intervals)`` so the state
        before merging and before length filtering can be inspected/plotted
    :return: list of ``(start, end)`` index pairs (inclusive), or the
        three-stage tuple when ``return_stages`` is true
    :raises ValueError: if ``method`` is not one of the supported names
    """
    if method not in ('std', 'cv', 'slope'):
        # An unknown name would otherwise silently yield "no stable interval".
        raise ValueError(
            f"unknown method {method!r}; expected 'std', 'cv' or 'slope'")

    n = len(counts)
    if n == 0:
        return ([], [], []) if return_stages else []

    # Overall spread of the series, used to scale the 'std'/'slope' thresholds.
    total_std = np.std(counts)

    # 1. Adaptive window: a tenth of the series, clamped to the given bounds.
    window_size = min(max_window, max(min_window, n // 10))
    step_size = max(1, window_size // 2)  # slide with 50% overlap

    # 2. Collect every window that passes the stability test.
    base_intervals = []
    for i in range(0, n - window_size + 1, step_size):
        window = counts[i:i + window_size]
        if len(window) < 2:  # at least two points are needed for the statistics
            continue
        if _window_is_stable(window, method, threshold, total_std, window_size):
            base_intervals.append((i, i + window_size - 1))

    if not base_intervals:
        return ([], [], []) if return_stages else []

    # 3. Merge neighbouring stable windows. The windows were generated with
    #    increasing start index, so they are already ordered.
    merged_intervals = _merge_adjacent_intervals(base_intervals, merge_gap)

    # 4. Drop short pseudo-stable segments (length counts both end points).
    final_intervals = [
        (start, end) for start, end in merged_intervals
        if (end - start + 1) >= min_length
    ]

    if return_stages:
        return base_intervals, merged_intervals, final_intervals
    return final_intervals
def plot_box_count_trend(file_list, box_counts, stable_intervals, output_path,
                         title_suffix="", method_name="标准差"):
    """Plot the box-count series and highlight the stable intervals.

    Every interval is drawn as a green band between its minimum and maximum
    count and annotated with its 1-based index range and mean ± standard
    deviation. The figure is saved to ``output_path`` and then closed.

    :param file_list: file names used as x values
    :param box_counts: raw box count per file
    :param stable_intervals: list of ``(start, end)`` index pairs (inclusive)
    :param output_path: path of the image to write
    :param title_suffix: text appended to the title after the method name
    :param method_name: detection method name shown in the title
    """
    plt.figure(figsize=(20, 10))

    # Overall trend of the raw data.
    plt.plot(file_list, box_counts, 'b-', linewidth=1.5, label='预测框数量')

    for idx, (start, end) in enumerate(stable_intervals):
        segment_files = file_list[start:end + 1]
        segment_counts = box_counts[start:end + 1]
        if not segment_counts:  # skip intervals that select nothing
            continue

        # Interval statistics.
        avg_count = np.mean(segment_counts)
        low = np.min(segment_counts)
        high = np.max(segment_counts)
        std_dev = np.std(segment_counts)

        # Only the first band carries a legend entry; empty labels are skipped.
        band_label = f'稳定区间{idx + 1}' if idx == 0 else ""
        plt.fill_between(segment_files, low, high,
                         color='green',
                         alpha=0.3,
                         zorder=0,  # keep the band underneath the data line
                         label=band_label)

        # Annotate at the middle file of the interval, if that index exists.
        mid_idx = start + (end - start) // 2
        if mid_idx >= len(file_list):
            continue
        note = f"区间{idx + 1}: {start + 1}-{end + 1}\n均值: {avg_count:.1f}±{std_dev:.1f}"
        plt.annotate(note,
                     (file_list[mid_idx], avg_count),
                     xytext=(0, 20),
                     textcoords='offset points',
                     ha='center',
                     fontsize=10,
                     bbox={"boxstyle": "round,pad=0.3", "fc": "yellow", "alpha": 0.7},
                     zorder=10)  # keep the annotation on top

    # Titles, axes, grid and legend.
    plt.title(f'预测框数量变化趋势及稳定区间分析 - {method_name}{title_suffix}', fontsize=18)
    plt.xlabel('图像文件名', fontsize=14)
    plt.ylabel('预测框数量', fontsize=14)
    plt.xticks(rotation=90, fontsize=7)
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.legend(loc='upper right')

    # Limit the number of x-axis ticks.
    x_axis = plt.gca().xaxis
    x_axis.set_major_locator(MaxNLocator(20))

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()
def plot_combined_intervals(file_list, box_counts, intervals_std, intervals_cv, intervals_slope, output_path):
    """Plot the box-count series with the intervals of all three methods.

    Each interval is drawn as a translucent band between its minimum and
    maximum count, coloured per detection method. The figure is saved to
    ``output_path`` and then closed.

    :param file_list: file names used as x values
    :param box_counts: raw box count per file
    :param intervals_std: intervals found by the standard-deviation method
    :param intervals_cv: intervals found by the coefficient-of-variation method
    :param intervals_slope: intervals found by the trend-slope method
    :param output_path: path of the image to write
    """
    plt.figure(figsize=(20, 10))

    # Overall trend of the raw data.
    plt.plot(file_list, box_counts, 'b-', linewidth=1.5, label='预测框数量')

    # (band colour, legend label) for every method.
    method_colors = {
        '标准差': ('green', '标准差区间'),
        '变异系数': ('blue', '变异系数区间'),
        '趋势斜率': ('orange', '趋势斜率区间')
    }

    # Draw the methods in a fixed order: std, then cv, then slope.
    detections = (
        ('标准差', intervals_std),
        ('变异系数', intervals_cv),
        ('趋势斜率', intervals_slope),
    )
    for method_key, method_intervals in detections:
        band_color, legend_label = method_colors[method_key]
        for idx, (start, end) in enumerate(method_intervals):
            segment_files = file_list[start:end + 1]
            segment_counts = box_counts[start:end + 1]
            plt.fill_between(segment_files, min(segment_counts), max(segment_counts),
                             color=band_color, alpha=0.3,
                             zorder=0,  # keep the band underneath the data line
                             label=legend_label if idx == 0 else "")

    # Titles, axes, grid and legend.
    plt.title('预测框数量变化趋势及稳定区间分析 - 三种方法合并', fontsize=18)
    plt.xlabel('图像文件名', fontsize=14)
    plt.ylabel('预测框数量', fontsize=14)
    plt.xticks(rotation=90, fontsize=7)
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.legend(loc='upper right')

    # Limit the number of x-axis ticks.
    x_axis = plt.gca().xaxis
    x_axis.set_major_locator(MaxNLocator(20))

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()
# Configure paths.
label_dir = "F:/0701-label"  # replace with the path to your label folder
output_dir = "F:/0710-stable"  # output directory
os.makedirs(output_dir, exist_ok=True)

# Collect the label files and put them in natural (numeric-aware) order.
file_list = [f for f in os.listdir(label_dir) if f.endswith(".txt")]
file_list.sort(key=natural_sort_key)
if not file_list:
    # Normalisation, statistics and plotting below all need at least one
    # file; stop with a clear message instead of an opaque min()/max() error.
    raise SystemExit(f"未在 {label_dir} 中找到任何 .txt 标签文件")

# File names without extension (x-axis values and report labels).
file_names = [os.path.splitext(f)[0] for f in file_list]

# Box count of a file = number of its non-blank lines.
box_counts = []
for file in file_list:
    file_path = os.path.join(label_dir, file)
    with open(file_path, 'r') as f:
        box_counts.append(sum(1 for line in f if line.strip()))

# Normalised counts are used for interval detection only.
normalized_counts = normalize_data(box_counts)

# Overall statistics of the raw counts.
total_mean = np.mean(box_counts)
total_std = np.std(box_counts)

# Detect stable intervals with the three methods (on the normalised data).
intervals_std = find_stable_intervals(
    normalized_counts, method='std',
    min_window=300, max_window=2000,
    threshold=0.9,  # standard-deviation threshold
    merge_gap=300, min_length=300
)
intervals_cv = find_stable_intervals(
    normalized_counts, method='cv',
    min_window=300, max_window=2000,
    threshold=0.9,  # coefficient-of-variation threshold
    merge_gap=300, min_length=300
)
intervals_slope = find_stable_intervals(
    normalized_counts, method='slope',
    min_window=300, max_window=2000,
    threshold=0.2,  # trend-slope threshold
    merge_gap=300, min_length=300
)

# Output image paths, one per method plus the combined figure.
output_std = os.path.join(output_dir, "box_count_stable_intervals_std.png")
output_cv = os.path.join(output_dir, "box_count_stable_intervals_cv.png")
output_slope = os.path.join(output_dir, "box_count_stable_intervals_slope.png")
output_combined = os.path.join(output_dir, "box_count_stable_intervals_combined.png")

# Draw the per-method figures from the raw counts (no normalised curve).
plot_box_count_trend(file_names, box_counts, intervals_std, output_std,
                     title_suffix="", method_name="标准差方法")
plot_box_count_trend(file_names, box_counts, intervals_cv, output_cv,
                     title_suffix="", method_name="变异系数方法")
plot_box_count_trend(file_names, box_counts, intervals_slope, output_slope,
                     title_suffix="", method_name="趋势斜率方法")

# Draw the combined figure.
plot_combined_intervals(file_names, box_counts, intervals_std, intervals_cv, intervals_slope, output_combined)

# Print the summary.
print(f"分析完成! 共处理 {len(file_list)} 个文件")
print(f"整体平均框数: {total_mean:.2f} ± {total_std:.2f}")
def print_interval_info(intervals, method_name):
    """Print statistics for every interval found by one method.

    Reads the module-level ``box_counts`` list. For each interval it prints
    the 1-based file range, mean ± standard deviation, coefficient of
    variation, slope of a linear fit, and the minimum/maximum count.

    :param intervals: list of ``(start, end)`` index pairs (inclusive)
    :param method_name: label printed in the heading
    """
    print(f"\n{method_name}发现 {len(intervals)} 个稳定区间:")
    for i, (start, end) in enumerate(intervals):
        interval_counts = box_counts[start:end + 1]
        avg_count = np.mean(interval_counts)
        std_dev = np.std(interval_counts)
        # A non-positive mean has no meaningful CV; report 0 instead.
        cv = std_dev / avg_count if avg_count > 0 else 0
        # Trend slope of the raw counts over the interval.
        x = np.arange(len(interval_counts))
        slope, _, _, _, _ = linregress(x, interval_counts)
        print(f"区间{i + 1}:")
        print(f" - 文件范围: {start + 1}-{end + 1} (共{end - start + 1}个文件)")
        print(f" - 平均框数: {avg_count:.2f} ± {std_dev:.2f}")
        print(f" - 变异系数: {cv:.4f}")
        print(f" - 趋势斜率: {slope:.6f}")
        print(f" - 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}")
# Print the statistics of each method in turn.
for _method_intervals, _method_label in (
    (intervals_std, "标准差方法"),
    (intervals_cv, "变异系数方法"),
    (intervals_slope, "趋势斜率方法"),
):
    print_interval_info(_method_intervals, _method_label)
# Pool the intervals detected by all three methods into one list.
all_intervals = [*intervals_std, *intervals_cv, *intervals_slope]
def merge_intervals(intervals, merge_gap=300, min_length=500):
    """Merge overlapping or nearby intervals and drop short results.

    :param intervals: iterable of ``(start, end)`` index pairs (inclusive);
        it is not modified
    :param merge_gap: two intervals are merged when the next start is at most
        this far behind the current end
    :param min_length: minimum length (both end points included) of a kept
        interval
    :return: list of merged ``(start, end)`` pairs ordered by start
    """
    if not intervals:
        return []
    # sorted() rather than list.sort(): ordering must not leak into the
    # caller's list as a side effect.
    ordered = sorted(intervals, key=lambda interval: interval[0])
    merged = []
    current_start, current_end = ordered[0]
    for start, end in ordered[1:]:
        if start - current_end <= merge_gap:
            # Close enough (or overlapping): extend the running interval.
            current_end = max(current_end, end)
        else:
            merged.append((current_start, current_end))
            current_start, current_end = start, end
    merged.append((current_start, current_end))
    # Filter out intervals that are too short.
    return [
        (start, end) for start, end in merged
        if (end - start + 1) >= min_length
    ]
# Merge the pooled detections of all three methods into one interval list.
_merge_options = {"merge_gap": 300, "min_length": 500}
merged_intervals = merge_intervals(all_intervals, **_merge_options)
# Save interval information to the text report.
def save_interval_report(intervals, method_name, file_path):
    """Append one method's interval statistics to the report file.

    Reads the module-level ``box_counts`` and ``file_names`` lists. The file
    is opened in append mode, so existing content is kept.

    :param intervals: list of ``(start, end)`` index pairs (inclusive)
    :param method_name: method label written in the section heading
    :param file_path: path of the report file to append to
    """
    with open(file_path, 'a') as f:
        f.write(f"\n{method_name}稳定区间分析报告\n")
        f.write(f"稳定区间数: {len(intervals)}\n")
        for i, (start, end) in enumerate(intervals):
            interval_counts = box_counts[start:end + 1]
            avg_count = np.mean(interval_counts)
            std_dev = np.std(interval_counts)
            # A non-positive mean has no meaningful CV; report 0 instead.
            cv = std_dev / avg_count if avg_count > 0 else 0
            # Trend slope of the raw counts over the interval.
            x = np.arange(len(interval_counts))
            slope, _, _, _, _ = linregress(x, interval_counts)
            f.writelines([
                f"\n区间 {i + 1}:\n",
                f" 起始文件索引: {start + 1} ({file_names[start]})\n",
                f" 结束文件索引: {end + 1} ({file_names[end]})\n",
                f" 文件数量: {end - start + 1}\n",
                f" 平均预测框数: {avg_count:.2f} ± {std_dev:.2f}\n",
                f" 变异系数: {cv:.4f}\n",
                f" 趋势斜率: {slope:.6f}\n",
                f" 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}\n",
            ])
        # NOTE(review): separator written once per method section (after the
        # loop) - confirm against the original layout.
        f.write("=" * 80 + "\n")
# Create the report file and write the overall summary header.
interval_info_path = os.path.join(output_dir, "stable_intervals_report.txt")
with open(interval_info_path, 'w') as f:
    f.write("稳定区间综合分析报告\n")
    f.write(f"总文件数: {len(file_list)}\n")
    f.write(f"整体平均框数: {total_mean:.2f} ± {total_std:.2f}\n")
    f.write(f"归一化范围: {min(box_counts)}-{max(box_counts)}\n")

# Append the per-method sections (each call reopens the file in append mode).
save_interval_report(intervals_std, "标准差方法", interval_info_path)
save_interval_report(intervals_cv, "变异系数方法", interval_info_path)
save_interval_report(intervals_slope, "趋势斜率方法", interval_info_path)

# Append the section describing the merged intervals.
with open(interval_info_path, 'a') as f:
    f.write("\n\n=== 合并区间分析报告 ===\n")
    f.write("此部分展示三种方法检测到的所有稳定区间合并后的结果\n")
    f.write(f"合并后稳定区间数: {len(merged_intervals)}\n")
    for i, (start, end) in enumerate(merged_intervals):
        interval_counts = box_counts[start:end + 1]
        avg_count = np.mean(interval_counts)
        std_dev = np.std(interval_counts)

        # A method is listed when one of its own intervals fully contains the
        # merged interval.
        # NOTE(review): a merged interval is usually a union that is wider
        # than any single detection, so this often yields '无'; confirm
        # whether an overlap test was intended.
        covered_by = []
        if any(start >= s and end <= e for s, e in intervals_std):
            covered_by.append("标准差")
        if any(start >= s and end <= e for s, e in intervals_cv):
            covered_by.append("变异系数")
        if any(start >= s and end <= e for s, e in intervals_slope):
            covered_by.append("趋势斜率")

        f.write(f"\n合并区间 {i + 1}:\n")
        f.write(f" 起始文件索引: {start + 1} ({file_names[start]})\n")
        f.write(f" 结束文件索引: {end + 1} ({file_names[end]})\n")
        f.write(f" 文件数量: {end - start + 1}\n")
        f.write(f" 平均预测框数: {avg_count:.2f} ± {std_dev:.2f}\n")
        f.write(f" 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}\n")
        f.write(f" 覆盖方法: {', '.join(covered_by) if covered_by else '无'}\n")

    # Summary statistics over all merged intervals.
    interval_lengths = [end - start + 1 for start, end in merged_intervals]
    total_covered_files = sum(interval_lengths)
    coverage_percentage = (total_covered_files / len(file_list)) * 100
    f.write("\n合并区间统计:\n")
    f.write(f" 总覆盖文件数: {total_covered_files}/{len(file_list)} ({coverage_percentage:.2f}%)\n")
    if interval_lengths:
        f.write(f" 平均区间长度: {np.mean(interval_lengths):.1f} 文件\n")
        f.write(f" 最长区间: {max(interval_lengths)} 文件\n")
        f.write(f" 最短区间: {min(interval_lengths)} 文件\n")
    else:
        # max()/min() raise on an empty list; say so instead of crashing and
        # leaving a half-written report.
        f.write(" 无合并区间,未统计区间长度\n")

print_interval_info(merged_intervals, "合并区间")
print(f"\n结果图片已保存至: {output_dir}")
print(f"详细区间报告已保存至: {interval_info_path}")
# TODO: 请修改这段代码:除现有结果图外,还要分别绘制"合并相邻平稳段之前"和"过滤短时伪平稳段之前"的稳定区间检测图。