import os
import numpy as np
import matplotlib.pyplot as plt
import re
from matplotlib.ticker import MaxNLocator
from scipy.stats import linregress
# Configure fonts so Chinese plot labels render correctly
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'WenQuanYi Micro Hei']
plt.rcParams['axes.unicode_minus'] = False
def natural_sort_key(s):
"""自然排序算法:确保文件名按数字顺序排列"""
return [int(text) if text.isdigit() else text.lower()
for text in re.split(r'(\d+)', s)]
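# A quick illustration of the natural sort (hypothetical file names, safe to delete):
# plain lexicographic sorting would put 'img10.txt' before 'img2.txt'; natural sort does not.
assert sorted(['img10.txt', 'img2.txt'], key=natural_sort_key) == ['img2.txt', 'img10.txt']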
def find_stable_intervals(counts, method='std', min_window=300, max_window=2000,
threshold=0.5, merge_gap=300, min_length=500,
window_step=50):
"""
改进版稳定区间检测:支持三种不同指标
:param counts: 预测框数量列表(原始数据)
:param method: 检测方法 ('std', 'zscore', 'slope')
:param min_window: 最小窗口尺寸
:param max_window: 最大窗口尺寸
:param threshold: 阈值(基于整体统计量)
:param merge_gap: 相邻区间合并的最大间隔
:param min_length: 最小有效区间长度
:param window_step: 窗口尺寸增加的步长
:return: 优化后的稳定区间列表
"""
n = len(counts)
if n == 0:
        return []
    # 1. Multi-window-size detection
    base_intervals = []
    # Iterate over all window sizes
    for window_size in range(min_window, max_window + 1, window_step):
        # Skip window sizes larger than the data length
        if window_size > n:
            continue
        step_size = max(1, window_size // 2)  # slide with 50% overlap
        # Detect stable intervals with the current window size
        for i in range(0, n - window_size + 1, step_size):
            window = np.asarray(counts[i:i + window_size])
            if len(window) < 2:  # need at least 2 points
                continue
            # Compute the stability metric according to the selected method
            if method == 'std':
                # Standard-deviation method
                std_dev = np.std(window)
                if std_dev < threshold:
                    base_intervals.append((i, i + window_size - 1))
            elif method == 'zscore_avg':
                # Z-score method: mean of the absolute z-scores of all points in the
                # window, i.e. mean(|x - mean(window)| / std(window))
                mean_val = np.mean(window)
                std_val = np.std(window)
                if std_val > 0:  # avoid division by zero
                    # Absolute z-score of every point
                    z_scores = np.abs((window - mean_val) / std_val)
                    # Mean absolute z-score
                    avg_zscore = np.mean(z_scores)
                    # Compare with the threshold
                    if avg_zscore < threshold:  # mean |z-score| below the threshold
                        base_intervals.append((i, i + window_size - 1))
                # Special case: std is 0, i.e. all values in the window are identical
                elif len(window) > 0:
                    # Every |z-score| is 0, which is below any positive threshold
                    base_intervals.append((i, i + window_size - 1))
            elif method == 'slope':
                # Trend-slope method
                x = np.arange(len(window))
                slope, _, _, _, _ = linregress(x, window)
                if abs(slope) < threshold:
                    base_intervals.append((i, i + window_size - 1))
    # Return early if no candidate interval was detected
    if not base_intervals:
        return []
    # 2. Merge adjacent stable segments
    base_intervals.sort(key=lambda x: x[0])  # sort by start index
    merged_intervals = []
    if base_intervals:  # list is non-empty at this point
current_start, current_end = base_intervals[0]
for start, end in base_intervals[1:]:
            if start - current_end <= merge_gap:  # gap small enough to merge
                current_end = max(current_end, end)  # extend the current interval
else:
merged_intervals.append((current_start, current_end))
current_start, current_end = start, end
merged_intervals.append((current_start, current_end))
    # 3. Filter out short pseudo-stable segments
    final_intervals = [
        (start, end) for start, end in merged_intervals
        if (end - start + 1) >= min_length  # interval length includes both endpoints
]
return final_intervals
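# ---------------------------------------------------------------------------
# Minimal self-check of find_stable_intervals on synthetic data (a sketch with
# made-up values, not part of the analysis; safe to delete). The first 1200
# points are constant and should be reported as stable by all three methods;
# the last 600 points rise linearly and should not.
# ---------------------------------------------------------------------------
_demo_counts = list(np.concatenate([np.full(1200, 10.0), np.linspace(10, 60, 600)]))
for _m, _thr in (('std', 0.5), ('zscore_avg', 0.75), ('slope', 0.001)):
    _demo = find_stable_intervals(_demo_counts, method=_m,
                                  min_window=300, max_window=600,
                                  threshold=_thr, merge_gap=100,
                                  min_length=500, window_step=100)
    print(f"[self-check] method={_m}: stable intervals = {_demo}")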
def plot_box_count_trend(file_list, box_counts, stable_intervals, output_path,
title_suffix="", method_name="标准差"):
"""
绘制预测框数量变化趋势图并标记稳定区间
修改:根据方法名称设置对应颜色,与合并图一致
"""
plt.figure(figsize=(20, 10))
# 绘制整体趋势(原始数据)
plt.plot(file_list, box_counts, 'b-', linewidth=1.5, label='预测框数量')
# 根据方法名称设置颜色(与合并图保持一致)
method_colors = {
'标准差方法': 'green',
'Z-score方法': 'purple',
'趋势斜率方法': 'orange'
}
    # Colour used for this method's intervals
    fill_color = method_colors.get(method_name, 'green')  # default: green
    # Highlight the stable intervals
for i, (start, end) in enumerate(stable_intervals):
interval_files = file_list[start:end + 1]
interval_counts = box_counts[start:end + 1]
if not interval_counts:
continue
        # Interval statistics
avg_count = np.mean(interval_counts)
min_count = np.min(interval_counts)
max_count = np.max(interval_counts)
std_dev = np.std(interval_counts)
        # Shade the stable interval with this method's colour
plt.fill_between(interval_files,
min_count, max_count,
color=fill_color,
alpha=0.3,
zorder=0,
label=f'{method_name}区间' if i == 0 else "")
        # Annotate the interval
mid_idx = start + (end - start) // 2
if mid_idx < len(file_list):
plt.annotate(f"区间{i + 1}: {start + 1}-{end + 1}\n均值: {avg_count:.1f}±{std_dev:.1f}",
(file_list[mid_idx], avg_count),
xytext=(0, 20),
textcoords='offset points',
ha='center',
fontsize=10,
bbox=dict(boxstyle="round,pad=0.3", fc="yellow", alpha=0.7),
zorder=10)
    # Figure properties
plt.title(f'预测框数量变化趋势 - {method_name}{title_suffix}', fontsize=18)
plt.xlabel('图像文件名', fontsize=14)
plt.ylabel('预测框数量', fontsize=14)
plt.xticks(rotation=90, fontsize=7)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(loc='upper right')
plt.gca().xaxis.set_major_locator(MaxNLocator(20))
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
plt.close()
def plot_combined_intervals(file_list, box_counts, intervals_std, intervals_zscore, intervals_slope, output_path):
"""
绘制三种方法检测结果的合并图
:param file_list: 文件名列表
:param box_counts: 原始预测框数量列表
:param intervals_std: 标准差方法检测的区间
:param intervals_zscore: Z-score方法检测的区间
:param intervals_slope: 趋势斜率方法检测的区间
:param output_path: 输出图片路径
"""
plt.figure(figsize=(20, 10))
# 绘制整体趋势(原始数据)
plt.plot(file_list, box_counts, 'b-', linewidth=1.5, label='预测框数量')
# 为每种方法定义不同的颜色和标签(与单独图表一致)
method_colors = {
'标准差方法': ('green', '标准差区间'),
'Z-score方法': ('purple', 'Z-score区间'),
'趋势斜率方法': ('orange', '趋势斜率区间')
}
    # Intervals detected by the standard-deviation method
for i, (start, end) in enumerate(intervals_std):
interval_files = file_list[start:end + 1]
min_count = min(box_counts[start:end + 1])
max_count = max(box_counts[start:end + 1])
plt.fill_between(interval_files, min_count, max_count,
color=method_colors['标准差方法'][0], alpha=0.3,
zorder=0,
label=method_colors['标准差方法'][1] if i == 0 else "")
    # Intervals detected by the Z-score method
for i, (start, end) in enumerate(intervals_zscore):
interval_files = file_list[start:end + 1]
min_count = min(box_counts[start:end + 1])
max_count = max(box_counts[start:end + 1])
plt.fill_between(interval_files, min_count, max_count,
color=method_colors['Z-score方法'][0], alpha=0.3,
zorder=0,
label=method_colors['Z-score方法'][1] if i == 0 else "")
    # Intervals detected by the trend-slope method
for i, (start, end) in enumerate(intervals_slope):
interval_files = file_list[start:end + 1]
min_count = min(box_counts[start:end + 1])
max_count = max(box_counts[start:end + 1])
plt.fill_between(interval_files, min_count, max_count,
color=method_colors['趋势斜率方法'][0], alpha=0.3,
zorder=0,
label=method_colors['趋势斜率方法'][1] if i == 0 else "")
    # Figure properties
plt.title('预测框数量变化趋势及稳定区间分析 - 三种方法合并', fontsize=18)
plt.xlabel('图像文件名', fontsize=14)
plt.ylabel('预测框数量', fontsize=14)
plt.xticks(rotation=90, fontsize=7)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(loc='upper right')
plt.gca().xaxis.set_major_locator(MaxNLocator(20))
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
plt.close()
# Paths
label_dir = "E:/0718/0718-labels"  # replace with your label folder
output_dir = "E:/0718/0718-stable"  # output directory
os.makedirs(output_dir, exist_ok=True)
# Collect the label files and sort them in natural (numeric) order
file_list = [f for f in os.listdir(label_dir) if f.endswith(".txt")]
file_list.sort(key=natural_sort_key)
# File names without extension
file_names = [os.path.splitext(f)[0] for f in file_list]
# Count the prediction boxes (non-empty lines) in each label file
box_counts = []
for file in file_list:
file_path = os.path.join(label_dir, file)
count = 0
with open(file_path, 'r') as f:
for line in f:
            if line.strip():  # non-empty line = one prediction box
                count += 1
    box_counts.append(count)
# Overall statistics
total_mean = np.mean(box_counts)
total_std = np.std(box_counts)
# Detect stable intervals with the three methods (directly on the raw data)
intervals_std = find_stable_intervals(
box_counts, method='std',
min_window=500, max_window=2000,
    threshold=1.1,  # standard-deviation threshold
merge_gap=300, min_length=600
)
intervals_zscore = find_stable_intervals(
box_counts, method='zscore_avg',
min_window=500, max_window=2000,
    threshold=0.75,  # mean |z-score| threshold
merge_gap=300, min_length=600
)
intervals_slope = find_stable_intervals(
box_counts, method='slope',
min_window=500, max_window=2000,
    threshold=0.00015,  # trend-slope threshold
merge_gap=300, min_length=600
)
# Output image paths for the three methods and the combined figure
output_std = os.path.join(output_dir, "box_count_stable_intervals_std.png")
output_zscore = os.path.join(output_dir, "box_count_stable_intervals_zscore.png")
output_slope = os.path.join(output_dir, "box_count_stable_intervals_slope.png")
output_combined = os.path.join(output_dir, "box_count_stable_intervals_combined.png")
# Plot the per-method result figures (using consistent method names)
plot_box_count_trend(file_names, box_counts, intervals_std, output_std,
title_suffix="", method_name="标准差方法")
plot_box_count_trend(file_names, box_counts, intervals_zscore, output_zscore,
title_suffix="", method_name="Z-score方法")
plot_box_count_trend(file_names, box_counts, intervals_slope, output_slope,
title_suffix="", method_name="趋势斜率方法")
# Combined figure
plot_combined_intervals(file_names, box_counts, intervals_std, intervals_zscore, intervals_slope,
                        output_combined)
# Print detailed results
print(f"分析完成! 共处理 {len(file_list)} 个文件")
print(f"整体平均框数: {total_mean:.2f} ± {total_std:.2f}")
def print_interval_info(intervals, method_name):
print(f"\n{method_name}发现 {len(intervals)} 个稳定区间:")
for i, (start, end) in enumerate(intervals):
interval_counts = box_counts[start:end + 1]
avg_count = np.mean(interval_counts)
std_dev = np.std(interval_counts)
cv = std_dev / avg_count if avg_count > 0 else 0
        # Trend slope (computed on the raw data)
x = np.arange(len(interval_counts))
slope, _, _, _, _ = linregress(x, interval_counts)
print(f"区间{i + 1}:")
print(f" - 文件范围: {start + 1}-{end + 1} (共{end - start + 1}个文件)")
print(f" - 平均框数: {avg_count:.2f} ± {std_dev:.2f}")
print(f" - 变异系数: {cv:.4f}")
print(f" - 趋势斜率: {slope:.6f}")
print(f" - 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}")
print_interval_info(intervals_std, "标准差方法")
print_interval_info(intervals_zscore, "Z-score方法")
print_interval_info(intervals_slope, "趋势斜率方法")
# Pool all intervals detected by the three methods
all_intervals = intervals_std + intervals_zscore + intervals_slope
def merge_intervals(intervals, merge_gap=300, min_length=500):
    """Merge overlapping or nearly adjacent intervals."""
    if not intervals:
        return []
    # Sort by start index
intervals.sort(key=lambda x: x[0])
merged = []
current_start, current_end = intervals[0]
for start, end in intervals[1:]:
        if start - current_end <= merge_gap:  # gap small enough to merge
            current_end = max(current_end, end)  # extend the current interval
else:
merged.append((current_start, current_end))
current_start, current_end = start, end
merged.append((current_start, current_end))
    # Drop intervals that are too short
final_merged = [
(start, end) for start, end in merged
if (end - start + 1) >= min_length
]
return final_merged
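# A tiny illustration of merge_intervals on hypothetical index pairs (safe to delete):
# (0, 400) and (500, 1200) are only 100 indices apart, so they merge into (0, 1200);
# (2000, 2300) stays separate and is then dropped because it is shorter than min_length.
assert merge_intervals([(0, 400), (500, 1200), (2000, 2300)],
                       merge_gap=300, min_length=500) == [(0, 1200)]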
# Merge all detected intervals
merged_intervals = merge_intervals(all_intervals, merge_gap=300, min_length=500)
# Save the interval information to a text report
def save_interval_report(intervals, method_name, file_path):
with open(file_path, 'a') as f:
f.write(f"\n{method_name}稳定区间分析报告\n")
f.write(f"稳定区间数: {len(intervals)}\n")
for i, (start, end) in enumerate(intervals):
interval_counts = box_counts[start:end + 1]
avg_count = np.mean(interval_counts)
std_dev = np.std(interval_counts)
cv = std_dev / avg_count if avg_count > 0 else 0
            # Trend slope
x = np.arange(len(interval_counts))
slope, _, _, _, _ = linregress(x, interval_counts)
f.write(f"\n区间 {i + 1}:\n")
f.write(f" 起始文件索引: {start + 1} ({file_names[start]})\n")
f.write(f" 结束文件索引: {end + 1} ({file_names[end]})\n")
f.write(f" 文件数量: {end - start + 1}\n")
f.write(f" 平均预测框数: {avg_count:.2f} ± {std_dev:.2f}\n")
f.write(f" 变异系数: {cv:.4f}\n")
f.write(f" 趋势斜率: {slope:.6f}\n")
f.write(f" 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}\n")
f.write("=" * 80 + "\n")
# Create the report file
interval_info_path = os.path.join(output_dir, "stable_intervals_report.txt")
with open(interval_info_path, 'w') as f:
f.write(f"稳定区间综合分析报告\n")
f.write(f"总文件数: {len(file_list)}\n")
f.write(f"整体平均框数: {total_mean:.2f} ± {total_std:.2f}\n")
f.write(f"数据范围: {min(box_counts)}-{max(box_counts)}\n")
# Append the per-method interval reports
save_interval_report(intervals_std, "标准差方法", interval_info_path)
save_interval_report(intervals_zscore, "Z-score方法", interval_info_path)
save_interval_report(intervals_slope, "趋势斜率方法", interval_info_path)
# Append the merged-interval report
with open(interval_info_path, 'a') as f:
f.write("\n\n=== 合并区间分析报告 ===\n")
f.write("此部分展示三种方法检测到的所有稳定区间合并后的结果\n")
f.write(f"合并后稳定区间数: {len(merged_intervals)}\n")
for i, (start, end) in enumerate(merged_intervals):
interval_counts = box_counts[start:end + 1]
avg_count = np.mean(interval_counts)
std_dev = np.std(interval_counts)
cv = std_dev / avg_count if avg_count > 0 else 0
        # Trend slope
        x = np.arange(len(interval_counts))
        slope, _, _, _, _ = linregress(x, interval_counts)
        # Determine which methods contributed to this merged interval. An overlap test
        # is used: after merging, an interval is usually wider than any single method's
        # interval, so a strict containment test would report almost no coverage.
        covered_by = []
        if any(s <= end and e >= start for s, e in intervals_std):
            covered_by.append("标准差")
        if any(s <= end and e >= start for s, e in intervals_zscore):
            covered_by.append("Z-score")
        if any(s <= end and e >= start for s, e in intervals_slope):
            covered_by.append("趋势斜率")
f.write(f"\n合并区间 {i + 1}:\n")
f.write(f" 起始文件索引: {start + 1} ({file_names[start]})\n")
f.write(f" 结束文件索引: {end + 1} ({file_names[end]})\n")
f.write(f" 文件数量: {end - start + 1}\n")
f.write(f" 平均预测框数: {avg_count:.2f} ± {std_dev:.2f}\n")
f.write(f" 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}\n")
f.write(f" 覆盖方法: {', '.join(covered_by) if covered_by else '无'}\n")
    # Summary statistics for the merged intervals (guarded so max()/min() are not
    # called on empty sequences when no interval was detected)
    if merged_intervals:
        interval_lengths = [end - start + 1 for start, end in merged_intervals]
        total_covered_files = sum(interval_lengths)
        coverage_percentage = (total_covered_files / len(file_list)) * 100
        f.write("\n合并区间统计:\n")
        f.write(f" 总覆盖文件数: {total_covered_files}/{len(file_list)} ({coverage_percentage:.2f}%)\n")
        f.write(f" 平均区间长度: {np.mean(interval_lengths):.1f} 文件\n")
        f.write(f" 最长区间: {max(interval_lengths)} 文件\n")
        f.write(f" 最短区间: {min(interval_lengths)} 文件\n")
    else:
        f.write("\n合并区间统计: 未检测到合并稳定区间\n")
print_interval_info(merged_intervals, "合并区间")
print(f"\n结果图片已保存至: {output_dir}")
print(f"详细区间报告已保存至: {interval_info_path}")