import os
import numpy as np
import matplotlib.pyplot as plt
import re
from matplotlib.ticker import MaxNLocator
from scipy.stats import linregress
# Work around Chinese text rendering problems: list sans-serif fonts that ship
# CJK glyphs so the Chinese titles/labels used by the plots below can be drawn.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'WenQuanYi Micro Hei']
# Render the minus sign as ASCII '-' instead of U+2212
# (presumably because the fonts above may lack that glyph — confirm if changing fonts).
plt.rcParams['axes.unicode_minus'] = False
def natural_sort_key(s):
    """Return a sort key that orders embedded digit runs numerically.

    The string is split on runs of decimal digits; digit runs become ``int``
    and every other fragment is lower-cased, so ``"img2"`` sorts before
    ``"img10"`` and comparison is case-insensitive.

    :param s: file name (or any string) to build the key for
    :return: list alternating lower-cased text fragments and integers
    """
    # ``isdecimal`` (not ``isdigit``) mirrors what ``\d`` matches: characters
    # such as '²' satisfy ``isdigit`` but are neither split out by ``\d+`` nor
    # accepted by ``int()``, which used to raise ValueError.
    return [int(part) if part.isdecimal() else part.lower()
            for part in re.split(r'(\d+)', s)]
def _window_is_stable(window, method, threshold):
    """Return True when *window* passes the stability test selected by *method*."""
    if method == 'std':
        # Plain standard deviation of the window.
        return bool(np.std(window) < threshold)
    if method == 'zscore_avg':
        # Mean absolute z-score of the points inside the window.
        std_val = np.std(window)
        if std_val > 0:
            z_scores = np.abs((window - np.mean(window)) / std_val)
            return bool(np.mean(z_scores) < threshold)
        # Zero spread: every point is identical, the window counts as stable.
        return True
    # method == 'slope': absolute slope of a least-squares line through the window.
    slope = linregress(np.arange(len(window)), window)[0]
    return bool(abs(slope) < threshold)


def find_stable_intervals(counts, method='std', min_window=300, max_window=2000,
                          threshold=0.5, merge_gap=300, min_length=500,
                          window_step=50, return_base=False):
    """
    Detect stable index ranges in a sequence of per-file box counts.

    For every window size in ``range(min_window, max_window + 1, window_step)``
    the data is cut into consecutive, non-overlapping windows (the stride equals
    the window size). Each window that passes the chosen stability test becomes
    a base interval. Base intervals are then merged when the gap between them is
    at most ``merge_gap``, and merged intervals shorter than ``min_length`` are
    dropped.

    :param counts: sequence of box counts (raw data)
    :param method: 'std' (window standard deviation), 'zscore_avg' (mean absolute
                   z-score inside the window; 'zscore' is accepted as an alias)
                   or 'slope' (absolute linear-regression slope)
    :param min_window: smallest window size to try
    :param max_window: largest window size to try
    :param threshold: a window is stable when its metric is strictly below this
                      absolute value
    :param merge_gap: maximum ``start - previous_end`` for two intervals to be merged
    :param min_length: minimum length (both end points included) of a final interval
    :param window_step: increment between successive window sizes
    :param return_base: when True return the tuple
                        ``(base_intervals, merged_intervals, final_intervals)``
    :return: list of inclusive ``(start, end)`` index pairs, or the 3-tuple
             described above when ``return_base`` is True
    :raises ValueError: if ``method`` is not one of the supported names
    """
    if method == 'zscore':
        # The documented name; the implementation historically used 'zscore_avg'.
        method = 'zscore_avg'
    if method not in ('std', 'zscore_avg', 'slope'):
        raise ValueError(f"unknown method: {method!r}")

    def _result(base, merged, final):
        # Keep the return shape consistent on every exit path so callers that
        # unpack three values with return_base=True never fail.
        return (base, merged, final) if return_base else final

    n = len(counts)
    if n == 0:
        return _result([], [], [])

    # Convert once so the per-window numpy calls do not re-convert a list slice.
    data = np.asarray(counts, dtype=float)

    # 1. Multi-window-size detection.
    base_intervals = []
    for window_size in range(min_window, max_window + 1, window_step):
        if window_size < 2:
            # At least two points are needed to compute any of the metrics.
            continue
        if window_size > n:
            # Sizes only grow from here on, so no later size can fit either.
            break
        # Stride equals the window size: windows of one size do not overlap.
        for i in range(0, n - window_size + 1, window_size):
            if _window_is_stable(data[i:i + window_size], method, threshold):
                base_intervals.append((i, i + window_size - 1))

    if not base_intervals:
        return _result([], [], [])

    # 2. Merge neighbouring stable segments (sorted by start index; the sort is
    #    stable, so equal starts keep their detection order).
    base_intervals.sort(key=lambda iv: iv[0])
    merged_intervals = []
    current_start, current_end = base_intervals[0]
    for start, end in base_intervals[1:]:
        if start - current_end <= merge_gap:
            current_end = max(current_end, end)
        else:
            merged_intervals.append((current_start, current_end))
            current_start, current_end = start, end
    merged_intervals.append((current_start, current_end))

    # 3. Drop short pseudo-stable segments (length counts both end points).
    final_intervals = [
        (start, end) for start, end in merged_intervals
        if (end - start + 1) >= min_length
    ]
    return _result(base_intervals, merged_intervals, final_intervals)
def plot_box_count_trend(file_list, box_counts, stable_intervals, output_path,
                         title_suffix="", method_name="标准差", is_base=False):
    """
    Plot the box-count trend and shade the given intervals, then save to a file.

    The shading colour is chosen from the method name so that it matches the
    colours used by the combined plot.

    :param file_list: x-axis labels (one per data point)
    :param box_counts: box count per entry of ``file_list``
    :param stable_intervals: iterable of inclusive ``(start, end)`` index pairs
    :param output_path: path of the image file to write
    :param title_suffix: text appended to the method name in the title
    :param method_name: method label; also selects the fill colour (green fallback)
    :param is_base: True for the pre-merge "base interval" plot: each band then
                    spans only the min/max of its own data and no annotation is
                    drawn, to avoid clutter from many stacked intervals
    """
    method_colors = {
        '标准差方法': 'green',
        'Z-score方法': 'purple',
        '趋势斜率方法': 'orange'
    }
    fill_color = method_colors.get(method_name, 'green')

    fig = plt.figure(figsize=(20, 10))
    try:
        # Overall trend (raw data).
        plt.plot(file_list, box_counts, 'b-', linewidth=1.5, label='预测框数量')

        # Vertical extent used for full-height bands.
        global_min = min(box_counts) - 0.5
        global_max = max(box_counts) + 0.5

        # Only the first band that is actually drawn carries the legend label.
        legend_pending = True
        for i, (start, end) in enumerate(stable_intervals):
            interval_files = file_list[start:end + 1]
            # len() instead of truthiness so numpy arrays are accepted too.
            if len(interval_files) == 0:
                continue
            interval_counts = box_counts[start:end + 1]
            has_counts = len(interval_counts) > 0

            if is_base:
                # Base plot: band covers only this interval's own data range.
                lower = min(interval_counts) if has_counts else 0
                upper = max(interval_counts) if has_counts else 0
            else:
                # Stable-interval plot: band spans the whole plotted range.
                lower, upper = global_min, global_max
            plt.fill_between(interval_files,
                             lower, upper,
                             color=fill_color,
                             alpha=0.3,
                             zorder=0,
                             label=f'{method_name}区间' if legend_pending else "")
            legend_pending = False

            if is_base or not has_counts:
                continue
            # Annotate the interval at its midpoint with mean ± std.
            avg_count = np.mean(interval_counts)
            std_dev = np.std(interval_counts)
            mid_idx = start + (end - start) // 2
            if mid_idx < len(file_list):
                plt.annotate(f"区间{i + 1}: {start + 1}-{end + 1}\n均值: {avg_count:.1f}±{std_dev:.1f}",
                             (file_list[mid_idx], avg_count),
                             xytext=(0, 20),
                             textcoords='offset points',
                             ha='center',
                             fontsize=10,
                             bbox=dict(boxstyle="round,pad=0.3", fc="yellow", alpha=0.7),
                             zorder=10)

        # Figure cosmetics.
        plot_type = "基础区间" if is_base else "稳定区间"
        plt.title(f'预测框数量变化趋势 - {method_name}{title_suffix} ({plot_type})', fontsize=18)
        plt.xlabel('图像文件名', fontsize=14)
        plt.ylabel('预测框数量', fontsize=14)
        plt.xticks(rotation=90, fontsize=7)
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.legend(loc='upper right')
        # Thin out the x tick labels; one label per file would be unreadable.
        plt.gca().xaxis.set_major_locator(MaxNLocator(20))
        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
def plot_combined_intervals(file_list, box_counts, intervals_std, intervals_zscore, intervals_slope, output_path):
    """
    Plot the box-count trend with the intervals of all three methods overlaid.

    :param file_list: x-axis labels (one per data point)
    :param box_counts: raw box count per entry of ``file_list``
    :param intervals_std: intervals found by the standard-deviation method
    :param intervals_zscore: intervals found by the z-score method
    :param intervals_slope: intervals found by the trend-slope method
    :param output_path: path of the image file to write
    """
    # (intervals, colour, legend label) per method, drawn in this order; the
    # colours match the ones used by the per-method plots.
    layers = (
        (intervals_std, 'green', '标准差区间'),
        (intervals_zscore, 'purple', 'Z-score区间'),
        (intervals_slope, 'orange', '趋势斜率区间'),
    )

    fig = plt.figure(figsize=(20, 10))
    try:
        # Overall trend (raw data).
        plt.plot(file_list, box_counts, 'b-', linewidth=1.5, label='预测框数量')

        # Every band spans the full plotted value range.
        global_min = min(box_counts) - 0.5
        global_max = max(box_counts) + 0.5

        for intervals, color, legend_label in layers:
            # Only the first band drawn for a method carries its legend label.
            legend_pending = True
            for start, end in intervals:
                interval_files = file_list[start:end + 1]
                # len() instead of truthiness so numpy arrays are accepted too.
                if len(interval_files) == 0:
                    continue
                plt.fill_between(interval_files, global_min, global_max,
                                 color=color, alpha=0.3,
                                 zorder=0,
                                 label=legend_label if legend_pending else "")
                legend_pending = False

        # Figure cosmetics.
        plt.title('预测框数量变化趋势及稳定区间分析 - 三种方法合并', fontsize=18)
        plt.xlabel('图像文件名', fontsize=14)
        plt.ylabel('预测框数量', fontsize=14)
        plt.xticks(rotation=90, fontsize=7)
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.legend(loc='upper right')
        # Thin out the x tick labels; one label per file would be unreadable.
        plt.gca().xaxis.set_major_locator(MaxNLocator(20))
        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
# Path configuration.
label_dir = "E:/0706/0706-labels"  # replace with your label folder
output_dir = "E:/0706/0706-stable3"  # output folder for plots and the report
os.makedirs(output_dir, exist_ok=True)

# Collect the label files in natural (numeric-aware) order.
file_list = [f for f in os.listdir(label_dir) if f.endswith(".txt")]
file_list.sort(key=natural_sort_key)
if not file_list:
    # Without data the detection calls below cannot be unpacked and every
    # min()/max() would fail; stop here with a clear message instead.
    raise SystemExit(f"未在 {label_dir} 中找到任何 .txt 标签文件")

# File names without extension (used as x-axis labels and in the report).
file_names = [os.path.splitext(f)[0] for f in file_list]

# Count the boxes of every file: one non-blank line per box.
box_counts = []
for file in file_list:
    file_path = os.path.join(label_dir, file)
    # Explicit encoding so the result does not depend on the platform default;
    # undecodable bytes are replaced because only line counts matter here.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        box_counts.append(sum(1 for line in f if line.strip()))

# Overall statistics.
total_mean = np.mean(box_counts)
total_std = np.std(box_counts)

# Run the three detection methods on the raw data. Each call returns the base
# intervals, the merged intervals and the final (length-filtered) intervals.
base_std, merged_std, intervals_std = find_stable_intervals(
    box_counts, method='std',
    min_window=100, max_window=2000,
    threshold=1.1,  # standard-deviation threshold
    merge_gap=300, min_length=600,
    return_base=True
)
base_zscore, merged_zscore, intervals_zscore = find_stable_intervals(
    box_counts, method='zscore_avg',
    min_window=100, max_window=2000,
    threshold=0.75,
    merge_gap=300, min_length=600,
    return_base=True
)
base_slope, merged_slope, intervals_slope = find_stable_intervals(
    box_counts, method='slope',
    min_window=100, max_window=2000,
    threshold=0.00015,  # trend-slope threshold
    merge_gap=300, min_length=600,
    return_base=True
)

# Output paths: final results per method.
output_std = os.path.join(output_dir, "box_count_stable_intervals_std.png")
output_zscore = os.path.join(output_dir, "box_count_stable_intervals_zscore.png")
output_slope = os.path.join(output_dir, "box_count_stable_intervals_slope.png")
# Output paths: base intervals (before merging).
output_base_std = os.path.join(output_dir, "box_count_base_intervals_std.png")
output_base_zscore = os.path.join(output_dir, "box_count_base_intervals_zscore.png")
output_base_slope = os.path.join(output_dir, "box_count_base_intervals_slope.png")
# Output paths: merged intervals (merged but not yet length-filtered).
output_merged_std = os.path.join(output_dir, "box_count_merged_intervals_std.png")
output_merged_zscore = os.path.join(output_dir, "box_count_merged_intervals_zscore.png")
output_merged_slope = os.path.join(output_dir, "box_count_merged_intervals_slope.png")
# Output path: combined plot of all three methods.
output_combined = os.path.join(output_dir, "box_count_stable_intervals_combined.png")

# One row per per-method plot, in the original drawing order:
# final results, then base intervals, then merged intervals.
_plot_jobs = (
    (intervals_std, output_std, "标准差方法", False),
    (intervals_zscore, output_zscore, "Z-score方法", False),
    (intervals_slope, output_slope, "趋势斜率方法", False),
    (base_std, output_base_std, "标准差方法", True),
    (base_zscore, output_base_zscore, "Z-score方法", True),
    (base_slope, output_base_slope, "趋势斜率方法", True),
    (merged_std, output_merged_std, "标准差方法", False),
    (merged_zscore, output_merged_zscore, "Z-score方法", False),
    (merged_slope, output_merged_slope, "趋势斜率方法", False),
)
for _job_intervals, _job_path, _job_method, _job_is_base in _plot_jobs:
    plot_box_count_trend(file_names, box_counts, _job_intervals, _job_path,
                         title_suffix="", method_name=_job_method, is_base=_job_is_base)

# Combined plot.
plot_combined_intervals(file_names, box_counts, intervals_std, intervals_zscore, intervals_slope,
                        output_combined)

# Console summary.
print(f"分析完成! 共处理 {len(file_list)} 个文件")
print(f"整体平均框数: {total_mean:.2f} ± {total_std:.2f}")
def print_interval_info(intervals, method_name):
    """Print per-interval statistics for *intervals* to stdout.

    Reads the module-level ``box_counts`` sequence. For each inclusive
    ``(start, end)`` pair it reports the 1-based file range, mean ± standard
    deviation, coefficient of variation (0 when the mean is not positive),
    the linear-regression slope over the raw counts, and the min/max.

    :param intervals: iterable of inclusive ``(start, end)`` index pairs
    :param method_name: label printed in the heading line
    """
    print(f"\n{method_name}发现 {len(intervals)} 个稳定区间:")
    for number, (start, end) in enumerate(intervals, start=1):
        segment = box_counts[start:end + 1]
        mean_count = np.mean(segment)
        spread = np.std(segment)
        if mean_count > 0:
            variation = spread / mean_count
        else:
            variation = 0
        # Trend slope fitted on the raw counts of this interval.
        trend = linregress(np.arange(len(segment)), segment)[0]
        file_total = end - start + 1
        print(f"区间{number}:")
        print(f" - 文件范围: {start + 1}-{end + 1} (共{file_total}个文件)")
        print(f" - 平均框数: {mean_count:.2f} ± {spread:.2f}")
        print(f" - 变异系数: {variation:.4f}")
        print(f" - 趋势斜率: {trend:.6f}")
        print(f" - 最小值: {min(segment)}, 最大值: {max(segment)}")
# Print the final (length-filtered) intervals of each method to the console.
print_interval_info(intervals_std, "标准差方法")
print_interval_info(intervals_zscore, "Z-score方法")
print_interval_info(intervals_slope, "趋势斜率方法")
# Pool the final intervals of all three methods into one new list
# (list concatenation, so the per-method lists themselves are left untouched).
all_intervals = intervals_std + intervals_zscore + intervals_slope
def merge_intervals(intervals, merge_gap=300, min_length=500):
    """Merge overlapping or nearby intervals and drop the short ones.

    Intervals are processed in order of their start index. An interval is
    folded into the current one when ``start - current_end <= merge_gap``;
    merged intervals shorter than ``min_length`` (both end points counted)
    are discarded. The input is not modified.

    :param intervals: iterable of inclusive ``(start, end)`` index pairs
    :param merge_gap: largest gap between two intervals that still merges them
    :param min_length: minimum length of an interval kept in the result
    :return: new list of merged ``(start, end)`` pairs, sorted by start
    """
    if not intervals:
        return []
    # sorted() instead of list.sort(): the caller's list must not be reordered
    # as a side effect of asking for a merged view of it.
    ordered = sorted(intervals, key=lambda iv: iv[0])
    merged = []
    current_start, current_end = ordered[0]
    for start, end in ordered[1:]:
        if start - current_end <= merge_gap:
            # Close enough: extend the current interval.
            current_end = max(current_end, end)
        else:
            merged.append((current_start, current_end))
            current_start, current_end = start, end
    merged.append((current_start, current_end))
    # Filter out short intervals.
    return [
        (start, end) for start, end in merged
        if (end - start + 1) >= min_length
    ]
# Merge the pooled intervals of all methods: intervals whose gap is at most 300
# are joined, and results shorter than 500 files are dropped.
merged_intervals = merge_intervals(all_intervals, merge_gap=300, min_length=500)
# Append interval information to the text report.
def save_interval_report(intervals, method_name, file_path, is_base=False, is_merged=False):
    """Append one report section describing *intervals* to *file_path*.

    Reads the module-level ``box_counts`` and ``file_names``. For every
    inclusive ``(start, end)`` pair the section lists the 1-based file range,
    mean ± standard deviation, coefficient of variation, linear-regression
    slope and min/max of the raw counts. A separator line closes the section.

    :param intervals: iterable of inclusive ``(start, end)`` index pairs
    :param method_name: method label written in the section heading
    :param file_path: report file; opened in append mode
    :param is_base: label the section as base (pre-merge) intervals
    :param is_merged: label the section as merged intervals; wins over ``is_base``
    """
    if is_merged:
        interval_type = "合并区间"
    elif is_base:
        interval_type = "基础区间"
    else:
        interval_type = "稳定区间"
    # Explicit UTF-8: the report contains Chinese text, and the platform
    # default encoding may be unable to represent it.
    with open(file_path, 'a', encoding='utf-8') as f:
        f.write(f"\n{method_name} {interval_type}分析报告\n")
        f.write(f"{interval_type}数: {len(intervals)}\n")
        for i, (start, end) in enumerate(intervals):
            interval_counts = box_counts[start:end + 1]
            avg_count = np.mean(interval_counts)
            std_dev = np.std(interval_counts)
            cv = std_dev / avg_count if avg_count > 0 else 0
            # Trend slope fitted on the raw counts of this interval.
            x = np.arange(len(interval_counts))
            slope = linregress(x, interval_counts)[0]
            f.write(f"\n区间 {i + 1}:\n")
            f.write(f" 起始文件索引: {start + 1} ({file_names[start]})\n")
            f.write(f" 结束文件索引: {end + 1} ({file_names[end]})\n")
            f.write(f" 文件数量: {end - start + 1}\n")
            f.write(f" 平均预测框数: {avg_count:.2f} ± {std_dev:.2f}\n")
            f.write(f" 变异系数: {cv:.4f}\n")
            f.write(f" 趋势斜率: {slope:.6f}\n")
            f.write(f" 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}\n")
        f.write("=" * 80 + "\n")


# Create the report file (truncating any previous run) and write its header.
interval_info_path = os.path.join(output_dir, "stable_intervals_report.txt")
with open(interval_info_path, 'w', encoding='utf-8') as f:
    f.write("稳定区间综合分析报告\n")
    f.write(f"总文件数: {len(file_list)}\n")
    f.write(f"整体平均框数: {total_mean:.2f} ± {total_std:.2f}\n")
    f.write(f"数据范围: {min(box_counts)}-{max(box_counts)}\n")

# Final (length-filtered) intervals of the three methods.
save_interval_report(intervals_std, "标准差方法", interval_info_path)
save_interval_report(intervals_zscore, "Z-score方法", interval_info_path)
save_interval_report(intervals_slope, "趋势斜率方法", interval_info_path)
# Base intervals (before merging).
save_interval_report(base_std, "标准差方法", interval_info_path, is_base=True)
save_interval_report(base_zscore, "Z-score方法", interval_info_path, is_base=True)
# The label here used to be a typo ("极好斜率方法"); it is the trend-slope method.
save_interval_report(base_slope, "趋势斜率方法", interval_info_path, is_base=True)
# Merged intervals (merged but not yet length-filtered).
save_interval_report(merged_std, "标准差方法", interval_info_path, is_merged=True)
save_interval_report(merged_zscore, "Z-score方法", interval_info_path, is_merged=True)
save_interval_report(merged_slope, "趋势斜率方法", interval_info_path, is_merged=True)

# Cross-method section: the union of the final intervals of all three methods.
with open(interval_info_path, 'a', encoding='utf-8') as f:
    f.write("\n\n=== 合并区间分析报告 ===\n")
    f.write("此部分展示三种方法检测到的所有稳定区间合并后的结果\n")
    f.write(f"合并后稳定区间数: {len(merged_intervals)}\n")
    for i, (start, end) in enumerate(merged_intervals):
        interval_counts = box_counts[start:end + 1]
        avg_count = np.mean(interval_counts)
        std_dev = np.std(interval_counts)
        # Which methods contributed to this merged interval. A merged interval
        # is a union, so it is normally wider than each contributing interval;
        # the test is therefore overlap, not containment.
        covered_by = []
        if any(s <= end and e >= start for s, e in intervals_std):
            covered_by.append("标准差")
        if any(s <= end and e >= start for s, e in intervals_zscore):
            covered_by.append("Z-score")
        if any(s <= end and e >= start for s, e in intervals_slope):
            covered_by.append("趋势斜率")
        f.write(f"\n合并区间 {i + 1}:\n")
        f.write(f" 起始文件索引: {start + 1} ({file_names[start]})\n")
        f.write(f" 结束文件索引: {end + 1} ({file_names[end]})\n")
        f.write(f" 文件数量: {end - start + 1}\n")
        f.write(f" 平均预测框数: {avg_count:.2f} ± {std_dev:.2f}\n")
        f.write(f" 最小值: {min(interval_counts)}, 最大值: {max(interval_counts)}\n")
        f.write(f" 覆盖方法: {', '.join(covered_by) if covered_by else '无'}\n")
    # Summary statistics over the merged intervals.
    interval_lengths = [end - start + 1 for start, end in merged_intervals]
    total_covered_files = sum(interval_lengths)
    # Guard the division so an empty file list cannot raise ZeroDivisionError.
    coverage_percentage = (total_covered_files / len(file_list)) * 100 if file_list else 0.0
    f.write("\n合并区间统计:\n")
    f.write(f" 总覆盖文件数: {total_covered_files}/{len(file_list)} ({coverage_percentage:.2f}%)\n")
    if interval_lengths:
        # Length statistics are only defined when at least one interval exists
        # (max()/min() of an empty list would raise).
        f.write(f" 平均区间长度: {np.mean(interval_lengths):.1f} 文件\n")
        f.write(f" 最长区间: {max(interval_lengths)} 文件\n")
        f.write(f" 最短区间: {min(interval_lengths)} 文件\n")

print_interval_info(merged_intervals, "合并区间")
print(f"\n结果图片已保存至: {output_dir}")
print(f"详细区间报告已保存至: {interval_info_path}")
把多窗口尺寸检测机制改成:从最小窗口尺寸开始检测。如果检测结果为不稳定,就从窗口右边界加1的点开始重新检测;如果检测结果为稳定,则将窗口右边界向右扩展1个点,再判断是否稳定,直到检测结果为不稳定或者窗口尺寸达到最大窗口尺寸时,将这个区间标记为稳定区间,再从该窗口右边界加1的点开始检测。这一步检测出来的稳定区间为基础稳定区间;再根据相邻区间合并的最大间隔合并相邻稳定区间,再根据最小有效区间长度过滤短时伪平稳段,最后生成最终稳定区间。
最新发布