import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import gaussian_kde, zscore
from scipy.signal import savgol_filter
from matplotlib.colors import Normalize
from matplotlib.pyplot import MultipleLocator
import matplotlib.font_manager as fm
from matplotlib.colors import LinearSegmentedColormap
# ========== 全局设置 ==========
FONT_SIZE = 16 # 字体大小
FONT_NAME = 'SimSun' # 使用支持中文的字体
MARKER_SIZE = 50 # 标记大小
# ========== 列名配置(根据实际数据列名修改)==========
COLUMN_NAMES = {
'rise_time': '上升时间', # 上升时间列名
'amplitude': '幅值', # 幅度列名
'ring_count': '计数', # 振铃计数列名
'duration': '持续时间' # 持续时间列名
}
# 设置支持中文的字体
plt.rcParams['font.sans-serif'] = ['SimSun', 'Microsoft YaHei', 'SimHei', 'KaiTi', 'Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False # 正确显示负号
# 设置字体和样式
config = {
"font.family": 'sans-serif',
"font.size": FONT_SIZE,
"mathtext.fontset": 'stix'
}
plt.rcParams.update(config)
def calculate_RA_AF(rise_time_us, amplitude_uV, ring_count, duration_us):
"""
计算 RA 和 AF 的值,处理无效值
"""
# 确保数据类型正确
amplitude_uV = amplitude_uV.astype(float)
duration_us = duration_us.astype(float)
# 创建掩码过滤无效值
valid_mask = (amplitude_uV > 0) & (duration_us > 0)
# 计算 RA 和 AF
RA_values = np.zeros_like(rise_time_us, dtype=float) * np.nan
AF_values = np.zeros_like(rise_time_us, dtype=float) * np.nan
RA_values[valid_mask] = rise_time_us[valid_mask] / amplitude_uV[valid_mask]
AF_values[valid_mask] = (ring_count[valid_mask] / duration_us[valid_mask]) * 1000
return RA_values, AF_values, valid_mask
def remove_outliers_zscore(df, threshold=30):
"""
使用Z-score方法剔除异常值,避免精度损失问题
"""
# 只对数值列计算Z-score
numeric_cols = df.select_dtypes(include=[np.number]).columns
# 处理常数列问题
z_scores = np.zeros_like(df[numeric_cols], dtype=float)
for i, col in enumerate(numeric_cols):
col_data = df[col].values
if np.std(col_data) > 1e-10: # 避免几乎相同的列
z_scores[:, i] = np.abs(zscore(col_data))
else:
z_scores[:, i] = 0 # 常数列Z-score设为0
# 标记异常点
outliers = (z_scores >= threshold).any(axis=1)
print(f"剔除的异常点数量: {np.sum(outliers)}")
if np.sum(outliers) > 0:
print("部分异常点示例:")
print(df[outliers].head())
return df[~outliers]
def remove_outliers_IQR(df, factor=1.5):
"""
使用IQR方法剔除异常值
"""
# 只对数值列计算IQR
numeric_cols = df.select_dtypes(include=[np.number]).columns
# 初始化异常值掩码
outliers = np.zeros(len(df), dtype=bool)
for col in numeric_cols:
col_data = df[col].values
Q1 = np.percentile(col_data, 25)
Q3 = np.percentile(col_data, 75)
IQR = Q3 - Q1
if IQR > 0: # 避免IQR为0的情况
lower_bound = Q1 - factor * IQR
upper_bound = Q3 + factor * IQR
col_outliers = (col_data < lower_bound) | (col_data > upper_bound)
outliers |= col_outliers
print(f"剔除的异常点数量: {np.sum(outliers)}")
if np.sum(outliers) > 0:
print("部分异常点示例:")
print(df[outliers].head())
return df[~outliers]
def find_knee_point(x_values, y_values):
"""
使用kneedle算法计算曲线的拐点
基于曲率最大值确定拐点位置
"""
# 平滑曲线以减少噪声影响
if len(y_values) > 5:
window_length = min(11, len(y_values))
if window_length % 2 == 0: # 确保窗口长度为奇数
window_length += 1
smoothed_y = savgol_filter(y_values, window_length=window_length, polyorder=2)
else:
smoothed_y = y_values
# 计算一阶导数(斜率)
dx = x_values[1] - x_values[0]
first_deriv = np.gradient(smoothed_y, dx)
# 计算二阶导数(曲率)
second_deriv = np.gradient(first_deriv, dx)
# 计算曲率 (K = |f''| / (1 + (f')^2)^(3/2))
curvature = np.abs(second_deriv) / (1 + first_deriv ** 2) ** (3 / 2)
# 找到最大曲率点(拐点)
# 排除前5%和后5%的点,避免边界效应
n = len(x_values)
start_idx = max(0, int(0.05 * n))
end_idx = min(n, int(0.95 * n))
valid_indices = np.arange(start_idx, end_idx)
if len(valid_indices) == 0:
# 如果没有有效点,使用整个范围
valid_indices = np.arange(n)
max_curv_idx = valid_indices[np.argmax(curvature[valid_indices])]
knee_x = x_values[max_curv_idx]
knee_y = smoothed_y[max_curv_idx]
return knee_x, knee_y, curvature, smoothed_y
def find_b0_for_curve(b_values, percentages):
"""
为单个曲线找到临界截距b0
确保 b0 > 0,并正确反映曲线特征:
- b < b0 时:剪切裂纹占比随b增大而迅速增加
- b > b0 时:剪切裂纹占比随b增大增速放缓或水平变化
参数:
b_values: 截距b值数组
percentages: 剪切裂纹占比数组
返回:
b0: 临界截距值 (保证 b0 > 0)
"""
# 平滑曲线以减少噪声影响
if len(percentages) > 5:
window_length = min(11, len(percentages))
if window_length % 2 == 0: # 确保窗口长度为奇数
window_length += 1
smoothed_p = savgol_filter(percentages, window_length=window_length, polyorder=2)
else:
smoothed_p = percentages
# 计算一阶导数(变化率)
dx = b_values[1] - b_values[0]
first_deriv = np.gradient(smoothed_p, dx)
# 计算二阶导数(变化率的变化)
second_deriv = np.gradient(first_deriv, dx)
# 只考虑 b > 0 的区域
positive_b_mask = b_values > 0
if not np.any(positive_b_mask):
# 如果没有b>0的点,返回最小正值
return np.max(b_values) * 0.5 # 返回中间值
# 找到增速开始放缓的点(二阶导数由正变负的点)
# 在 b>0 区域内寻找二阶导数小于0的点
negative_second_deriv = second_deriv < 0
# 确保只考虑b>0的区域
valid_mask = positive_b_mask & negative_second_deriv
if not np.any(valid_mask):
# 如果找不到增速放缓的点,使用b>0区域的最大值
return np.max(b_values[positive_b_mask])
# 找到第一个增速放缓的点(在b>0区域中第一个二阶导数小于0的点)
slowdown_indices = np.where(valid_mask)[0]
if len(slowdown_indices) == 0:
return np.max(b_values[positive_b_mask])
first_slowdown_idx = slowdown_indices[0]
b0 = b_values[first_slowdown_idx]
# 确保 b0 > 0
if b0 <= 0:
# 如果计算出的b0小于等于0,使用b>0区域的最小值
b0 = np.min(b_values[positive_b_mask])
return b0
def plot_all_k_b_curves_with_b0(k_values, b_values, all_percentages, knee_k, output_path, font2):
"""
绘制所有斜率条件下剪切裂纹占比随截距b值的变化曲线,并标记临界截距b0
参数:
k_values: 斜率k值列表
b_values: 截距b值列表
all_percentages: 字典,键为k值,值为对应b值的百分比列表
knee_k: 拐点k值
output_path: 输出文件路径
font2: 字体设置
返回:
avg_b0: 所有临界截距b0的平均值
b0_values: 每个k值对应的临界截距b0列表
"""
# 创建自定义颜色映射
colors = plt.cm.viridis(np.linspace(0, 1, len(k_values)))
fig, ax = plt.subplots(figsize=(14, 10), dpi=100)
# 存储每个k值对应的临界截距b0
b0_values = []
# 绘制每条曲线
for i, k in enumerate(k_values):
# 特别标出拐点k值对应的曲线
if abs(k - knee_k) < 1e-5: # 浮点数比较容差
linewidth = 3.5
color = 'red'
label = f'拐点 k={knee_k:.2f}'
zorder = 10 # 确保在最上层
else:
linewidth = 1.0
color = colors[i]
label = f'k={k:.0f}'
zorder = 1
# 获取当前k值对应的百分比曲线
percentages = all_percentages[k]
# 计算当前曲线的临界截距b0 (确保b0>0)
b0 = find_b0_for_curve(b_values, percentages)
b0_values.append(b0)
# 绘制曲线
line, = ax.plot(b_values, percentages,
linewidth=linewidth,
color=color,
label=label,
zorder=zorder)
# 标记临界截距b0 (确保只标记b0>0的点)
if b0 > 0:
# 找到最接近b0的索引
idx = np.abs(b_values - b0).argmin()
ax.scatter(b0, percentages[idx],
s=100, marker='*',
color=color, edgecolor='black',
zorder=zorder + 1)
# 计算所有临界截距b0的平均值 (只考虑b0>0的值)
positive_b0_values = [b0 for b0 in b0_values if b0 > 0]
if positive_b0_values:
avg_b0 = np.mean(positive_b0_values)
else:
avg_b0 = np.max(b_values) * 0.5 # 默认值
# 在图中标记平均临界截距b0
ax.axvline(x=avg_b0, color='purple', linestyle='--', linewidth=2, alpha=0.8)
ax.text(avg_b0 + 5, 10, f'平均临界截距 $b_0$ = {avg_b0:.2f}',
fontsize=16, fontname=FONT_NAME, color='purple')
# 添加区域标注
ax.text(avg_b0 * 0.3, 80, '区域 I: b < b₀\n剪切裂纹占比迅速增加',
fontsize=14, fontname=FONT_NAME,
bbox=dict(facecolor='white', alpha=0.8))
ax.text(avg_b0 * 1.5, 80, '区域 II: b > b₀\n剪切裂纹占比增速放缓',
fontsize=14, fontname=FONT_NAME,
bbox=dict(facecolor='white', alpha=0.8))
# 设置坐标轴
ax.set_xlabel('截距 b', fontdict=font2)
ax.set_ylabel('剪切裂纹占比 (%)', fontdict=font2)
ax.set_title('不同斜率条件下剪切裂纹占比随截距b的变化及临界截距$b_0$', fontdict=font2)
# 添加网格
ax.grid(True, linestyle='--', alpha=0.6)
# 添加图例(只显示部分k值)
# 选择代表性的k值显示图例,避免过于拥挤
num_legend_items = min(15, len(k_values))
step = max(1, len(k_values) // num_legend_items)
# 获取图例句柄和标签
handles, labels = ax.get_legend_handles_labels()
# 创建新的图例项列表,包含拐点k值和部分其他k值
new_handles = []
new_labels = []
# 确保拐点k值在图例中
for handle, label in zip(handles, labels):
if '拐点' in label:
new_handles.append(handle)
new_labels.append(label)
break
# 添加部分其他k值
for i in range(0, len(k_values), step):
if i < len(handles) and '拐点' not in labels[i]:
new_handles.append(handles[i])
new_labels.append(labels[i])
# 添加临界截距标记的图例
b0_marker = plt.Line2D([0], [0], marker='*', color='w', markerfacecolor='black',
markersize=15, label='临界截距 $b_0$')
new_handles.append(b0_marker)
new_labels.append('临界截距 $b_0$')
# 添加图例
ax.legend(new_handles, new_labels, loc='best', fontsize=12,
title='图例说明', title_fontsize=14)
# 保存图像
plt.savefig(output_path, dpi=800, bbox_inches='tight', pad_inches=0)
plt.close()
print(f"所有斜率b值曲线图(含临界截距$b_0$)已保存至: {output_path}")
return avg_b0, b0_values
def analyze_ra_af(input_file, output_dir, remove_method='zscore',
k_range=(0, 50), k_step=0.1,
b_analysis_k_range=(0, 300, 10), # 新增参数:用于b值分析的k范围
b_analysis_b_range=(-600, 600, 1), # 新增参数:b值范围
density_cmap='Spectral_r'):
"""
执行完整的RA-AF分析流程:
1. 读取数据并计算RA和AF
2. 剔除异常值
3. 绘制RA-AF散点密度图(添加AF = knee_k + b0分界线)
4. 计算剪切裂纹占比随k值变化的曲线
5. 使用kneedle算法找到拐点k
6. 分析不同斜率条件下剪切裂纹占比随截距b值的变化
7. 对每个斜率计算临界截距b0
8. 输出包含原始数据、RA、AF、关键k值和分类结果的数据表格
9. 分别计算剪切和张拉裂纹比例
参数:
input_file: 输入数据文件路径
output_dir: 输出目录路径
remove_method: 异常值剔除方法 ('zscore' 或 'IQR')
k_range: k值的范围 (start, end)
k_step: k值的步长
b_analysis_k_range: 用于b值分析的k范围 (start, stop, step)
b_analysis_b_range: 用于b值分析的b范围 (start, stop, step)
density_cmap: 密度图的颜色映射
"""
# 确保输出目录存在
os.makedirs(output_dir, exist_ok=True)
# 从文件名提取基本名称用于输出
base_name = os.path.splitext(os.path.basename(input_file))[0]
# 1. 读取数据
df = pd.read_excel(input_file)
# 使用列名配置提取数据
try:
rise_time = df[COLUMN_NAMES['rise_time']].values
amplitude = df[COLUMN_NAMES['amplitude']].values
ring_count = df[COLUMN_NAMES['ring_count']].values
duration = df[COLUMN_NAMES['duration']].values
except KeyError as e:
print(f"错误:数据文件中找不到配置的列名 - {e}")
print("请检查COLUMN_NAMES配置与实际数据列名是否匹配")
print("数据文件中的列名:", df.columns.tolist())
return
# 2. 计算RA和AF
RA, AF, valid_mask = calculate_RA_AF(rise_time, amplitude, ring_count, duration)
# 创建包含原始数据和RA/AF的DataFrame
ra_af_df = df.copy()
ra_af_df['RA'] = RA
ra_af_df['AF'] = AF
# 只保留有效数据点
ra_af_df = ra_af_df[valid_mask]
# 3. 剔除异常值
if remove_method == 'zscore':
cleaned_df = remove_outliers_zscore(ra_af_df)
elif remove_method == 'IQR':
cleaned_df = remove_outliers_IQR(ra_af_df)
else:
print(f"警告: 未知的异常值剔除方法 '{remove_method}', 使用默认的zscore方法")
cleaned_df = remove_outliers_zscore(ra_af_df)
# 确保我们处理的是DataFrame的副本,避免SettingWithCopyWarning
cleaned_df = cleaned_df.copy()
# 提取处理后的数据用于绘图
x = cleaned_df['RA'].values
y = cleaned_df['AF'].values
# 4. 绘制散点密度图
xy = np.vstack([x, y])
z_density = gaussian_kde(xy)(xy)
# 排序点以便更好地可视化
idx = z_density.argsort()
x_sorted, y_sorted, z_density_sorted = x[idx], y[idx], z_density[idx]
fig, ax = plt.subplots(figsize=(12, 9), dpi=100)
scatter = ax.scatter(x_sorted, y_sorted, marker='o', c=z_density_sorted, edgecolors='None',
s=15, label='LST', cmap=density_cmap)
# 添加颜色条
cbar = plt.colorbar(scatter, shrink=1, orientation='vertical',
extend='both', pad=0.015, aspect=30, label='点密度')
cbar.ax.set_ylabel('点密度', fontsize=26, fontname=FONT_NAME)
cbar.ax.tick_params(labelsize=26)
# 设置坐标刻度
plt.tick_params(labelsize=26)
labels = ax.get_xticklabels() + ax.get_yticklabels()
[label.set_fontname(FONT_NAME) for label in labels]
# 使用支持中文的字体
plt.rcParams['font.sans-serif'] = ['SimSun', 'Microsoft YaHei', 'SimHei', 'KaiTi', 'Arial Unicode MS']
font2 = {'family': FONT_NAME, 'weight': 'normal', 'size': 26}
plt.ylabel("AF(kHz)", fontdict=font2)
plt.xlabel("RA(ms/mv)", fontdict=font2)
# 设置坐标轴范围和刻度
plt.axis([0, 40, 0, 600])
ax.xaxis.set_major_locator(MultipleLocator(20))
ax.yaxis.set_major_locator(MultipleLocator(50))
# 添加文本标签
plt.text(x=2, y=550, s=base_name, fontdict=font2)
# 保存散点图
scatter_plot_path = os.path.join(output_dir, f'{base_name}_scatter.tiff')
plt.savefig(scatter_plot_path, dpi=800, bbox_inches='tight', pad_inches=0)
plt.close()
print(f"散点图已保存至: {scatter_plot_path}")
# 5. 计算剪切裂纹占比随k值的变化
k_values = np.arange(k_range[0], k_range[1] + k_step, k_step)
percentages = []
for k in k_values:
y_line = k * x
mask = y < y_line
percentage = np.sum(mask) / len(y) * 100
percentages.append(percentage)
# 保存k值和百分比
output_df = pd.DataFrame({'k': k_values, 'Percentage': percentages})
csv_filename = os.path.join(output_dir, f'{base_name}_percentage_values.csv')
output_df.to_csv(csv_filename, index=False)
print(f"k值百分比数据已保存至: {csv_filename}")
# 6. 使用kneedle算法找到拐点k
knee_k, knee_percentage, curvature, smoothed_p = find_knee_point(k_values, percentages)
# 绘制曲线图
fig2, (ax2, ax3) = plt.subplots(2, 1, figsize=(12, 12), dpi=100, sharex=True)
# 使用支持中文的字体
plt.rcParams['font.sans-serif'] = ['SimSun', 'Microsoft YaHei', 'SimHei', 'KaiTi', 'Arial Unicode MS']
# 绘制原始百分比曲线
ax2.plot(k_values, percentages, 'b-', linewidth=1.5, alpha=0.6, label='原始数据')
ax2.plot(k_values, smoothed_p, 'g--', linewidth=1.5, alpha=0.7, label='平滑后数据')
# 标记拐点
ax2.axvline(x=knee_k, color='r', linestyle='--', alpha=0.7)
ax2.plot(knee_k, knee_percentage, 'ro', markersize=8)
ax2.annotate(f'拐点 k = {knee_k:.2f}',
(knee_k + 0.5, knee_percentage + 5),
fontsize=16,
fontname=FONT_NAME)
# 添加拐点说明
explanation_text = f"当斜率 k > {knee_k:.2f} 时,\n裂纹分类结果基本稳定"
ax2.text(0.65, 0.25, explanation_text, transform=ax2.transAxes,
fontsize=16, bbox=dict(facecolor='white', alpha=0.8), fontname=FONT_NAME)
# 设置坐标轴
ax2.set_ylabel('剪切裂纹占比 (%)', fontdict=font2)
ax2.set_title(f'剪切裂纹占比随分界线斜率 k 的变化 ({base_name})', fontdict=font2)
# 设置坐标范围
ax2.set_xlim(k_range)
ax2.set_ylim(0, 100)
# 设置网格和图例
ax2.grid(True, linestyle='--', alpha=0.6)
ax2.legend(loc='lower right', fontsize=16, prop={'family': FONT_NAME})
# 绘制曲率图
ax3.plot(k_values, curvature, 'm-', linewidth=2, label='曲率')
ax3.axvline(x=knee_k, color='r', linestyle='--', alpha=0.7)
ax3.plot(knee_k, curvature[np.where(k_values == knee_k)[0][0]], 'ro', markersize=8)
# 设置曲率图
ax3.set_xlabel('分界线斜率 k', fontdict=font2)
ax3.set_ylabel('曲率', fontdict=font2)
ax3.set_title('曲率变化图', fontdict=font2)
ax3.grid(True, linestyle='--', alpha=0.6)
ax3.legend(loc='upper right', fontsize=16, prop={'family': FONT_NAME})
# 设置字体
for ax in [ax2, ax3]:
labels = ax.get_xticklabels() + ax.get_yticklabels()
[label.set_fontname(FONT_NAME) for label in labels]
ax.tick_params(labelsize=20)
# 保存百分比图
percentage_plot_path = os.path.join(output_dir, f'{base_name}_percentage_curve.tiff')
plt.savefig(percentage_plot_path, dpi=800, bbox_inches='tight', pad_inches=0)
plt.close(fig2)
print(f"k值百分比曲线图已保存至: {percentage_plot_path}")
# 7. 分析不同斜率条件下剪切裂纹占比随截距b值的变化
print("\n开始分析不同斜率条件下剪切裂纹占比随截距b值的变化...")
# 准备用于b值分析的k值范围
k_start, k_stop, k_step_b = b_analysis_k_range
k_values_b = np.arange(k_start, k_stop + k_step_b, k_step_b)
# 准备b值范围
b_start, b_stop, b_step = b_analysis_b_range
b_values = np.arange(b_start, b_stop + b_step, b_step)
# 存储不同k值的临界截距b0
b0_values = []
k_b0_points = []
# 存储占比数据的字典
all_percentages = {}
# 创建子目录存储b值分析结果
b_output_dir = os.path.join(output_dir, 'b_analysis')
os.makedirs(b_output_dir, exist_ok=True)
# 计算每个k值对应的b值占比
for i, k in enumerate(k_values_b):
print(f"计算斜率 k={k:.2f} 的b值占比 ({i + 1}/{len(k_values_b)})")
# 存储当前k值下不同b值的占比
percentages_b = []
for b in b_values:
y_line = k * x + b
mask = y < y_line
percentage = np.sum(mask) / len(y) * 100
percentages_b.append(percentage)
# 保存当前k值的占比数据
all_percentages[k] = percentages_b
# 找到临界截距b0
b0 = find_b0_for_curve(b_values, percentages_b)
# 存储临界截距b0
b0_values.append(b0)
k_b0_points.append(k)
# 保存数据
df_b = pd.DataFrame({
'b': b_values,
'percentage': percentages_b,
})
df_b.to_excel(os.path.join(b_output_dir, f'b_analysis_k_{k:.2f}.xlsx'), index=False)
# 绘制所有斜率条件下剪切裂纹占比随截距b的变化曲线(含临界截距b0)
all_k_b_path = os.path.join(output_dir, f'{base_name}_all_k_b_curves_with_b0.tiff')
avg_b0, b0_values = plot_all_k_b_curves_with_b0(
k_values_b, b_values, all_percentages, knee_k, all_k_b_path, font2
)
# 保存临界截距b0数据
b0_df = pd.DataFrame({
'k': k_b0_points,
'b0': b0_values
})
b0_csv = os.path.join(output_dir, f'{base_name}_critical_b0_values.csv')
b0_df.to_csv(b0_csv, index=False)
print(f"临界截距b0数据已保存至: {b0_csv}")
# 单独绘制拐点k值时占比随b的变化
fig_b, ax_b = plt.subplots(figsize=(12, 8), dpi=100)
# 获取拐点k值对应的数据
# 找到最接近拐点k值的实际k值
k_closest_idx = np.abs(k_values_b - knee_k).argmin()
k_closest = k_values_b[k_closest_idx]
percentages_b_closest = all_percentages[k_closest]
# 找到临界截距b0
b0_closest = find_b0_for_curve(b_values, percentages_b_closest)
# 绘制曲线
ax_b.plot(b_values, percentages_b_closest, 'b-', linewidth=2.5,
label=f'k={k_closest:.2f} (拐点k值)')
# 标记临界截距b0
ax_b.axvline(x=b0_closest, color='r', linestyle='--', alpha=0.7)
ax_b.plot(b0_closest, percentages_b_closest[np.abs(b_values - b0_closest).argmin()],
'ro', markersize=8)
ax_b.annotate(f'临界截距 $b_0$ = {b0_closest:.2f}',
(b0_closest + 5, percentages_b_closest[np.abs(b_values - b0_closest).argmin()] + 5),
fontsize=16, fontname=FONT_NAME)
# 添加说明
ax_b.text(0.05, 0.95, f'斜率 k = {k_closest:.2f} (拐点k值)',
transform=ax_b.transAxes, fontsize=14,
bbox=dict(facecolor='white', alpha=0.8))
# 添加区域标注
ax_b.text(b0_closest * 0.3, 80, '区域 I: b < b₀\n剪切裂纹占比迅速增加',
fontsize=14, fontname=FONT_NAME,
bbox=dict(facecolor='white', alpha=0.8))
ax_b.text(b0_closest * 1.5, 80, '区域 II: b > b₀\n剪切裂纹占比增速放缓',
fontsize=14, fontname=FONT_NAME,
bbox=dict(facecolor='white', alpha=0.8))
# 设置坐标轴
ax_b.set_xlabel('截距 b', fontdict=font2)
ax_b.set_ylabel('剪切裂纹占比 (%)', fontdict=font2)
ax_b.set_title(f'拐点k值(k={k_closest:.2f})时剪切裂纹占比随截距b的变化', fontdict=font2)
# 添加网格和图例
ax_b.grid(True, linestyle='--', alpha=0.6)
ax_b.legend(loc='best', fontsize=14)
# 保存图像
target_k_b_path = os.path.join(output_dir, f'{base_name}_knee_k_b_curve.tiff')
plt.savefig(target_k_b_path, dpi=800, bbox_inches='tight', pad_inches=0)
plt.close()
print(f"拐点k值对应的b曲线图已保存至: {target_k_b_path}")
# 8. 在散点图中添加AF = knee_k + b0分界线并重新保存
print("\n在散点图中添加AF = knee_k + b0分界线...")
fig, ax = plt.subplots(figsize=(12, 9), dpi=100)
# 重新绘制散点图
scatter = ax.scatter(x_sorted, y_sorted, marker='o', c=z_density_sorted, edgecolors='None',
s=15, label='LST', cmap=density_cmap)
# 添加颜色条
cbar = plt.colorbar(scatter, shrink=1, orientation='vertical',
extend='both', pad=0.015, aspect=30, label='点密度')
cbar.ax.set_ylabel('点密度', fontsize=26, fontname=FONT_NAME)
cbar.ax.tick_params(labelsize=26)
# 设置坐标轴
plt.tick_params(labelsize=26)
labels = ax.get_xticklabels() + ax.get_yticklabels()
[label.set_fontname(FONT_NAME) for label in labels]
plt.ylabel("AF(kHz)", fontdict=font2)
plt.xlabel("RA(ms/mv)", fontdict=font2)
# 设置坐标轴范围和刻度
plt.axis([0, 40, 0, 600])
ax.xaxis.set_major_locator(MultipleLocator(20))
ax.yaxis.set_major_locator(MultipleLocator(50))
# 添加文本标签
plt.text(x=2, y=550, s=base_name, fontdict=font2)
# 添加分界线
x_line = np.linspace(0, 40, 100)
# 1. 拐点k值分界线: AF = knee_k * RA
y_knee = knee_k * x_line
ax.plot(x_line, y_knee, 'r-', linewidth=2.5, label=f'分界线: AF = {knee_k:.2f} * RA')
# 2. 拐点k值 + 平均临界截距b0分界线: AF = knee_k * RA + avg_b0
y_b0 = knee_k * x_line + avg_b0
ax.plot(x_line, y_b0, 'b--', linewidth=2.5, label=f'分界线: AF = {knee_k:.2f} * RA + {avg_b0:.2f}')
# 添加图例
ax.legend(loc='upper right', fontsize=14)
# 保存带分界线的散点图
scatter_with_lines_path = os.path.join(output_dir, f'{base_name}_scatter_with_lines.tiff')
plt.savefig(scatter_with_lines_path, dpi=800, bbox_inches='tight', pad_inches=0)
plt.close()
print(f"带分界线的散点图已保存至: {scatter_with_lines_path}")
# 9. 分别计算剪切和张拉裂纹比例
print("\n计算剪切和张拉裂纹比例...")
# 使用拐点k值分界线分类
mask_shear = y < knee_k * x
mask_tensile = ~mask_shear
# 计算比例
shear_percentage = np.sum(mask_shear) / len(y) * 100
tensile_percentage = 100 - shear_percentage
# 使用拐点k值 + 平均临界截距b0分界线分类
mask_shear_b0 = y < (knee_k * x + avg_b0)
mask_tensile_b0 = ~mask_shear_b0
# 计算比例
shear_percentage_b0 = np.sum(mask_shear_b0) / len(y) * 100
tensile_percentage_b0 = 100 - shear_percentage_b0
print(f"使用拐点k值分界线:")
print(f" 剪切裂纹比例: {shear_percentage:.2f}%")
print(f" 张拉裂纹比例: {tensile_percentage:.2f}%")
print(f"\n使用拐点k值 + 平均临界截距b0分界线:")
print(f" 剪切裂纹比例: {shear_percentage_b0:.2f}%")
print(f" 张拉裂纹比例: {tensile_percentage_b0:.2f}%")
# 10. 创建包含所有计算结果的数据表格
# 添加密度值(原始顺序)
cleaned_df['Density'] = z_density
# 使用拐点k值进行分类
critical_k = knee_k
# 添加分类结果(剪切裂纹或拉伸裂纹)
cleaned_df['Crack_Type'] = np.where(
cleaned_df['AF'] < critical_k * cleaned_df['RA'],
'剪切裂纹',
'拉伸裂纹'
)
# 添加关键k值列
cleaned_df['Critical_k'] = critical_k
# 重命名列以增加可读性
column_mapping = {
COLUMN_NAMES['rise_time']: '上升时间(us)',
COLUMN_NAMES['amplitude']: '幅值(uV)',
COLUMN_NAMES['ring_count']: '振铃计数',
COLUMN_NAMES['duration']: '持续时间(us)',
'RA': 'RA(ms/mV)',
'AF': 'AF(kHz)',
'Density': '点密度',
'Crack_Type': '裂纹类型',
'Critical_k': '关键k值'
}
cleaned_df = cleaned_df.rename(columns=column_mapping)
# 选择要保存的列
output_columns = [
'上升时间(us)', '幅值(uV)', '振铃计数', '持续时间(us)',
'RA(ms/mV)', 'AF(kHz)', '点密度', '裂纹类型', '关键k值'
]
result_df = cleaned_df[output_columns]
# 保存结果表格
result_table_path = os.path.join(output_dir, f'{base_name}_results.xlsx')
with pd.ExcelWriter(result_table_path) as writer:
# 保存完整结果
result_df.to_excel(writer, sheet_name='计算结果', index=False)
# 保存关键参数汇总
shear_count = result_df[result_df['裂纹类型'] == '剪切裂纹'].shape[0]
tensile_count = result_df[result_df['裂纹类型'] == '拉伸裂纹'].shape[0]
total_count = len(result_df)
summary_data = {
'参数': [
'关键k值',
'平均临界截距b0',
'使用k值分界线的剪切裂纹比例(%)',
'使用k值分界线的张拉裂纹比例(%)',
'使用k+b0分界线的剪切裂纹比例(%)',
'使用k+b0分界线的张拉裂纹比例(%)',
'总数据点数',
'剪切裂纹点数',
'拉伸裂纹点数'
],
'值': [
critical_k,
avg_b0,
shear_percentage,
tensile_percentage,
shear_percentage_b0,
tensile_percentage_b0,
total_count,
shear_count,
tensile_count
]
}
summary_df = pd.DataFrame(summary_data)
summary_df.to_excel(writer, sheet_name='参数汇总', index=False)
# 添加临界截距b0数据到汇总表
with pd.ExcelWriter(result_table_path, mode='a', if_sheet_exists='replace') as writer:
b0_df.to_excel(writer, sheet_name='临界截距b0数据', index=False)
# 11. 打印结果摘要
print("\n" + "=" * 50)
print(f"分析完成: {base_name}")
print("=" * 50)
print(f"原始数据点数量: {len(df)}")
print(f"有效数据点数量(剔除NaN后): {len(ra_af_df)}")
print(f"剔除异常值后数据点数量: {len(cleaned_df)}")
print(f"拐点k值: {knee_k:.2f} (剪切裂纹占比 = {knee_percentage:.2f}%)")
print(f"当斜率 k > {knee_k:.2f} 时,裂纹分类结果基本稳定")
print(f"平均临界截距 b0: {avg_b0:.2f}")
print(f"临界截距b0值范围: {min(b0_values):.2f} 到 {max(b0_values):.2f}")
print(f"使用k值分界线的剪切裂纹比例: {shear_percentage:.2f}%")
print(f"使用k值分界线的张拉裂纹比例: {tensile_percentage:.2f}%")
print(f"使用k+b0分界线的剪切裂纹比例: {shear_percentage_b0:.2f}%")
print(f"使用k+b0分界线的张拉裂纹比例: {tensile_percentage_b0:.2f}%")
print(f"散点图保存至: {scatter_plot_path}")
print(f"带分界线的散点图保存至: {scatter_with_lines_path}")
print(f"k值百分比曲线图保存至: {percentage_plot_path}")
print(f"k值百分比数据保存至: {csv_filename}")
print(f"所有斜率b值曲线图(含临界截距b0)保存至: {all_k_b_path}")
print(f"临界截距b0数据保存至: {b0_csv}")
print(f"拐点k值对应的b曲线图保存至: {target_k_b_path}")
print(f"结果表格保存至: {result_table_path}")
# 返回关键结果
return {
'knee_k': knee_k,
'knee_percentage': knee_percentage,
'critical_k': critical_k,
'avg_b0': avg_b0,
'b0_values': b0_values,
'shear_percentage': shear_percentage,
'tensile_percentage': tensile_percentage,
'shear_percentage_b0': shear_percentage_b0,
'tensile_percentage_b0': tensile_percentage_b0,
'cleaned_data': cleaned_df,
'scatter_path': scatter_plot_path,
'scatter_with_lines_path': scatter_with_lines_path,
'curve_path': percentage_plot_path,
'all_k_b_path': all_k_b_path,
'target_k_b_path': target_k_b_path,
'table_path': result_table_path
}
# ========== 使用示例 ==========
if __name__ == "__main__":
# 配置参数
input_file = r'J:\321\Kneedle值\AE-S-2.xlsx'
output_dir = r'J:\321\Kneedle值\results'
# 执行分析
results = analyze_ra_af(
input_file=input_file,
output_dir=output_dir,
remove_method='zscore', # 可选 'zscore' 或 'IQR'
k_range=(0, 500), # k值范围
k_step=0.1, # k值步长
b_analysis_k_range=(0, 2000, 10), # 用于b值分析的k范围 (start, stop, step)
b_analysis_b_range=(-600, 600, 1), # 用于b值分析的b范围 (start, stop, step)
density_cmap='Spectral_r' # 密度图颜色
)
# 访问结果
print(f"拐点k值: {results['knee_k']:.2f}")
print(f"平均临界截距 b0: {results['avg_b0']:.2f}")
print(f"使用k值分界线的剪切裂纹比例: {results['shear_percentage']:.2f}%")
print(f"使用k+b0分界线的剪切裂纹比例: {results['shear_percentage_b0']:.2f}%")
print(f"结果表格路径: {results['table_path']}")
# ---------- 计算拉伸/剪切占比 ----------
total_count = len(df)
shear_count = np.sum(df['Type'] == 'Shear Crack')
tensile_count = np.sum(df['Type'] == 'Tensile Crack')
shear_ratio = shear_count / total_count * 100
tensile_ratio = tensile_count / total_count * 100
# ---------- 绘制饼状图 ----------
fig, ax = plt.subplots(figsize=(6, 6))
labels = [f'剪切裂缝 ({shear_ratio:.1f}%)', f'拉伸裂缝 ({tensile_ratio:.1f}%)']
sizes = [shear_count, tensile_count]
colors = ['#ff9999', '#66b3ff'] # 可以改颜色
explode = (0.05, 0.05) # 两块都稍微突出
wedges, texts, autotexts = ax.pie(
sizes,
labels=labels,
colors=colors,
explode=explode,
autopct='%1.1f%%',
textprops={'fontsize': FONT_SIZE, 'fontname': FONT_NAME}
)
for autotext in autotexts:
autotext.set_fontsize(FONT_SIZE)
autotext.set_fontname(FONT_NAME)
ax.set_title('拉伸与剪切裂缝占比', fontsize=FONT_SIZE + 2, fontname=FONT_NAME)
plt.tight_layout()
plt.show()