如何检验结果是否合理
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pywt
from scipy.signal import savgol_filter, find_peaks
from scipy.interpolate import interp1d
import sys
import io
import json
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei'] # 更通用的中文字体
plt.rcParams['axes.unicode_minus'] = False
def create_result_folder(folder_name="results"):
if not os.path.exists(folder_name):
os.makedirs(folder_name)
return folder_name
def save_plot(save_path, filename, fig=None):
if fig is not None:
fig.savefig(os.path.join(save_path, filename), dpi=300, bbox_inches='tight')
plt.close(fig)
else:
plt.savefig(os.path.join(save_path, filename), dpi=300, bbox_inches='tight')
plt.close()
path = os.path.dirname(os.path.abspath(__file__))
# ✅ 1. 数据读取与异常值剔除
def get_data(file_path):
file_name = os.path.basename(file_path)
data = pd.read_excel(file_path, header=0)
if data.isna().any().any():
print(f"{file_name}存在空值:")
print(data.isna().sum())
else:
print(f"{file_name}无空值")
wavenumber = data.iloc[:, 0].values
reflectivity = data.iloc[:, 1].values
negative_num = np.sum(reflectivity < 0)
over_one_num = np.sum(reflectivity > 100)
print(f"{file_name}反射率小于0的个数:{negative_num}")
print(f"{file_name}反射率大于100的个数:{over_one_num}")
return wavenumber, reflectivity
def outlier_removal(wavenumber, reflectivity):
valid_value = (reflectivity >= 0) & (reflectivity <= 100)
return wavenumber[valid_value], reflectivity[valid_value]
# ✅ 2. 数据平滑与去噪
def apply_savgol_filter(reflectivity, window_length=11, polyorder=3):
return savgol_filter(reflectivity, window_length=window_length, polyorder=polyorder)
def wavelet_denoise(signal, wavelet='db4', level=1, mode_ext='constant'):
coeffs = pywt.wavedec(signal, wavelet, level=level, mode=mode_ext)
threshold = np.std(coeffs[-level]) * np.sqrt(2 * np.log(len(signal)))
coeffs = [pywt.threshold(c, threshold, mode='soft') for c in coeffs]
reconstructed = pywt.waverec(coeffs, wavelet, mode=mode_ext)
return reconstructed
def equal_length(signal, target_length):
x = np.linspace(0, 1, len(signal))
x_target = np.linspace(0, 1, target_length)
f = interp1d(x, signal, kind='linear')
return f(x_target)
def normalize(reflectivity):
return reflectivity / 100.0
# ✅ 3. 导数法检测波峰波谷
def find_peaks_by_derivative(wavenumber, reflectivity, threshold=1e-3):
first_derivative = np.gradient(reflectivity, wavenumber)
second_derivative = np.gradient(first_derivative, wavenumber)
peak_indices = np.where((np.abs(first_derivative) < threshold) & (second_derivative < 0))[0]
return peak_indices
def find_valleys_by_derivative(wavenumber, reflectivity, threshold=1e-3):
first_derivative = np.gradient(reflectivity, wavenumber)
second_derivative = np.gradient(first_derivative, wavenumber)
valley_indices = np.where((np.abs(first_derivative) < threshold) & (second_derivative > 0))[0]
return valley_indices
# ✅ 4. 提取主频信息
def get_dominant_frequency(signal, fs=1.0):
n = len(signal)
fft_result = np.fft.fft(signal)
freqs = np.fft.fftfreq(n, d=1/fs)
magnitude = np.abs(fft_result)
dominant_freq = freqs[np.argmax(magnitude[1:n//2]) + 1]
return abs(dominant_freq)
def plot_fft(signal, fs=1.0, title="FFT"):
n = len(signal)
fft_result = np.fft.fft(signal)
freqs = np.fft.fftfreq(n, d=1/fs)
magnitude = np.abs(fft_result)
fig = plt.figure(figsize=(10, 4))
plt.plot(freqs[:n//2], magnitude[:n//2], label='幅值')
plt.title(title, fontsize=14)
plt.xlabel('频率(cycles per cm⁻¹)', fontsize=12)
plt.ylabel('幅值', fontsize=12)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.close()
return fig
# ✅ 5. 数据保存函数
def save_peaks_valleys(save_path, wavenumber, reflectivity, peaks, valleys):
peaks_data = pd.DataFrame({
'波数': wavenumber[peaks],
'反射率': reflectivity[peaks]
})
valleys_data = pd.DataFrame({
'波数': wavenumber[valleys],
'反射率': reflectivity[valleys]
})
peaks_data.to_csv(os.path.join(save_path, '波峰.csv'), index=False)
valleys_data.to_csv(os.path.join(save_path, '波谷.csv'), index=False)
def save_reflectivity_data(save_path, wavenumber, reflectivity):
df = pd.DataFrame({
'波数': wavenumber,
'反射率': reflectivity
})
df.to_csv(os.path.join(save_path, '去噪后反射率数据.csv'), index=False)
def save_dominant_frequency(save_path, freq):
with open(os.path.join(save_path, '主频信息.txt'), 'w', encoding='utf-8') as f:
f.write(f"信号主频(频域): {freq:.4f} cycles per cm⁻¹\n")
def save_data_stats(save_path, file_name, negative_num, over_one_num):
with open(os.path.join(save_path, '原始数据统计.txt'), 'w', encoding='utf-8') as f:
f.write(f"文件名: {file_name}\n")
f.write(f"反射率小于0的个数: {negative_num}\n")
f.write(f"反射率大于100的个数: {over_one_num}\n")
def save_log(save_path, log_content):
with open(os.path.join(save_path, '日志.txt'), 'w', encoding='utf-8') as f:
f.write(log_content)
# ✅ 6. 新增函数:波数转波长
def wavenumber_to_wavelength(wavenumber):
return 1e4 / wavenumber # cm⁻¹ -> μm
# ✅ 7. 折射率经验公式
def refractive_index(wavelength):
term1 = (0.20075 * wavelength ** 2) / (wavelength ** 2 + 12.07224)
term2 = (5.54861 * wavelength ** 2) / (wavelength ** 2 - 0.02641)
term3 = (35.65066 * wavelength ** 2) / (wavelength ** 2 - 1268.24708)
return np.sqrt(1 + term1 + term2 + term3)
def refractive_index_complex(wavelength, reflectance):
n0 = refractive_index(wavelength)
R = reflectance / 100.0
n_complex = complex(n0, 0)
numerator = R * (n0 + 1) ** 2 - (n_complex - 1) ** 2
denominator = 1 - R
if denominator <= 0 or np.real(numerator) < 0:
kappa = 0.0
else:
kappa = np.sqrt(np.real(numerator / denominator))
return complex(n0, kappa)
# ✅ 8. 厚度计算
def calculate_thickness(P, wavelength, theta1_deg):
theta1 = np.deg2rad(theta1_deg)
sin_theta1 = np.sin(theta1)
n = refractive_index(wavelength)
sqrt_n2_sin2 = np.sqrt(n ** 2 - sin_theta1 ** 2)
d = ((P - 0.5) * wavelength * sqrt_n2_sin2) / (2 * n ** 2)
return d
def calculate_thickness_complex(P, wavelength, theta1_deg, reflectance):
theta1 = np.deg2rad(theta1_deg)
sin_theta1 = np.sin(theta1)
n_complex = refractive_index_complex(wavelength, reflectance)
n0 = n_complex.real
kappa = n_complex.imag
sqrt_n2_sin2 = np.sqrt(n0 ** 2 - sin_theta1 ** 2)
absorption_correction = 1.0 + (kappa / n0) ** 2
d = ((P - 0.5) * wavelength * sqrt_n2_sin2) / (2 * n0 ** 2)*absorption_correction
return d
# ✅ 9. 干涉周期计算
def compute_interference_period_weighted(peaks_wavenumber, valleys_wavenumber):
if len(peaks_wavenumber) < 2 or len(valleys_wavenumber) < 2:
return None
from sklearn.linear_model import RANSACRegressor
peak_diffs = np.diff(peaks_wavenumber)
valley_diffs = np.diff(valleys_wavenumber)
all_diffs = np.concatenate([peak_diffs, valley_diffs])
X = np.arange(len(all_diffs)).reshape(-1, 1)
y = all_diffs
ransac = RANSACRegressor()
ransac.fit(X, y)
inlier_mask = ransac.inlier_mask_
P = np.mean(all_diffs[inlier_mask])
return P
# ✅ 10. 厚度计算并保存
def compute_and_save_thickness(save_path, peaks_wavenumber, valleys_wavenumber, reflectivity_denoised, theta1_deg):
if len(peaks_wavenumber) < 2 or len(valleys_wavenumber) < 2:
print("无法计算干涉周期,波峰/波谷数量不足")
return None
P = compute_interference_period_weighted(peaks_wavenumber, valleys_wavenumber)
if P is None:
print("无法提取干涉周期")
return None
all_wavenumbers = np.concatenate([peaks_wavenumber, valleys_wavenumber])
mean_wavenumber = np.mean(all_wavenumbers)
wavelength = wavenumber_to_wavelength(mean_wavenumber)
mean_reflectance = np.mean(reflectivity_denoised)
thickness = calculate_thickness_complex(P, wavelength, theta1_deg, mean_reflectance)
result_file = os.path.join(save_path, '厚度计算结果.txt')
with open(result_file, 'w', encoding='utf-8') as f:
f.write(f"入射角: {theta1_deg}°\n")
f.write(f"干涉周期: {P:.4f} cm⁻¹\n")
f.write(f"平均波长: {wavelength:.4f} μm\n")
f.write(f"平均反射率: {mean_reflectance:.2f}%\n")
f.write(f"计算厚度: {thickness:.4f} μm\n")
f.write(f"公式:d = (P - 0.5) * λ * sqrt(n₀² - sin²θ1) / (2n₀²) * (1 + (κ/n₀)²)\n")
print(f"\n厚度计算结果已保存至:{result_file}")
print(f"厚度:{thickness:.4f} μm")
return thickness
# ✅ 11. 厚度一致性分析(新增一致性验证)
def thickness_consistency_analysis(thickness_10, thickness_15):
avg_thickness = (thickness_10 + thickness_15) / 2
relative_error = abs(thickness_10 - thickness_15) / avg_thickness * 100
print(f"\n【厚度一致性分析】")
print(f"附件1厚度估计:{thickness_10:.4f} μm")
print(f"附件2厚度估计:{thickness_15:.4f} μm")
print(f"相对误差:{relative_error:.2f}%")
print(f"散度:{np.std([thickness_10, thickness_15]):.4f} μm")
print(f"线性拟合残差:{abs(2 * thickness_10 - thickness_15):.4f} μm")
print(f"角度敏感性:{(thickness_15 - thickness_10) / 5:.4f} μm/°")
if relative_error < 1:
print("✅ 厚度估计稳定,误差 < 1%")
else:
print("⚠️ 厚度估计不稳定,误差 > 1%,可能存在系统误差")
return relative_error
# ✅ 12. 局部厚度分布图
def plot_thickness_distribution(save_path, wavenumbers, reflectivity, thickness_values):
plt.figure(figsize=(12, 6))
plt.plot(wavenumbers, reflectivity, label='反射率', color='blue')
min_len = min(len(wavenumbers), len(thickness_values))
plt.plot(wavenumbers[:min_len], thickness_values[:min_len], label='局部厚度估计', color='red', linestyle='--')
plt.title('反射率与局部厚度估计分布')
plt.xlabel('波数 (cm⁻¹)')
plt.ylabel('反射率 (%) / 厚度 (μm)')
plt.legend()
plt.grid(True)
plt.gca().invert_xaxis()
save_plot(save_path, '厚度分布图.png')
plt.close()
def plot_thickness_comparison(save_path, angles, thicknesses):
plt.figure(figsize=(8, 6))
plt.bar([str(a) + '°' for a in angles], thicknesses, color=['blue', 'green'], alpha=0.7)
plt.title('不同入射角下的厚度估计')
plt.xlabel('入射角')
plt.ylabel('厚度 (μm)')
plt.grid(True)
save_plot(save_path, '不同入射角厚度图.png')
plt.close()
def plot_interference_period_distribution(save_path, P_values):
plt.figure(figsize=(12, 6))
plt.hist(P_values, bins=20, edgecolor='black', alpha=0.7)
plt.title('干涉周期 $P$ 分布直方图')
plt.xlabel('干涉周期 (cm⁻¹)')
plt.ylabel('频次')
plt.grid(True)
save_plot(save_path, '干涉周期分布图.png')
plt.close()
# ✅ 主程序(重构顺序,符合解题四步法)
if __name__ == "__main__":
result_folder = create_result_folder("results")
file_paths = [
os.path.join(path, "附件1.xlsx"),
os.path.join(path, "附件2.xlsx")
]
error_report = {}
thickness_results = {}
for file in file_paths:
log_output = io.StringIO()
console = sys.stdout
sys.stdout = log_output
print(f"\n对于文件:{os.path.basename(file)}")
wavenumber, reflectivity = get_data(file)
if wavenumber is not None and reflectivity is not None:
file_name = os.path.splitext(os.path.basename(file))[0]
save_path = os.path.join(result_folder, file_name)
create_result_folder(save_path)
# ✅ 1. 预处理(去噪+峰谷检测)
wavenumber_clean, reflectivity_clean = outlier_removal(wavenumber, reflectivity)
reflectivity_smooth = apply_savgol_filter(reflectivity_clean)
reflectivity_denoised = wavelet_denoise(reflectivity_smooth, mode_ext='constant')
reflectivity_denoised = equal_length(reflectivity_denoised, len(wavenumber_clean))
reflectivity_normalized = normalize(reflectivity_denoised)
# ✅ 2. 波峰检测(自动识别极值,判定可用性)
peak_indices = find_peaks_by_derivative(wavenumber_clean, reflectivity_denoised, threshold=1e-3)
valley_indices = find_valleys_by_derivative(wavenumber_clean, reflectivity_denoised, threshold=1e-3)
# ✅ 3. 线性标定(单序列法)
P = compute_interference_period_weighted(wavenumber_clean[peak_indices], wavenumber_clean[valley_indices])
if P is None:
print("⚠️ 干涉周期计算失败,跳过厚度反演")
continue
# ✅ 4. 厚度反演
if "附件1" in file_name:
theta1_deg = 10
elif "附件2" in file_name:
theta1_deg = 15
else:
theta1_deg = 0
thickness = compute_and_save_thickness(save_path, wavenumber_clean[peak_indices], wavenumber_clean[valley_indices],
reflectivity_denoised, theta1_deg)
if thickness is not None:
thickness_results[file_name] = thickness
# ✅ 局部厚度计算(用于绘图)
window_size = 50
step = 10
thickness_values = []
for i in range(0, len(wavenumber_clean) - window_size, step):
w = wavenumber_clean[i:i + window_size]
r = reflectivity_denoised[i:i + window_size]
p = compute_interference_period_weighted(w, r)
if p is not None:
wl = wavenumber_to_wavelength(np.mean(w))
t = calculate_thickness_complex(p, wl, 10, np.mean(r))
thickness_values.append(t)
else:
thickness_values.append(np.nan)
if len(thickness_values) > 0:
min_len = min(len(wavenumber_clean), len(thickness_values))
plot_thickness_distribution(
result_folder,
wavenumber_clean[:min_len],
reflectivity_denoised[:min_len],
np.array(thickness_values)[:min_len]
)
error_report[file_name] = {
"去噪前后信号MSE": np.mean((reflectivity_clean - reflectivity_denoised) ** 2),
"波峰检测数量": int(len(peak_indices)),
"波谷检测数量": int(len(valley_indices))
}
sys.stdout = console
save_log(save_path, log_output.getvalue())
# ✅ 一致性验证
with open(os.path.join(result_folder, "误差分析报告.json"), "w", encoding="utf-8") as f:
json.dump(error_report, f, ensure_ascii=False, indent=4)
if "附件1" in thickness_results and "附件2" in thickness_results:
thickness_10 = thickness_results["附件1"]
thickness_15 = thickness_results["附件2"]
thickness_consistency_analysis(thickness_10, thickness_15)
plot_thickness_comparison(result_folder, [10, 15], [thickness_10, thickness_15])
# ✅ 干涉周期分布图
P_values = []
for file in file_paths:
file_name = os.path.splitext(os.path.basename(file))[0]
save_path = os.path.join(result_folder, file_name)
wavenumber, reflectivity = get_data(file)
wavenumber_clean, reflectivity_clean = outlier_removal(wavenumber, reflectivity)
peak_indices = find_peaks_by_derivative(wavenumber_clean, reflectivity_clean)
valley_indices = find_valleys_by_derivative(wavenumber_clean, reflectivity_clean)
P = compute_interference_period_weighted(wavenumber_clean[peak_indices], wavenumber_clean[valley_indices])
if P is not None:
P_values.append(P)
if len(P_values) > 0:
plot_interference_period_distribution(result_folder, P_values)
最新发布