# TODO: 检查是否有逻辑错误(check for logic errors)
'''
一、数据预处理
1、剔除异常值(阈值法,去除反射率小于0或大于100(百分比制)的数据),防止异常值干扰后续计算
2、使用Savitzky-Golay滤波器平滑数据
Savitzky-Golay 滤波器窗口大小设置为11,适中,平滑信号,保留干涉条纹特征
多项式拟合阶数设置为3,适中阶数,保留高频特征,避免过拟合
3、使用小波变换进行去噪(适用于非平稳信号)
小波去噪使用的基函数类型db4,适合信号去噪,保持细节
小波分解层数为1,仅一层分解,防止过度去噪丢失干涉特征
保留干涉特征,去除高频噪声
4、插值统一信号长度,target_length=自适应,保证不同文件数据长度一致,便于后续处理
5、将反射率从百分比(0~100)除以100换算至[0,1]小数,统一量纲
二、特征提取
1、波峰波谷检测,使用 scipy.signal.find_peaks 检测干涉条纹
控制波峰/波谷检测的显著性阈值prominence=0.01,提高灵敏度,避免漏检干涉条纹
设置相邻波峰/波谷最小间距distance=20,防止检测过于密集,提升稳定性
2、干涉周期P:RANSAC 回归(np.random.seed(42),保持随机种子为42以确保数据的一致性) + 中位数。剔除异常值,提取稳定周期,抗异常值能力强,鲁棒性高
三、物理关系
...
四、结果输出与分析
!!将scipy.signal.find_peaks 检测干涉条纹与导数法形成对比,最终选择scipy.signal.find_peaks
使用 find_peaks 函数检测波峰(极大值)和波谷(极小值)。准确性依赖于参数设置
对反射率数据使用 find_peaks 检测波峰时,设置高度阈值为平均值的80%,并设置最小距离为20。
对反射率的负值使用 find_peaks 检测波谷,即极小值。
使用数学原理方法(导数法)。抗噪声能力差
'''
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pywt
from scipy.signal import savgol_filter, find_peaks
from scipy.interpolate import interp1d
import sys
import io
import json
from sklearn.linear_model import RANSACRegressor
# Use a CJK-capable font so Chinese axis labels/titles render in plots.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei'] # widely available Chinese font
plt.rcParams['axes.unicode_minus'] = False  # keep minus signs rendering correctly with CJK fonts
# Fix the global RNG seed so RANSAC-based period extraction is reproducible.
np.random.seed(42)
def create_result_folder(folder_name="results"):
    """Create ``folder_name`` (including parents) if needed and return it.

    Uses ``os.makedirs(..., exist_ok=True)`` instead of the original
    exists()/makedirs() pair, which had a check-then-create race and is
    not atomic. Behavior is otherwise identical: the path is returned
    whether or not it already existed.
    """
    os.makedirs(folder_name, exist_ok=True)
    return folder_name
def save_plot(save_path, filename, fig=None):
    """Save a figure as a high-resolution PNG and close it.

    If ``fig`` is given, that figure is saved and closed; otherwise the
    current pyplot figure is used.
    """
    target = os.path.join(save_path, filename)
    if fig is None:
        plt.savefig(target, dpi=300, bbox_inches='tight')
        plt.close()
    else:
        fig.savefig(target, dpi=300, bbox_inches='tight')
        plt.close(fig)
# Directory containing this script; the input .xlsx attachments are resolved against it.
path = os.path.dirname(os.path.abspath(__file__))
# 1. 数据读取与异常值剔除
def get_data(file_path):
    """Read one spectrum workbook and report basic data-quality statistics.

    Expects column 0 = wavenumber (cm⁻¹) and column 1 = reflectivity (%).
    Prints NaN counts and out-of-range counts, then returns the two columns
    as numpy arrays.
    """
    file_name = os.path.basename(file_path)
    data = pd.read_excel(file_path, header=0)
    # Report missing values before any processing.
    if data.isna().any().any():
        print(f"{file_name}存在空值:")
        print(data.isna().sum())
    else:
        print(f"{file_name}无空值")
    wavenumber = data.iloc[:, 0].values
    reflectivity = data.iloc[:, 1].values
    # Reflectivity is in percent; anything outside [0, 100] is suspect.
    print(f"{file_name}反射率小于0的个数:{np.sum(reflectivity < 0)}")
    print(f"{file_name}反射率大于100的个数:{np.sum(reflectivity > 100)}")
    return wavenumber, reflectivity
def outlier_removal(wavenumber, reflectivity):
    """Drop samples whose reflectivity lies outside the physical 0–100 % range."""
    keep = ~((reflectivity < 0) | (reflectivity > 100))
    return wavenumber[keep], reflectivity[keep]
# 2. 数据平滑与去噪
def apply_savgol_filter(reflectivity, window_length=11, polyorder=3):
    """Smooth the spectrum with a Savitzky-Golay filter.

    The defaults (window 11, cubic fit) smooth noise while keeping the
    interference-fringe oscillations intact.
    """
    smoothed = savgol_filter(reflectivity,
                             window_length=window_length,
                             polyorder=polyorder)
    return smoothed
def wavelet_denoise(signal, wavelet='db4', level=1, mode_ext='constant'):
    """Denoise ``signal`` by soft-thresholding its wavelet detail coefficients.

    A single-level db4 decomposition is used by default so the interference
    fringes survive; the threshold is the universal (VisuShrink) threshold
    estimated from the finest detail band.

    Bug fix: ``pywt.waverec`` can return a signal one sample LONGER than the
    input when the input length is odd, which previously desynchronized the
    denoised signal from the wavenumber axis wherever the caller did not
    re-interpolate. The reconstruction is now trimmed to the input length.
    """
    coeffs = pywt.wavedec(signal, wavelet, level=level, mode=mode_ext)
    # Universal threshold sigma * sqrt(2 ln N), sigma taken from the finest details.
    threshold = np.std(coeffs[-level]) * np.sqrt(2 * np.log(len(signal)))
    coeffs = [pywt.threshold(c, threshold, mode='soft') for c in coeffs]
    reconstructed = pywt.waverec(coeffs, wavelet, mode=mode_ext)
    # Trim the possible extra sample so output length always equals input length.
    return reconstructed[:len(signal)]
def equal_length(signal, target_length):
    """Linearly resample ``signal`` onto ``target_length`` evenly spaced points."""
    source_grid = np.linspace(0, 1, len(signal))
    target_grid = np.linspace(0, 1, target_length)
    interpolator = interp1d(source_grid, signal, kind='linear')
    return interpolator(target_grid)
def normalize(reflectivity):
    """Convert percent reflectivity (0–100) to the [0, 1] fraction scale."""
    percent_full_scale = 100.0
    return reflectivity / percent_full_scale
# 3. 使用 find_peaks 替代导数法
def find_peaks_scipy(signal, prominence=1.0, distance=10):
    """Indices of local maxima satisfying the prominence/spacing constraints."""
    indices, _props = find_peaks(signal, prominence=prominence, distance=distance)
    return indices
def find_valleys_scipy(signal, prominence=1.0, distance=10):
    """Indices of local minima, found as peaks of the negated signal."""
    indices, _props = find_peaks(-signal, prominence=prominence, distance=distance)
    return indices
# 4. 提取主频信息
def get_dominant_frequency(signal, fs=1.0):
    """Return the frequency of the strongest non-DC spectral component.

    Only the positive half of the FFT (excluding bin 0) is searched, so the
    result is the absolute dominant frequency in cycles per sample unit.
    """
    n = len(signal)
    spectrum = np.abs(np.fft.fft(signal))
    freq_axis = np.fft.fftfreq(n, d=1/fs)
    # Skip the DC bin and the mirrored negative-frequency half.
    peak_bin = 1 + np.argmax(spectrum[1:n//2])
    return abs(freq_axis[peak_bin])
def plot_fft(signal, fs=1.0, title="FFT"):
    """Plot the one-sided FFT magnitude spectrum and return the (closed) figure."""
    n = len(signal)
    spectrum = np.abs(np.fft.fft(signal))
    freq_axis = np.fft.fftfreq(n, d=1/fs)
    fig = plt.figure(figsize=(10, 4))
    # Only the non-negative half of the spectrum is informative for real input.
    plt.plot(freq_axis[:n//2], spectrum[:n//2], label='幅值')
    plt.title(title, fontsize=14)
    plt.xlabel('频率(cycles per cm⁻¹)', fontsize=12)
    plt.ylabel('幅值', fontsize=12)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.close()
    return fig
# 5. 数据保存函数
def save_peaks_valleys(save_path, wavenumber, reflectivity, peaks, valleys):
    """Write detected peak and valley coordinates to 波峰.csv / 波谷.csv."""
    for indices, csv_name in ((peaks, '波峰.csv'), (valleys, '波谷.csv')):
        table = pd.DataFrame({
            '波数': wavenumber[indices],
            '反射率': reflectivity[indices],
        })
        table.to_csv(os.path.join(save_path, csv_name), index=False)
def save_reflectivity_data(save_path, wavenumber, reflectivity):
    """Dump the denoised spectrum to CSV (columns: 波数, 反射率)."""
    out_file = os.path.join(save_path, '去噪后反射率数据.csv')
    pd.DataFrame({'波数': wavenumber, '反射率': reflectivity}).to_csv(out_file, index=False)
def save_dominant_frequency(save_path, freq):
    """Persist the dominant-frequency estimate as a one-line text report."""
    report = os.path.join(save_path, '主频信息.txt')
    with open(report, 'w', encoding='utf-8') as fh:
        fh.write(f"信号主频(频域): {freq:.4f} cycles per cm⁻¹\n")
def save_data_stats(save_path, file_name, negative_num, over_one_num):
    """Record the raw-data outlier counts for one input file."""
    lines = [
        f"文件名: {file_name}\n",
        f"反射率小于0的个数: {negative_num}\n",
        f"反射率大于100的个数: {over_one_num}\n",
    ]
    with open(os.path.join(save_path, '原始数据统计.txt'), 'w', encoding='utf-8') as fh:
        fh.writelines(lines)
def save_log(save_path, log_content):
    """Write the captured console output verbatim to 日志.txt."""
    log_file = os.path.join(save_path, '日志.txt')
    with open(log_file, 'w', encoding='utf-8') as fh:
        fh.write(log_content)
# 新增函数:波数转波长
def wavenumber_to_wavelength(wavenumber):
    """Convert wavenumber (cm⁻¹) to wavelength (μm): λ[μm] = 10⁴ / ν[cm⁻¹]."""
    microns_per_inverse_cm = 1e4
    return microns_per_inverse_cm / wavenumber
# 新增函数:折射率经验公式
def refractive_index(wavelength):
    """Sellmeier-type empirical dispersion relation (wavelength in μm).

    n² = 1 + Σᵢ Bᵢ·λ² / (λ² + Cᵢ) with the three (Bᵢ, Cᵢ) pairs below,
    matching the original three-term formula.
    """
    lam_sq = wavelength ** 2
    # (B, C) pairs; negative C reproduces the original λ² - |C| denominators.
    sellmeier_pairs = (
        (0.20075, 12.07224),
        (5.54861, -0.02641),
        (35.65066, -1268.24708),
    )
    n_squared = 1.0 + sum(b * lam_sq / (lam_sq + c) for b, c in sellmeier_pairs)
    return np.sqrt(n_squared)
# 复折射率
def refractive_index_complex(wavelength, reflectance):
    """Complex refractive index n + iκ from reflectance (in percent).

    The real part comes from the dispersion relation; the extinction κ is
    inverted from the normal-incidence reflectance formula and clamped to 0
    whenever the ratio would be non-physical (R >= 1 or negative numerator).
    """
    n_real = refractive_index(wavelength)
    fraction = reflectance / 100.0
    top = fraction * (n_real + 1) ** 2 - (n_real - 1) ** 2
    bottom = 1 - fraction
    if bottom > 0 and top >= 0:
        kappa = np.sqrt(top / bottom)
    else:
        # Non-physical ratio: fall back to a purely real index.
        kappa = 0.0
    return complex(n_real, kappa)
# 厚度计算
def calculate_thickness_complex(P, wavelength, theta1_deg, reflectance):
    """Film thickness from the interference period, using a complex index.

    Implements d = (P - 0.5) · λ · sqrt(n² - sin²θ₁) / (2 n²) with complex n,
    and returns the real part of d as the physical thickness.
    """
    sin_theta = np.sin(np.deg2rad(theta1_deg))
    n_complex = refractive_index_complex(wavelength, reflectance)
    n_squared = n_complex ** 2
    # Complex square root folds the absorption (κ) into the geometry factor.
    geometry_factor = np.sqrt(n_squared - sin_theta ** 2)
    thickness = (P - 0.5) * wavelength * geometry_factor / (2 * n_squared)
    return thickness.real
# 干涉周期计算(中位数 + 固定种子 RANSAC)
def compute_interference_period_weighted(peaks_wavenumber, valleys_wavenumber):
    """Robust interference period: RANSAC inlier filtering + median spacing.

    Pools the wavenumber spacings of consecutive peaks and of consecutive
    valleys, discards RANSAC outliers (fixed random_state=42 for
    reproducibility), and returns the median inlier spacing. Returns None
    when fewer than two peaks or two valleys are available.
    """
    if min(len(peaks_wavenumber), len(valleys_wavenumber)) < 2:
        return None
    spacings = np.concatenate([np.diff(peaks_wavenumber),
                               np.diff(valleys_wavenumber)])
    model = RANSACRegressor(random_state=42)
    model.fit(np.arange(len(spacings))[:, None], spacings)
    # Median over inliers only: robust against residual outlier spacings.
    return np.median(spacings[model.inlier_mask_])
# 厚度计算并保存
def compute_and_save_thickness(save_path, peaks_wavenumber, valleys_wavenumber, reflectivity_denoised, theta1_deg):
    """Estimate the film thickness from detected fringes and write a report.

    Returns the thickness in μm, or None when too few extrema were found
    or no stable interference period could be extracted.
    """
    if len(peaks_wavenumber) < 2 or len(valleys_wavenumber) < 2:
        print("无法计算干涉周期,波峰/波谷数量不足")
        return None
    P = compute_interference_period_weighted(peaks_wavenumber, valleys_wavenumber)
    if P is None:
        print("无法提取干涉周期")
        return None
    # Representative wavelength: mean extremum wavenumber converted to μm.
    mean_wavenumber = np.mean(np.concatenate([peaks_wavenumber, valleys_wavenumber]))
    wavelength = wavenumber_to_wavelength(mean_wavenumber)
    mean_reflectance = np.mean(reflectivity_denoised)
    thickness = calculate_thickness_complex(P, wavelength, theta1_deg, mean_reflectance)
    result_file = os.path.join(save_path, '厚度计算结果.txt')
    report_lines = [
        f"入射角: {theta1_deg}°\n",
        f"干涉周期: {P:.4f} cm⁻¹\n",
        f"平均波长: {wavelength:.4f} μm\n",
        f"平均反射率: {mean_reflectance:.2f}%\n",
        f"计算厚度: {thickness:.4f} μm\n",
        "公式:d = (P - 0.5) * λ * sqrt(n₀² - sin²θ1) / (2n₀²) * (1 + (κ/n₀)²)\n",
    ]
    with open(result_file, 'w', encoding='utf-8') as fh:
        fh.writelines(report_lines)
    print(f"\n厚度计算结果已保存至:{result_file}")
    print(f"厚度:{thickness:.4f} μm")
    return thickness
# 厚度一致性分析
def thickness_consistency_analysis(thickness_10, thickness_15):
    """Compare the two thickness estimates and return their relative error (%).

    The relative error is |d₁ - d₂| divided by the mean of the two values,
    expressed in percent; below 1 % the estimates are considered consistent.
    """
    mean_thickness = (thickness_10 + thickness_15) / 2
    relative_error = abs(thickness_10 - thickness_15) / mean_thickness * 100
    print(f"\n【厚度一致性分析】")
    print(f"附件1厚度估计:{thickness_10:.4f} μm")
    print(f"附件2厚度估计:{thickness_15:.4f} μm")
    print(f"相对误差:{relative_error:.2f}%")
    verdict = ("厚度估计稳定,误差 < 1%" if relative_error < 1
               else "厚度估计不稳定,误差 > 1%,可能存在系统误差")
    print(verdict)
    return relative_error
# 绘图函数
def plot_thickness_distribution(save_path, wavenumbers, reflectivity, thickness_values):
    """Overlay the reflectivity curve and the local thickness estimates."""
    plt.figure(figsize=(12, 6))
    plt.plot(wavenumbers, reflectivity, color='blue', label='反射率')
    # Guard against unequal series lengths from the sliding-window estimator.
    common = min(len(wavenumbers), len(thickness_values))
    plt.plot(wavenumbers[:common], thickness_values[:common],
             color='red', linestyle='--', label='局部厚度估计')
    plt.title('反射率与局部厚度估计分布')
    plt.xlabel('波数 (cm⁻¹)')
    plt.ylabel('反射率 (%) / 厚度 (μm)')
    plt.legend()
    plt.grid(True)
    # Spectroscopy convention: wavenumber decreasing left to right.
    plt.gca().invert_xaxis()
    save_plot(save_path, '厚度分布图.png')
    plt.close()
def plot_thickness_comparison(save_path, angles, thicknesses):
    """Bar chart comparing the thickness estimates at each incidence angle."""
    plt.figure(figsize=(8, 6))
    bar_labels = [f"{a}°" for a in angles]
    plt.bar(bar_labels, thicknesses, color=['blue', 'green'], alpha=0.7)
    plt.title('不同入射角下的厚度估计')
    plt.xlabel('入射角')
    plt.ylabel('厚度 (μm)')
    plt.grid(True)
    save_plot(save_path, '不同入射角厚度图.png')
    plt.close()
def plot_interference_period_distribution(save_path, P_values):
    """Histogram of the interference-period estimates across input files."""
    plt.figure(figsize=(12, 6))
    plt.hist(P_values, bins=20, alpha=0.7, edgecolor='black')
    plt.title('干涉周期 $P$ 分布直方图')
    plt.xlabel('干涉周期 (cm⁻¹)')
    plt.ylabel('频次')
    plt.grid(True)
    save_plot(save_path, '干涉周期分布图.png')
    plt.close()
# 新增函数:绘制厚度随入射角变化曲线
def plot_thickness_vs_angle(save_path, angles, thicknesses):
    """Line plot of the thickness estimate as a function of incidence angle."""
    plt.figure(figsize=(10, 6))
    plt.plot(angles, thicknesses,
             color='blue', marker='o', linestyle='-', label='厚度')
    plt.title('薄膜厚度随入射角变化曲线')
    plt.xlabel('入射角 (°)')
    plt.ylabel('厚度 (μm)')
    plt.grid(True)
    plt.legend()
    save_plot(save_path, '厚度随入射角变化曲线.png')
    plt.close()
# 新增函数:保存厚度随角度变化数据
def save_thickness_vs_angle(save_path, angles, thicknesses):
    """Persist the thickness-vs-incidence-angle sweep as a CSV table."""
    table = pd.DataFrame({'入射角': angles, '厚度': thicknesses})
    table.to_csv(os.path.join(save_path, '厚度随入射角变化数据.csv'), index=False)
# 主程序
# 主程序中修改部分
if __name__ == "__main__":
    result_folder = create_result_folder("results")
    file_paths = [
        os.path.join(path, "附件1.xlsx"),
        os.path.join(path, "附件2.xlsx")
    ]
    error_report = {}       # per-file denoising / detection statistics
    thickness_results = {}  # per-file thickness estimate (μm)
    for file in file_paths:
        file_name = os.path.splitext(os.path.basename(file))[0]
        save_path = os.path.join(result_folder, file_name)
        create_result_folder(save_path)
        # Redirect stdout so every diagnostic print lands in the per-file log.
        log_output = io.StringIO()
        console = sys.stdout
        sys.stdout = log_output
        try:
            print(f"\n对于文件:{os.path.basename(file)}")
            wavenumber, reflectivity = get_data(file)
            # --- preprocessing: outliers -> SG smoothing -> wavelet denoise -> resample ---
            wavenumber_clean, reflectivity_clean = outlier_removal(wavenumber, reflectivity)
            reflectivity_smooth = apply_savgol_filter(reflectivity_clean)
            reflectivity_denoised = wavelet_denoise(reflectivity_smooth, mode_ext='constant')
            # Keep the denoised signal aligned 1:1 with the wavenumber axis.
            reflectivity_denoised = equal_length(reflectivity_denoised, len(wavenumber_clean))
            # --- fringe extrema detection ---
            peaks = find_peaks_scipy(reflectivity_denoised, prominence=0.01, distance=20)
            valleys = find_valleys_scipy(reflectivity_denoised, prominence=0.01, distance=20)
            # Incidence angle is fixed by the measurement setup of each attachment.
            if "附件1" in file_name:
                theta1_deg = 10
            elif "附件2" in file_name:
                theta1_deg = 15
            else:
                theta1_deg = 0
            # --- global thickness estimate ---
            thickness = compute_and_save_thickness(
                save_path, wavenumber_clean[peaks], wavenumber_clean[valleys],
                reflectivity_denoised, theta1_deg)
            if thickness is not None:
                thickness_results[file_name] = thickness
            # --- thickness vs incidence-angle sweep (10° .. 15°, 0.5° step) ---
            if len(peaks) > 2 and len(valleys) > 2:
                P = compute_interference_period_weighted(wavenumber_clean[peaks],
                                                         wavenumber_clean[valleys])
                if P is not None:
                    # BUGFIX: average the actual extrema wavenumbers; the original
                    # concatenated the integer array INDICES, so the mean
                    # "wavenumber" (and the derived wavelength) was meaningless.
                    all_wavenumbers = np.concatenate([wavenumber_clean[peaks],
                                                      wavenumber_clean[valleys]])
                    wavelength = wavenumber_to_wavelength(np.mean(all_wavenumbers))
                    mean_reflectance = np.mean(reflectivity_denoised)
                    angles = np.arange(10, 15.1, 0.5)
                    thicknesses = [calculate_thickness_complex(P, wavelength, angle, mean_reflectance)
                                   for angle in angles]
                    save_thickness_vs_angle(save_path, angles, thicknesses)
                    plot_thickness_vs_angle(save_path, angles, thicknesses)
            # --- sliding-window local thickness ---
            window_size = 50
            step = 10
            thickness_values = []
            for i in range(0, len(wavenumber_clean) - window_size, step):
                w = wavenumber_clean[i:i + window_size]
                r = reflectivity_denoised[i:i + window_size]
                # BUGFIX: the period estimator expects peak/valley WAVENUMBERS;
                # the original passed the raw wavenumber and reflectivity window
                # samples, producing a nonsensical "period". Detect extrema
                # inside the window with the same parameters as above.
                win_peaks = find_peaks_scipy(r, prominence=0.01, distance=20)
                win_valleys = find_valleys_scipy(r, prominence=0.01, distance=20)
                p = compute_interference_period_weighted(w[win_peaks], w[win_valleys])
                if p is not None:
                    wl = wavenumber_to_wavelength(np.mean(w))
                    # BUGFIX: use this file's actual incidence angle (was
                    # hard-coded to 10° even for the 15° measurement).
                    thickness_values.append(
                        calculate_thickness_complex(p, wl, theta1_deg, np.mean(r)))
                else:
                    thickness_values.append(np.nan)
            if len(thickness_values) > 0:
                min_len = min(len(wavenumber_clean), len(thickness_values))
                # BUGFIX: write the plot into the per-file folder; the shared
                # results folder made the second file overwrite the first.
                plot_thickness_distribution(
                    save_path,
                    wavenumber_clean[:min_len],
                    reflectivity_denoised[:min_len],
                    np.array(thickness_values)[:min_len]
                )
            error_report[file_name] = {
                # float() cast: np.float64 is not JSON-serializable and would
                # make json.dump below raise TypeError.
                "去噪前后信号MSE": float(np.mean((reflectivity_clean - reflectivity_denoised) ** 2)),
                "波峰检测数量": int(len(peaks)),
                "波谷检测数量": int(len(valleys))
            }
        finally:
            # Always restore stdout, even if processing a file fails.
            sys.stdout = console
        save_log(save_path, log_output.getvalue())
    # Error-analysis report across all files.
    with open(os.path.join(result_folder, "误差分析报告.json"), "w", encoding="utf-8") as f:
        json.dump(error_report, f, ensure_ascii=False, indent=4)
    # Cross-file consistency check of the two thickness estimates.
    if "附件1" in thickness_results and "附件2" in thickness_results:
        thickness_10 = thickness_results["附件1"]
        thickness_15 = thickness_results["附件2"]
        thickness_consistency_analysis(thickness_10, thickness_15)
        plot_thickness_comparison(result_folder, [10, 15], [thickness_10, thickness_15])
    # Interference-period distribution, recomputed with the SAME pipeline and
    # detection parameters as above. (The original skipped smoothing and length
    # alignment and used different find_peaks defaults, so its peak indices
    # could disagree with — or even run past — wavenumber_clean.)
    P_values = []
    for file in file_paths:
        wavenumber, reflectivity = get_data(file)
        wavenumber_clean, reflectivity_clean = outlier_removal(wavenumber, reflectivity)
        reflectivity_denoised = wavelet_denoise(apply_savgol_filter(reflectivity_clean))
        reflectivity_denoised = equal_length(reflectivity_denoised, len(wavenumber_clean))
        peaks = find_peaks_scipy(reflectivity_denoised, prominence=0.01, distance=20)
        valleys = find_valleys_scipy(reflectivity_denoised, prominence=0.01, distance=20)
        P = compute_interference_period_weighted(wavenumber_clean[peaks], wavenumber_clean[valleys])
        if P is not None:
            P_values.append(P)
    if len(P_values) > 0:
        plot_interference_period_distribution(result_folder, P_values)