Rationale and basis for fixing the random seed at 42
'''
I. Data preprocessing
1. Outlier removal (threshold method: discard reflectance values below 0% or above 100%) so that outliers do not distort later calculations
2. Smooth the data with a Savitzky-Golay filter
   Window length 11: a moderate size that smooths the signal while preserving the interference-fringe features
   Polynomial order 3: a moderate order that keeps high-frequency detail without overfitting
3. Denoise with the wavelet transform (suited to non-stationary signals)
   Wavelet basis db4: well suited to denoising while preserving detail
   Decomposition level 1: a single level avoids over-denoising and losing the interference features
   Keeps the interference features and removes high-frequency noise
4. Interpolate to a common length (target_length is adaptive, set to the length of the cleaned wavenumber array) so data from different files have the same length for later processing
5. Scale reflectance to [0,1] by dividing the percent values by 100, so both files share the same scale
(The corresponding call chain is sketched right after this list.)
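
The preprocessing call chain used in the script below (mirroring the main loop) is:

    wavenumber_clean, reflectivity_clean = outlier_removal(wavenumber, reflectivity)
    reflectivity_smooth   = apply_savgol_filter(reflectivity_clean)      # window 11, order 3
    reflectivity_denoised = wavelet_denoise(reflectivity_smooth)         # db4, level 1, soft threshold
    reflectivity_denoised = equal_length(reflectivity_denoised, len(wavenumber_clean))
    reflectivity_normalized = normalize(reflectivity_denoised)           # divide by 100
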
II. Feature extraction
1. Peak/valley detection with scipy.signal.find_peaks to locate the interference fringes
   Prominence threshold prominence=0.01: high sensitivity so that fringes are not missed
   Minimum spacing distance=20 between adjacent peaks/valleys: prevents overly dense detections and improves stability
2. Fast Fourier transform (FFT) to identify the dominant frequency of the interference signal and hence its period: the wavenumber-reflectance curve is transformed into the frequency domain (the frequency components it contains)
   fs (sampling frequency) is set to 1.0, i.e. unit sample spacing, so the resulting frequencies are in cycles per sample
3. Interference period P: RANSAC regression (np.random.seed(42); the seed is fixed at 42 for reproducibility) plus the median of the inlier spacings, which rejects outliers and gives a stable, robust period estimate
III. Physical relations
...
IV. Output and analysis of results
!! Fringe detection with scipy.signal.find_peaks was compared against a derivative-based method; find_peaks was chosen in the end (a sketch of the derivative approach follows below)
find_peaks detects the peaks (maxima) and valleys (minima); accuracy depends on the parameter settings
In the comparison run, peaks in the reflectance data were detected with a height threshold of 80% of the mean and a minimum distance of 20 (the final pipeline uses the prominence/distance settings listed above).
Valleys (minima) are found by applying find_peaks to the negated reflectance.
The purely analytical approach (derivative method) has poor noise robustness
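
For reference, a minimal sketch of the derivative-based alternative (illustrative only; it is not part of the pipeline below, and the helper name detect_extrema_derivative and its parameters are assumptions):

    import numpy as np
    from scipy.signal import savgol_filter

    def detect_extrema_derivative(signal, window_length=11, polyorder=3):
        # smooth first, since differentiation amplifies noise
        smooth = savgol_filter(signal, window_length=window_length, polyorder=polyorder)
        d1 = np.gradient(smooth)
        sign_change = np.diff(np.sign(d1))
        peaks = np.where(sign_change < 0)[0] + 1      # derivative goes + to -: local maximum
        valleys = np.where(sign_change > 0)[0] + 1    # derivative goes - to +: local minimum
        return peaks, valleys

Even with pre-smoothing, small noise-induced wiggles in the derivative produce spurious sign changes, which is why find_peaks with a prominence threshold was preferred.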
'''
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pywt
from scipy.signal import savgol_filter,find_peaks
from scipy.interpolate import interp1d
import sys
import io
import json
from sklearn.linear_model import RANSACRegressor
plt.rcParams['font.sans-serif']=['Microsoft YaHei']
plt.rcParams['axes.unicode_minus']=False
# Fix the random seed so that RANSAC's random sampling is reproducible across runs (42 is an arbitrary but conventional choice)
np.random.seed(42)
def create_result_folder(folder_name="results"):
if not os.path.exists(folder_name):
os.makedirs(folder_name)
return folder_name
def save_plot(save_path,filename,fig=None):
if fig is not None:
fig.savefig(os.path.join(save_path,filename),dpi=300,bbox_inches='tight')
plt.close(fig)
else:
plt.savefig(os.path.join(save_path,filename), dpi=300,bbox_inches='tight')
plt.close()
path=os.path.dirname(os.path.abspath(__file__))
# Data loading, quality checks, and outlier removal
def get_data(file_path):
file_name=os.path.basename(file_path)
data=pd.read_excel(file_path,header=0)
if data.isna().any().any():
print(f"{file_name}存在空值:")
print(data.isna().sum())
else:
print(f"{file_name}无空值")
wavenumber=data.iloc[:, 0].values
reflectivity=data.iloc[:, 1].values
negative_num=np.sum(reflectivity<0)
over_one_num=np.sum(reflectivity>100)
print(f"{file_name}反射率小于0的个数:{negative_num}")
print(f"{file_name}反射率大于100的个数:{over_one_num}")
return wavenumber,reflectivity
def outlier_removal(wavenumber,reflectivity):
valid_value=(reflectivity>=0)&(reflectivity<=100)
return wavenumber[valid_value],reflectivity[valid_value]
# Smoothing and denoising
def apply_savgol_filter(reflectivity,window_length=11,polyorder=3):
return savgol_filter(reflectivity,window_length=window_length,polyorder=polyorder)
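# wavelet_denoise below uses the VisuShrink-style universal threshold
# sigma * sqrt(2 * ln(N)), with sigma estimated as the standard deviation of the
# finest detail coefficients, and soft-thresholds every coefficient band before
# reconstructing the signal.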
def wavelet_denoise(signal,wavelet='db4',level=1,mode_ext='constant'):
coeffs=pywt.wavedec(signal,wavelet,level=level,mode=mode_ext)
threshold=np.std(coeffs[-level])*np.sqrt(2*np.log(len(signal)))
coeffs=[pywt.threshold(c,threshold,mode='soft') for c in coeffs]
reconstructed=pywt.waverec(coeffs,wavelet,mode=mode_ext)
return reconstructed
def equal_length(signal,target_length):
x=np.linspace(0,1,len(signal))
x_target=np.linspace(0,1,target_length)
f=interp1d(x,signal,kind='linear')
return f(x_target)
def normalize(reflectivity):
return reflectivity/100.0
# Peak and valley detection
def find_peaks_scipy(signal,prominence=1.0,distance=10):
peaks, _ =find_peaks(signal,prominence=prominence,distance=distance)
return peaks
def find_valleys_scipy(signal,prominence=1.0,distance=10):
valleys, _ =find_peaks(-signal,prominence=prominence,distance=distance)
return valleys
# Dominant-frequency extraction (FFT)
def get_dominant_frequency(signal,fs=1.0):
n=len(signal)
fft_result=np.fft.fft(signal)
freqs=np.fft.fftfreq(n,d=1/fs)
magnitude=np.abs(fft_result)
dominant_freq=freqs[np.argmax(magnitude[1:n//2])+1]
return abs(dominant_freq)
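# Note: get_dominant_frequency returns a frequency in cycles per sample (fs=1.0)
# and is not called in the main pipeline below. Illustrative use only: a period in
# cm⁻¹ could be obtained as the mean wavenumber step divided by this frequency, e.g.
#     P_fft = abs(np.mean(np.diff(wavenumber_clean))) / get_dominant_frequency(reflectivity_denoised)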
def plot_fft(signal,fs=1.0,title="FFT"):
n=len(signal)
fft_result=np.fft.fft(signal)
freqs=np.fft.fftfreq(n,d=1/fs)
magnitude=np.abs(fft_result)
fig=plt.figure(figsize=(10,4))
plt.plot(freqs[:n//2],magnitude[:n//2],label='幅值')
plt.title(title,fontsize=14)
plt.xlabel('频率(cycles per cm⁻¹)',fontsize=12)
plt.ylabel('幅值',fontsize=12)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.close()
return fig
def save_peaks_valleys(save_path,wavenumber,reflectivity,peaks,valleys):
peaks_data=pd.DataFrame({
'波数': wavenumber[peaks],
'反射率': reflectivity[peaks]
})
valleys_data=pd.DataFrame({
'波数': wavenumber[valleys],
'反射率': reflectivity[valleys]
})
peaks_data.to_csv(os.path.join(save_path,'波峰.csv'),index=False)
valleys_data.to_csv(os.path.join(save_path,'波谷.csv'),index=False)
def save_reflectivity_data(save_path,wavenumber,reflectivity):
df=pd.DataFrame({
'波数': wavenumber,
'反射率': reflectivity
})
df.to_csv(os.path.join(save_path,'去噪后反射率数据.csv'),index=False)
def save_dominant_frequency(save_path,freq):
with open(os.path.join(save_path,'主频信息.txt'),'w',encoding='utf-8') as f:
f.write(f"信号主频(频域):{freq:.4f} cycles per cm⁻¹\n")
def save_data_stats(save_path, file_name, negative_num, over_one_num):
with open(os.path.join(save_path, '原始数据统计.txt'), 'w', encoding='utf-8') as f:
f.write(f"文件名: {file_name}\n")
f.write(f"反射率小于0的个数: {negative_num}\n")
f.write(f"反射率大于100的个数: {over_one_num}\n")
def save_log(save_path,log_content):
with open(os.path.join(save_path,'日志.txt'),'w',encoding='utf-8') as f:
f.write(log_content)
# Convert wavenumber (cm⁻¹) to wavelength (μm)
def wavenumber_to_wavelength(wavenumber):
    return 1e4/wavenumber  # wavelength in μm, e.g. 2000 cm⁻¹ -> 5 μm
# Empirical (Sellmeier-type) dispersion formula for the refractive index, wavelength in μm
def refractive_index(wavelength):
term1=(0.20075*wavelength**2)/(wavelength**2+12.07224)
term2=(5.54861*wavelength**2)/(wavelength**2-0.02641)
term3=(35.65066*wavelength**2)/(wavelength**2-1268.24708)
return np.sqrt(1+term1+term2+term3)
# Complex refractive index ñ = n0 + iκ
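# The extinction coefficient κ follows from inverting the normal-incidence
# reflectance relation R = ((n0-1)² + κ²) / ((n0+1)² + κ²), i.e.
# κ² = (R·(n0+1)² - (n0-1)²) / (1 - R); non-physical (negative or singular)
# cases fall back to κ = 0.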
def refractive_index_complex(wavelength,reflectance):
n0=refractive_index(wavelength)
R=reflectance/100.0
numerator=R*(n0+1)**2-(n0-1)**2
denominator=1-R
if denominator<=0 or numerator<0:
kappa=0.0
else:
kappa=np.sqrt(numerator/denominator)
return complex(n0,kappa)
# Film thickness from the interference period, wavelength, incidence angle, and reflectance
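# As implemented here the thickness is
#     d = Re[ (P - 0.5) · λ · sqrt(ñ² - sin²θ₁) / (2 ñ²) ],   ñ = n0 + iκ,
# with P the interference period (cm⁻¹), λ the mean wavelength (μm) and θ₁ the
# incidence angle in degrees; only the real part of d is returned.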
def calculate_thickness_complex(P,wavelength,theta1_deg,reflectance):
theta1=np.deg2rad(theta1_deg)
sin_theta1=np.sin(theta1)
n_complex=refractive_index_complex(wavelength,reflectance)
n0=n_complex.real
kappa=n_complex.imag
n_squared=n_complex**2
expr=n_squared-sin_theta1**2
sqrt_expr=np.sqrt(expr)
d=((P-0.5)*wavelength*sqrt_expr)/(2*n_squared)
    return d.real  # take the real part as the final thickness
# Interference period
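# Peak-to-peak and valley-to-valley wavenumber spacings are pooled, a RANSAC line
# fit (spacing vs. index) flags outlier spacings, and the period P is taken as the
# median of the inlier spacings.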
def compute_interference_period_weighted(peaks_wavenumber,valleys_wavenumber):
if len(peaks_wavenumber)<2 or len(valleys_wavenumber)<2:
return None
peak_diffs=np.diff(peaks_wavenumber)
valley_diffs=np.diff(valleys_wavenumber)
all_diffs=np.concatenate([peak_diffs,valley_diffs])
X=np.arange(len(all_diffs)).reshape(-1,1)
y=all_diffs
ransac=RANSACRegressor(random_state=42)
ransac.fit(X,y)
inlier_mask=ransac.inlier_mask_
P=np.median(all_diffs[inlier_mask])
return P
# Compute the thickness and write the result file
def compute_and_save_thickness(save_path,peaks_wavenumber,valleys_wavenumber,reflectivity_denoised,theta1_deg):
if len(peaks_wavenumber)<2 or len(valleys_wavenumber)<2:
print("无法计算干涉周期,波峰/波谷数量不足")
return None
P=compute_interference_period_weighted(peaks_wavenumber,valleys_wavenumber)
if P is None:
print("无法提取干涉周期")
return None
all_wavenumbers=np.concatenate([peaks_wavenumber,valleys_wavenumber])
mean_wavenumber=np.mean(all_wavenumbers)
wavelength=wavenumber_to_wavelength(mean_wavenumber)
mean_reflectance=np.mean(reflectivity_denoised)
thickness=calculate_thickness_complex(P,wavelength,theta1_deg,mean_reflectance)
result_file=os.path.join(save_path,'厚度计算结果.txt')
with open(result_file,'w',encoding='utf-8') as f:
f.write(f"入射角: {theta1_deg}°\n")
f.write(f"干涉周期: {P:.4f} cm⁻¹\n")
f.write(f"平均波长: {wavelength:.4f} μm\n")
f.write(f"平均反射率: {mean_reflectance:.2f}%\n")
f.write(f"计算厚度: {thickness:.4f} μm\n")
f.write(f"公式:d = (P - 0.5) * λ * sqrt(n₀² - sin²θ1) / (2n₀²) * (1 + (κ/n₀)²)\n")
print(f"\n厚度计算结果已保存至:{result_file}")
print(f"厚度:{thickness:.4f} μm")
return thickness
# Thickness consistency analysis between the two attachments
def thickness_consistency_analysis(thickness_10,thickness_15):
avg_thickness=(thickness_10+thickness_15)/2
relative_error=abs(thickness_10-thickness_15)/avg_thickness*100
print(f"\n【厚度一致性分析】")
print(f"附件1厚度估计:{thickness_10:.4f} μm")
print(f"附件2厚度估计:{thickness_15:.4f} μm")
print(f"相对误差:{relative_error:.2f}%")
if relative_error<1:
print("厚度估计稳定,误差 < 1%")
else:
print("厚度估计不稳定,误差 > 1%,可能存在系统误差")
return relative_error
def plot_thickness_distribution(save_path,wavenumbers,reflectivity,thickness_values):
plt.figure(figsize=(12,6))
plt.plot(wavenumbers,reflectivity,label='反射率',color='blue')
min_len=min(len(wavenumbers),len(thickness_values))
plt.plot(wavenumbers[:min_len],thickness_values[:min_len],label='局部厚度估计',color='red',linestyle='--')
plt.title('反射率与局部厚度估计分布')
plt.xlabel('波数 (cm⁻¹)')
plt.ylabel('反射率 (%) / 厚度 (μm)')
plt.legend()
plt.grid(True)
plt.gca().invert_xaxis()
save_plot(save_path,'厚度分布图.png')
plt.close()
def plot_thickness_comparison(save_path,angles,thicknesses):
plt.figure(figsize=(8,6))
plt.bar([str(a) +'°'for a in angles],thicknesses,color=['blue','green'],alpha=0.7)
plt.title('不同入射角下的厚度估计')
plt.xlabel('入射角')
plt.ylabel('厚度 (μm)')
plt.grid(True)
save_plot(save_path,'不同入射角厚度图.png')
plt.close()
def plot_interference_period_distribution(save_path,P_values):
plt.figure(figsize=(12, 6))
plt.hist(P_values,bins=20,edgecolor='black',alpha=0.7)
plt.title('干涉周期 $P$ 分布直方图')
plt.xlabel('干涉周期 (cm⁻¹)')
plt.ylabel('频次')
plt.grid(True)
save_plot(save_path,'干涉周期分布图.png')
plt.close()
# Thickness as a function of incidence angle
def plot_thickness_vs_angle(save_path,angles,thicknesses):
plt.figure(figsize=(10,6))
plt.plot(angles,thicknesses,marker='o',linestyle='-',color='blue',label='厚度')
plt.title('薄膜厚度随入射角变化曲线')
plt.xlabel('入射角 (°)')
plt.ylabel('厚度 (μm)')
plt.grid(True)
plt.legend()
save_plot(save_path,'厚度随入射角变化曲线.png')
plt.close()
def save_thickness_vs_angle(save_path,angles,thicknesses):
df = pd.DataFrame({
'入射角': angles,
'厚度': thicknesses
})
df.to_csv(os.path.join(save_path,'厚度随入射角变化数据.csv'),index=False)
# Main program
if __name__ =="__main__":
result_folder=create_result_folder("results")
file_paths=[
os.path.join(path,"附件1.xlsx"),
os.path.join(path,"附件2.xlsx")
]
error_report={}
thickness_results={}
for file in file_paths:
log_output=io.StringIO()
console=sys.stdout
sys.stdout=log_output
print(f"\n对于文件:{os.path.basename(file)}")
wavenumber,reflectivity=get_data(file)
if wavenumber is not None and reflectivity is not None:
file_name=os.path.splitext(os.path.basename(file))[0]
save_path=os.path.join(result_folder,file_name)
create_result_folder(save_path)
            # Data preprocessing
wavenumber_clean,reflectivity_clean=outlier_removal(wavenumber,reflectivity)
reflectivity_smooth=apply_savgol_filter(reflectivity_clean)
reflectivity_denoised=wavelet_denoise(reflectivity_smooth,mode_ext='constant')
reflectivity_denoised=equal_length(reflectivity_denoised,len(wavenumber_clean))
reflectivity_normalized=normalize(reflectivity_denoised)
            # Peak/valley detection
peaks=find_peaks_scipy(reflectivity_denoised,prominence=0.01,distance=20)
valleys=find_valleys_scipy(reflectivity_denoised,prominence=0.01,distance=20)
if "附件1" in file_name:
theta1_deg=10
elif "附件2" in file_name:
theta1_deg=15
else:
theta1_deg=0
            # Thickness
thickness=compute_and_save_thickness(save_path,wavenumber_clean[peaks],wavenumber_clean[valleys],
reflectivity_denoised,theta1_deg)
if thickness is not None:
thickness_results[file_name]=thickness
            # Thickness vs. incidence angle
if len(peaks)>2 and len(valleys)>2:
P=compute_interference_period_weighted(wavenumber_clean[peaks],wavenumber_clean[valleys])
if P is not None:
                    all_wavenumbers=np.concatenate([wavenumber_clean[peaks],wavenumber_clean[valleys]])
mean_wavenumber=np.mean(all_wavenumbers)
wavelength=wavenumber_to_wavelength(mean_wavenumber)
mean_reflectance=np.mean(reflectivity_denoised)
angles=np.arange(10,15.1,0.5)
thicknesses=[]
for angle in angles:
t=calculate_thickness_complex(P,wavelength,angle,mean_reflectance)
thicknesses.append(t)
save_thickness_vs_angle(save_path,angles,thicknesses)
plot_thickness_vs_angle(save_path,angles,thicknesses)
            # Local thickness estimate over a sliding window
            window_size=50
            step=10
            thickness_values=[]
            for i in range(0,len(wavenumber_clean)-window_size,step):
                w=wavenumber_clean[i:i+window_size]
                r=reflectivity_denoised[i:i+window_size]
                # detect local peaks/valleys inside the window before estimating the period
                local_peaks=find_peaks_scipy(r,prominence=0.01,distance=20)
                local_valleys=find_valleys_scipy(r,prominence=0.01,distance=20)
                p=compute_interference_period_weighted(w[local_peaks],w[local_valleys])
                if p is not None:
                    wl=wavenumber_to_wavelength(np.mean(w))
                    t=calculate_thickness_complex(p,wl,theta1_deg,np.mean(r))
                    thickness_values.append(t)
                else:
                    thickness_values.append(np.nan)
if len(thickness_values)>0:
min_len=min(len(wavenumber_clean),len(thickness_values))
                plot_thickness_distribution(
                    save_path,
                    wavenumber_clean[:min_len],
                    reflectivity_denoised[:min_len],
                    np.array(thickness_values)[:min_len]
                )
            # Error metrics
error_report[file_name] = {
"去噪前后信号MSE":np.mean((reflectivity_clean-reflectivity_denoised)**2),
"波峰检测数量":int(len(peaks)),
"波谷检测数量":int(len(valleys))
}
sys.stdout=console
save_log(save_path,log_output.getvalue())
with open(os.path.join(result_folder,"误差分析报告.json"),"w",encoding="utf-8") as f:
json.dump(error_report,f,ensure_ascii=False,indent=4)
    # Thickness consistency analysis
if "附件1" in thickness_results and "附件2" in thickness_results:
thickness_10=thickness_results["附件1"]
thickness_15=thickness_results["附件2"]
thickness_consistency_analysis(thickness_10,thickness_15)
plot_thickness_comparison(result_folder,[10,15],[thickness_10,thickness_15])
    # Interference-period distribution plot
P_values=[]
    for file in file_paths:
        file_name=os.path.splitext(os.path.basename(file))[0]
        wavenumber,reflectivity=get_data(file)
        wavenumber_clean,reflectivity_clean=outlier_removal(wavenumber,reflectivity)
        # reuse the same preprocessing as the main loop so the signal length matches wavenumber_clean
        reflectivity_denoised=wavelet_denoise(apply_savgol_filter(reflectivity_clean))
        reflectivity_denoised=equal_length(reflectivity_denoised,len(wavenumber_clean))
        peaks=find_peaks_scipy(reflectivity_denoised,prominence=0.01,distance=20)
        valleys=find_valleys_scipy(reflectivity_denoised,prominence=0.01,distance=20)
        P=compute_interference_period_weighted(wavenumber_clean[peaks],wavenumber_clean[valleys])
        if P is not None:
            P_values.append(P)
if len(P_values)>0:
plot_interference_period_distribution(result_folder,P_values)