# NOTE: stray non-code line (a pasted instruction); commented out so the module parses:
# 请分析并解析以下代码的执行过程和作用,输出的解释文字控制在500字以内
import os
import sys
import pandas as pd
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import netCDF4 as nc
# Font loading: register a local TTF (Times New Roman + SimSun) with matplotlib
# so that mixed Chinese/English labels render correctly.
from matplotlib import font_manager, rcParams
font_path = r"./times+simsun.ttf"
font_manager.fontManager.addfont(font_path)
prop = font_manager.FontProperties(fname=font_path)
# Font settings
rcParams['font.family'] = 'sans-serif'  # use the sans-serif family
rcParams['font.sans-serif'] = prop.get_name()  # select the registered font by name
rcParams['font.size'] = 10  # default font size
rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with this font
# Run-root directory (required); derived paths reuse the bound value instead
# of re-querying the environment three times.
CSTHOME = os.environ.get('CSTHOME')
ENVFILE = os.path.join(CSTHOME, 'envs')  # environment-variable table for the run
CSTFILE = os.path.join(CSTHOME, 'runs/model/configuration/speciesConstrained.config')  # constrained-species list
def get_ymd():
    """Read the base date (bUYY/bUMM/bUDD rows) from ENVFILE and return the
    following day as a (year, month, day) tuple.

    The one-day shift compensates for model output running past midnight
    (avoids constructing an out-of-range day-of-month directly).
    """
    env_table = pd.read_csv(ENVFILE, header=None, sep='\\s+')

    def lookup(key):
        # Column 1 holds the variable name, column 3 its numeric value.
        return round(env_table[3][env_table[1] == key].values[0])

    next_day = datetime(lookup('bUYY'), lookup('bUMM'), lookup('bUDD')) + timedelta(days=1)
    return next_day.year, next_day.month, next_day.day
def main_entry(tasks):
    """Post-process model output for each task directory under CSTHOME.

    For every finished case (flagged by a '<case>.OK' marker file) the
    species-concentration table is read, converted to ppb, stamped with
    Beijing time and appended to one combined CSV written into the COMBINE
    directory. The 'rir' and 'ekma' tasks additionally trigger their
    dedicated post-processing routines.

    Parameters
    ----------
    tasks : list[str]
        Task sub-directory names, e.g. ['base', 'rir', 'ekma'].
    """
    run_id = os.path.basename(CSTHOME)
    yyyy, mm, dd = get_ymd()
    df_result = pd.DataFrame(
        columns=['BYMD(BJT)', 'CH4(ppb)', 'CO(ppb)', 'O3(ppb)', 'NO2(ppb)', 'NO(ppb)', 'OH(ppb)', 'HO2(ppb)',
                 'CH3O2(ppb)', 'O1D(ppb)', 'NO3(ppb)', 'taskId', 'caseName'])
    for task_name in tasks:
        task_dir = os.path.join(CSTHOME, task_name)
        cases = [f for f in os.listdir(task_dir) if os.path.isdir(os.path.join(task_dir, f))]
        if task_name == 'rir':
            rir_cal(task_dir, cases, run_id)
        if task_name == 'ekma':
            ekma_plot(task_dir, cases, run_id)
        for case in cases:
            flag = f'{case}.OK'  # marker file written when a case finishes successfully
            if os.path.exists(os.path.join(task_dir, flag)):
                file = os.path.join(task_dir, case + os.sep + 'output' + os.sep + 'speciesConcentrations.output')
                if os.stat(file).st_size < 1.0:
                    continue  # skip empty output files
                df = pd.read_csv(file, sep='\\s+')
                if df.empty:
                    continue
                # Model time is seconds; the -16 h shift converts to Beijing time
                # (original note: fixes times running past 24 hours).
                df['t'] = df['t'].apply(lambda x: datetime(yyyy, mm, dd) + timedelta(hours=round(x / 3600 - 16)))
                # Convert concentrations to ppb; 2.46e10 is presumably the
                # molecules cm^-3 per ppb factor — TODO confirm.
                df[df.columns[1:]] = df[df.columns[1:]].apply(lambda x: x / 2.46e10)
                df['taskId'] = run_id
                df['caseName'] = case
                # Assumes the output has exactly the 11 data columns named in
                # df_result — verify against the model configuration.
                df.columns = df_result.columns
                df_result = pd.concat([df_result, df])
    df_result.to_csv(os.path.join(os.environ.get('COMBINE'), run_id + '_concentrations.csv'), index=False, encoding='gbk')
def rir_cal(rir_dir, rir_cases, run_id):
    """Compute O3 production/loss/net rates and RIR for each reduction case.

    Production is the (HO2+NO) rate plus a lumped RO2+NO term; loss sums
    VOC+O3 / VOC+NO3 reactions for the constrained species plus four fixed
    inorganic sinks. RIR is computed against the 'base' case assuming a
    10 % precursor reduction. Writes '<run_id>_rir.csv' and
    '<run_id>_each_reac.csv' into the sibling 'result' directory.

    Parameters
    ----------
    rir_dir : str
        Directory holding the RIR case sub-directories.
    rir_cases : list[str]
        Case names. NOTE(review): 'base' is appended in place, mutating the
        caller's list.
    run_id : str
        Run identifier stamped into the output tables.
    """
    df_result = pd.DataFrame(columns=['BYMD(BJT)', 'PROD(ppb/h)', 'LOSS(ppb/h)', 'NET(ppb/h)', 'RIR', 'taskId', 'caseName'])
    df_t = pd.DataFrame()
    df_each_reac = pd.DataFrame()
    rir_cases.append('base')
    for case in rir_cases:
        if case == 'base':
            # The base case lives next to (not inside) the rir directory.
            rir_dir = os.path.join(os.path.dirname(rir_dir), case)
        flag = f'{case}.OK'
        if os.path.exists(os.path.join(rir_dir, flag)):
            pfile = os.path.join(rir_dir, case + os.sep + 'output' + os.sep + 'productionRates.output')
            lfile = os.path.join(rir_dir, case + os.sep + 'output' + os.sep + 'lossRates.output')
            efile = os.path.join(rir_dir, case + os.sep + 'output' + os.sep + 'environmentVariables.output')
            sfile = os.path.join(rir_dir, case + os.sep + 'output' + os.sep + 'speciesConcentrations.output')
            # Skip the case if any required output file is empty.
            if os.stat(pfile).st_size < 1.0:
                continue
            if os.stat(lfile).st_size < 1.0:
                continue
            if os.stat(efile).st_size < 1.0:
                continue
            if os.stat(sfile).st_size < 1.0:
                continue
            # production: (HO2+NO) + sum(RO2+NO); reaction_num: 27
            df_p = pd.read_csv(pfile, sep='\\s+')
            df_l = pd.read_csv(lfile, sep='\\s+')
            df_e = pd.read_csv(efile, sep='\\s+')
            df_s = pd.read_csv(sfile, sep='\\s+')
            # Rates to ppb/h: x3600 (s -> h) / 2.46e10 (presumably molecules
            # cm^-3 per ppb — TODO confirm).
            df_p['rate'] = df_p['rate'].apply(lambda x: x * 3600 / 2.46e10)
            df_l['rate'] = df_l['rate'].apply(lambda x: x * 3600 / 2.46e10)
            df_p1 = df_p[df_p['reaction'].apply(lambda x: 'HO2+NO=' in x)]
            df_p_all = pd.merge(left=df_e, right=df_s, on='t')
            df_p_all = pd.merge(left=df_p_all, right=df_p1, left_on='t', right_on='time')
            # Lumped RO2+NO term: k = 2.7e-12 * exp(360/T) — presumably the
            # MCM generic RO2+NO rate constant; TODO confirm.
            df_p_all['p'] = df_p_all['rate'] + 2.7 * 10 ** (-12) * np.exp(360 / df_p_all['TEMP']) * df_p_all['RO2'] * df_p_all['NO'] / 2.46e10 * 3600
            df_p_all['HO2NO'] = df_p_all['rate'].values
            df_p_all['RO2NO'] = 2.7 * 10 ** (-12) * np.exp(360 / df_p_all['TEMP']) * df_p_all['RO2'] * df_p_all['NO'] / 2.46e10 * 3600
            # lossRates(2kNO3+VOCs[NO3][VOCs], kO3+VOCs[O3][VOCs])
            df_sp = pd.read_csv(CSTFILE, header=None)
            df_l_all = df_e[['t', 'TEMP']].copy()
            df_l_all['L'] = 0  # total loss-rate accumulator
            df_l_all['VOCsNO3'] = 0
            df_l_all['VOCsO3'] = 0
            for i in df_sp[0].values:
                if i in ['CO', 'NO2']:
                    continue
                df_vocs_o3 = df_l[df_l['reaction'].apply(lambda x: (x.startswith(f'{i}+O3=')) or (x.startswith(f'O3+{i}=')))]
                df_vocs_no3 = df_l[df_l['reaction'].apply(lambda x: (x.startswith(f'{i}+NO3=')) or (x.startswith(f'NO3+{i}=')))]
                if not df_vocs_o3.empty:
                    # print(f'{i}反应存在')
                    for num in df_vocs_o3['reactionNumber'].unique():
                        # Keep only one species row per reaction to avoid
                        # counting the same reaction rate more than once.
                        df_vocs_o3_each = df_vocs_o3[df_vocs_o3['reactionNumber'] == num]
                        df_vocs_o3_each = df_vocs_o3_each[df_vocs_o3_each['speciesName'] == df_vocs_o3_each['speciesName'].unique()[0]]
                        df_l_all['L'] = df_l_all['L'].values + df_vocs_o3_each['rate'].values
                        df_l_all['VOCsO3'] = df_l_all['VOCsO3'].values + df_vocs_o3_each['rate'].values
                if not df_vocs_no3.empty:
                    print(f'{i}反应存在')
                    for num in df_vocs_no3['reactionNumber'].unique():
                        df_vocs_no3_each = df_vocs_no3[df_vocs_no3['reactionNumber'] == num]
                        df_vocs_no3_each = df_vocs_no3_each[df_vocs_no3_each['speciesName'] == df_vocs_no3_each['speciesName'].unique()[0]]
                        # NO3 pathway is weighted by 2 (see header comment
                        # '2kNO3+VOCs...') — TODO confirm the factor.
                        df_l_all['L'] = df_l_all['L'].values + 2 * df_vocs_no3_each['rate'].values
                        df_l_all['VOCsNO3'] = df_l_all['VOCsNO3'].values + 2 * df_vocs_no3_each['rate'].values
            # TODO: if the mechanism file changes, re-check these four reaction
            # numbers (they normally do not change with the mechanism file).
            # lossRates(O1D+H2O:15(EQUALS O1D=OH+OH), O3+OH:16, NO2+OH:25, HO2+O3:20, )
            maps = {'16': 'O1DH2O', '17': 'O3OH', '26': 'NO2OH', '21': 'HO2O3'}
            for num in [16, 17, 21, 26]:
                df_l_tmp = df_l[df_l['reactionNumber'] == num]
                df_lx = df_l_tmp[df_l_tmp['speciesName'] == df_l_tmp['speciesName'].unique()[0]]
                df_l_all['L'] = df_l_all['L'].values + df_lx['rate'].values
                df_l_all[maps[str(num)]] = df_lx['rate'].values
            df_p_l = pd.merge(df_p_all, df_l_all[['t', 'L', 'VOCsNO3', 'VOCsO3', 'O1DH2O', 'O3OH', 'NO2OH', 'HO2O3']], on='t')
            df_p_l['NET'] = df_p_l['p'] - df_p_l['L']
            df_p_l['RIR'] = np.nan  # filled per-case in the loop below
            df_p_l['taskId'] = run_id
            df_p_l['caseName'] = case
            df_t = pd.concat([df_t, df_p_l[['t', 'p', 'L', 'NET', 'RIR', 'taskId', 'caseName']]])
            df_each_reac = pd.concat([df_each_reac, df_p_l[['t', 'HO2NO', 'RO2NO', 'VOCsNO3', 'VOCsO3', 'O1DH2O', 'O3OH', 'NO2OH', 'HO2O3', 'taskId', 'caseName']]])
    yyyy, mm, dd = get_ymd()
    # Convert model seconds to Beijing time (-16 h shift, as in main_entry).
    df_t['t'] = df_t['t'].apply(lambda x: datetime(yyyy, mm, dd) + timedelta(hours=round(x / 3600 - 16)))
    df_each_reac['t'] = df_each_reac['t'].apply(lambda x: datetime(yyyy, mm, dd) + timedelta(hours=round(x / 3600 - 16)))
    df_t.columns = ['BYMD(BJT)', 'PROD(ppb/h)', 'LOSS(ppb/h)', 'NET(ppb/h)', 'RIR', 'taskId', 'caseName']
    df_rir_base = df_t[df_t['caseName'] == 'base']
    # Compute RIR against the base case; reduction ratio is 10 %.
    for case in df_t['caseName'].unique():
        df_rir_case = df_t[df_t['caseName'] == case]
        #df_rir_case['RIR'] = (df_rir_base['PROD(ppb/h)'] - df_rir_base['LOSS(ppb/h)'] - df_rir_case['PROD(ppb/h)'] + df_rir_case['LOSS(ppb/h)']) / (df_rir_base['PROD(ppb/h)'] - df_rir_base['LOSS(ppb/h)']) / 0.1
        df_rir_case['RIR'] = (df_rir_base['PROD(ppb/h)'] - df_rir_case['PROD(ppb/h)']) / df_rir_base['PROD(ppb/h)'] / 0.1
        df_result = pd.concat([df_result, df_rir_case])
    df_result.to_csv(os.path.join(os.path.dirname(rir_dir), 'result' + os.sep + str(run_id) + '_rir.csv'), index=False, encoding='gbk')
    df_each_reac.to_csv(os.path.join(os.path.dirname(rir_dir), 'result' + os.sep + str(run_id) + '_each_reac.csv'), index=False, encoding='gbk')
def ekma_plot(ekma_dir, ekma_cases, run_id):
    """Build the EKMA diagram from the 'ekma' scenario runs.

    For each NOx/VOC-reduction case the daily-max O3 is taken from the model
    output, a cubic response surface is interpolated over the reduction
    percentages, the axes are rescaled to observed concentrations, and the
    observed (VOC, NOx) points are overlaid. Outputs a PNG, a NetCDF grid
    and an Excel table into the COMBINE directory.

    Fixes vs. the previous revision:
    * removed a debug leftover (`i = datelist[0]`) that made the daytime
      VOC/NOx averages use only the first period for every date;
    * replaced private pandas APIs (`DataFrame._append`, `ExcelWriter._save`)
      and deprecated `fillna(method=...)` / `interpolate(method='bfill')`
      with the public `bfill`/`ffill`/`pd.concat`/`close` equivalents;
    * removed dead locals (`df_combine`, the unused `max_o3_` grid loop).

    Parameters
    ----------
    ekma_dir : str
        Directory holding one sub-directory per EKMA case.
    ekma_cases : list[str]
        Case names of the form '<NOxNN>_<VOCNN>' (percentages embedded).
    run_id : str
        Run identifier used in the output file names.
    """
    yyyy, mm, dd = get_ymd()
    max_o3 = []
    matched_o3 = []  # NOTE(review): collected but unused downstream
    c_nox = []       # NOTE(review): collected but unused downstream
    # Work on a copy so cases with missing/empty output can be dropped while
    # iterating the original list.
    ekma_cases_new = ekma_cases.copy()
    for f in ekma_cases:
        f_path = ekma_dir + '/' + f + '/output/speciesConcentrations.output'
        if not os.path.exists(f_path):
            ekma_cases_new.remove(f)
            continue
        if os.stat(f_path).st_size < 1.0:
            ekma_cases_new.remove(f)
            continue
        df_i = pd.read_csv(f_path, sep='\\s+')
        df_i = df_i.iloc[:24]
        # Model seconds -> Beijing time (-16 h shift, as elsewhere in this file).
        df_i['t'] = df_i['t'].apply(lambda x: datetime(yyyy, mm, dd) + timedelta(hours=round(x / 3600 - 16)))
        df_i['hour'] = df_i['t'].apply(lambda x: x.hour)
        df_i = df_i.groupby('hour').mean()
        max_o3 = np.append(max_o3, df_i['O3'].max())
        matched_o3 = np.append(matched_o3, df_i['O3'].iloc[12])
        c_nox = np.append(c_nox, np.nanmean(df_i['NO2'].iloc[9:17]))  # NO2 only; the NO term was commented out upstream
    # Extract reduction percentages from case names like 'NOX20_VOC80'.
    r_nox = pd.DataFrame([i.split('_')[0] for i in ekma_cases_new])
    r_nox = r_nox[0].str.extract(r'(\d+)')
    r_avocs = pd.DataFrame([i.split('_')[1] for i in ekma_cases_new])
    r_avocs = r_avocs[0].str.extract(r'(\d+)')
    # NOTE(review): 2.5e10 differs from the 2.46e10 factor used elsewhere in
    # this file — confirm which conversion is intended.
    max_o3 = list([i / 2.5e10 for i in max_o3])
    matched_o3 = list([i / 2.5e10 for i in matched_o3])
    # Cubic O3 response surface over (VOC reduction %, NOx reduction %).
    # NOTE(review): interp2d is deprecated/removed in recent SciPy — migrate
    # to RegularGridInterpolator / bisplrep when upgrading.
    fekma = interpolate.interp2d([int(i) for i in r_avocs[0]], [int(i) for i in r_nox[0]], np.reshape(max_o3, (-1, 1)), kind='cubic')
    xnew = np.arange(0, 200, 1)
    ynew = np.arange(0, 300, 1)
    znew = fekma(xnew, ynew)
    # Observed data.
    df_real = pd.read_csv(os.environ.get('DATANM'))
    # Coerce every column from the 8th onward to numeric; negative readings
    # are invalid and become NaN.
    df_real[df_real.columns[7:]] = df_real[df_real.columns[7:]].apply(pd.to_numeric, errors='coerce')
    lim_nan = df_real[df_real.columns[7:]].apply(lambda x: x < 0)
    df_real[lim_nan] = np.nan
    df_real['Date'] = pd.to_datetime(df_real['开始时间'], format="%Y-%m-%d %H:%M:%S")
    df_real['YMD'] = df_real['Date'].apply(lambda x: x.strftime("%Y-%m"))
    # Fill gaps: backward fill, then forward fill for trailing NaNs.
    df_real = df_real.bfill()
    df_real[df_real.columns[7:]] = df_real[df_real.columns[7:]].bfill()
    df_real[df_real.columns[7:]] = df_real[df_real.columns[7:]].ffill()
    df_map = pd.read_csv(os.environ.get('ALLMAP'), sep="\\s+")
    df_cst = pd.read_csv(CSTFILE, header=None)
    # Observed-variable names for constrained VOC species (inorganics excluded)
    # and for NOx.
    lim_cst1 = df_map['MCM'].apply(lambda x: x in df_cst[0].values)
    lim_cst2 = df_map['MCM'].apply(lambda x: x not in ['NO', 'NO2', 'NOXINJ', 'CH4', 'PAN', 'CO'])
    df_var_voc = df_map['VARIABLE'][lim_cst1 & lim_cst2]
    df_var_nox = df_map['VARIABLE'][df_map['MCM'].apply(lambda x: x in ['NO', 'NO2'])]
    datelist = df_real['YMD'].unique()
    all_day_voc = 0
    all_day_nox = 0
    for i in datelist:
        # BUG FIX: the original reassigned `i = datelist[0]` here (debug
        # leftover), so every iteration averaged the first period only.
        df_day = df_real[df_real['YMD'] == i]
        df_day_voc = df_day[df_var_voc.values]
        df_day_nox = df_day[df_var_nox.values]
        voc = df_day_voc.iloc[9:17].mean().sum()  # rows 9..16: daytime window
        nox = df_day_nox.iloc[9:17].mean().sum()
        all_day_voc = all_day_voc + voc
        all_day_nox = all_day_nox + nox
    # Rescale the percentage axes to mean observed concentrations.
    c_vocs = np.asarray([int(i) * all_day_voc / len(datelist) / 100 for i in r_avocs[0]])
    c_nox_real = np.asarray([int(i) * all_day_nox / len(datelist) / 100 for i in r_nox[0]])
    xnew = np.arange(0, 200, 1) * max(c_vocs) / 200
    ynew = np.arange(0, 300, 1) * max(c_nox_real) / 300
    Xnew, Ynew = np.meshgrid(xnew, ynew)
    fig = plt.figure(figsize=(12, 10))
    ax1 = fig.add_subplot(111)
    levels = np.arange(0, znew.max() + 10, 10)
    line = plt.contour(Xnew, Ynew, znew, levels=levels, cmap='jet', linewidths=1.5)

    def fmt(x):
        # Contour-label formatter: append the 'ppb' unit.
        return f'{x:.0f} ppb'

    # Add contour labels.
    plt.clabel(line, inline=True, fontsize=10, colors='gray', fmt=fmt)
    # Overlay the per-period observed (VOC, NOx) points on the surface.
    df = pd.DataFrame(columns=['data', 'vocs', 'nox'])
    for i in datelist:
        df_day = df_real[df_real['YMD'] == i]
        df_day_voc = df_day[df_var_voc.values]
        df_day_nox = df_day[df_var_nox.values]
        voc = df_day_voc.iloc[9:17].mean().sum()
        nox = df_day_nox.iloc[9:17].mean().sum()
        plt.scatter(voc, nox, 20, color='k')
        plt.text(voc * 1.01, nox * 1.01, i, fontsize=12)
        # pd.concat replaces the private DataFrame._append API.
        df = pd.concat([df, pd.DataFrame([{'data': i, 'vocs': voc, 'nox': nox}])], ignore_index=True)
    plt.xlabel('AVOC/ ppb', fontsize=14)
    plt.ylabel('NOx/ ppb', fontsize=14)
    fig.text(0.22, 0.05, f'模型:XIANEMC1011 制图:XIANEMC1011', fontsize=10, color='grey',
             alpha=1, ha='center', va='center', rotation=0)
    plt.savefig(os.path.join(os.environ.get('COMBINE'), run_id + '_ekma.png'), dpi=128, bbox_inches='tight')
    # Write the interpolated surface to NetCDF for downstream consumers.
    Path = os.path.join(os.environ.get('COMBINE'))
    nc_file = nc.Dataset(f'{Path}/{run_id}-ekma.nc', 'w', format='NETCDF4')
    nc_file.createDimension('vocs', len(xnew))
    nc_file.createDimension('nox', len(ynew))
    vocs = nc_file.createVariable('vocs', 'f4', ('vocs'))
    nox = nc_file.createVariable('nox', 'f4', ('nox'))
    ekma = nc_file.createVariable('ekma', 'f4', ('nox', 'vocs'))
    vocs[:] = xnew
    nox[:] = ynew
    ekma[:] = znew
    nc_file.close()
    # Dump the observed points; writer.close() replaces the private _save().
    writer = pd.ExcelWriter(f'{Path}/{run_id}-ekma_jianbai.xlsx', engine='xlsxwriter')
    df.to_excel(writer, sheet_name='sheet1', index=True)
    writer.close()
if __name__ == '__main__':
    # Use the space-separated task list from the command line when provided;
    # otherwise fall back to the default full pipeline (backward-compatible
    # with the previous hard-coded behavior).
    if len(sys.argv) > 1:
        task = sys.argv[1].split(' ')
    else:
        task = 'base rir ekma'.split(' ')
    main_entry(task)