df

df dfdfdfdf
# -*- coding: utf-8 -*-
"""Post-process box-model (MCM/AtChem2-style) output for one model run.

Reads per-case ``speciesConcentrations.output`` / rate files under
``$CSTHOME/<task>/<case>/output``, then:

* ``main_entry`` — combines concentrations of all finished cases into one CSV;
* ``rir_cal``    — computes O3 production/loss and Relative Incremental
  Reactivity (RIR, 10% precursor cut) against the ``base`` case;
* ``ekma_plot``  — builds an EKMA isopleth diagram from the ekma case matrix,
  overlays observed VOC/NOx points, and writes PNG / NetCDF / Excel output.

Required environment variables: CSTHOME, COMBINE, DATANM, ALLMAP.
"""
import os
import sys
import pandas as pd
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import netCDF4 as nc

# Font loading: register a combined Times/SimSun font so CJK labels render.
from matplotlib import font_manager, rcParams

font_path = r"./times+simsun.ttf"
font_manager.fontManager.addfont(font_path)
prop = font_manager.FontProperties(fname=font_path)
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = [prop.get_name()]  # rcParams expects a list of family names
rcParams['font.size'] = 10
rcParams['axes.unicode_minus'] = False  # keep the minus sign rendering with a CJK font

# Run layout is rooted at $CSTHOME; paths resolved once at import time.
CSTHOME = os.environ.get('CSTHOME')
ENVFILE = os.path.join(CSTHOME, 'envs')
CSTFILE = os.path.join(CSTHOME, 'runs/model/configuration/speciesConstrained.config')

# molecules cm^-3 per ppb used for unit conversion throughout
# NOTE(review): ekma_plot divides by 2.5e10 instead — confirm which is intended.
MOLEC_PER_PPB = 2.46e10


def get_ymd():
    """Read the model start date (bUYY/bUMM/bUDD rows) from the envs file.

    Returns:
        (year, month, day) of the start date shifted by +1 day; letting
        ``datetime`` normalise the shift avoids the day-overflowing-the-month
        bug noted in the original code.
    """
    df_env = pd.read_csv(ENVFILE, header=None, sep=r'\s+')
    yyyy = round(df_env[3][df_env[1] == 'bUYY'].values[0])
    mm = round(df_env[3][df_env[1] == 'bUMM'].values[0])
    dd = round(df_env[3][df_env[1] == 'bUDD'].values[0])
    dates = datetime(yyyy, mm, dd) + timedelta(days=1)
    return dates.year, dates.month, dates.day


def main_entry(tasks):
    """Combine species concentrations of every finished case of every task.

    Args:
        tasks: iterable of task directory names under $CSTHOME
               (e.g. ``['base', 'rir', 'ekma']``).

    Side effects:
        Writes ``$COMBINE/<run_id>_concentrations.csv`` (gbk-encoded);
        dispatches ``rir_cal`` / ``ekma_plot`` for the matching task names.
    """
    run_id = os.path.basename(CSTHOME)
    yyyy, mm, dd = get_ymd()
    result_cols = ['BYMD(BJT)', 'CH4(ppb)', 'CO(ppb)', 'O3(ppb)', 'NO2(ppb)',
                   'NO(ppb)', 'OH(ppb)', 'HO2(ppb)', 'CH3O2(ppb)', 'O1D(ppb)',
                   'NO3(ppb)', 'taskId', 'caseName']
    frames = []
    for task_name in tasks:
        task_dir = os.path.join(CSTHOME, task_name)
        cases = [f for f in os.listdir(task_dir)
                 if os.path.isdir(os.path.join(task_dir, f))]
        if task_name == 'rir':
            rir_cal(task_dir, cases, run_id)
        if task_name == 'ekma':
            ekma_plot(task_dir, cases, run_id)
        for case in cases:
            # A "<case>.OK" flag file marks a successfully finished run.
            if not os.path.exists(os.path.join(task_dir, f'{case}.OK')):
                continue
            file = os.path.join(task_dir, case, 'output', 'speciesConcentrations.output')
            if os.stat(file).st_size < 1.0:  # skip empty output files
                continue
            df = pd.read_csv(file, sep=r'\s+')
            if df.empty:
                continue
            # t is seconds since model start; the -16 h offset converts the
            # model clock to Beijing time (fixes the >24 h wrap bug noted
            # in the original).
            df['t'] = df['t'].apply(
                lambda x: datetime(yyyy, mm, dd) + timedelta(hours=round(x / 3600 - 16)))
            # molecules cm^-3 -> ppb for every species column
            df[df.columns[1:]] = df[df.columns[1:]].apply(lambda x: x / MOLEC_PER_PPB)
            df['taskId'] = run_id
            df['caseName'] = case
            df.columns = result_cols  # relies on the model emitting exactly these columns
            frames.append(df)
    df_result = pd.concat(frames) if frames else pd.DataFrame(columns=result_cols)
    df_result.to_csv(os.path.join(os.environ.get('COMBINE'), run_id + '_concentrations.csv'),
                     index=False, encoding='gbk')


def rir_cal(rir_dir, rir_cases, run_id):
    """Compute O3 production/loss and RIR for all RIR cases plus 'base'.

    Args:
        rir_dir:   the rir task directory ($CSTHOME/rir).
        rir_cases: case names inside ``rir_dir`` (NOT mutated — the original
                   appended 'base' in place, which leaked back into
                   main_entry's case loop).
        run_id:    run identifier used in output file names.

    Side effects:
        Writes ``$CSTHOME/result/<run_id>_rir.csv`` and
        ``$CSTHOME/result/<run_id>_each_reac.csv`` (gbk-encoded).
    """
    out_cols = ['BYMD(BJT)', 'PROD(ppb/h)', 'LOSS(ppb/h)', 'NET(ppb/h)',
                'RIR', 'taskId', 'caseName']
    df_result = pd.DataFrame(columns=out_cols)
    df_t = pd.DataFrame()
    df_each_reac = pd.DataFrame()
    for case in [*rir_cases, 'base']:
        # The 'base' case lives beside the rir task dir, not inside it.
        case_root = os.path.join(os.path.dirname(rir_dir), case) if case == 'base' else rir_dir
        if not os.path.exists(os.path.join(case_root, f'{case}.OK')):
            continue
        out_dir = os.path.join(case_root, case, 'output')
        pfile = os.path.join(out_dir, 'productionRates.output')
        lfile = os.path.join(out_dir, 'lossRates.output')
        efile = os.path.join(out_dir, 'environmentVariables.output')
        sfile = os.path.join(out_dir, 'speciesConcentrations.output')
        if any(os.stat(f).st_size < 1.0 for f in (pfile, lfile, efile, sfile)):
            continue
        df_p = pd.read_csv(pfile, sep=r'\s+')
        df_l = pd.read_csv(lfile, sep=r'\s+')
        df_e = pd.read_csv(efile, sep=r'\s+')
        df_s = pd.read_csv(sfile, sep=r'\s+')
        # molecules cm^-3 s^-1 -> ppb h^-1
        df_p['rate'] = df_p['rate'].apply(lambda x: x * 3600 / MOLEC_PER_PPB)
        df_l['rate'] = df_l['rate'].apply(lambda x: x * 3600 / MOLEC_PER_PPB)
        # Production: (HO2+NO) + sum(RO2+NO)
        df_p1 = df_p[df_p['reaction'].apply(lambda x: 'HO2+NO=' in x)]
        df_p_all = pd.merge(left=df_e, right=df_s, on='t')
        df_p_all = pd.merge(left=df_p_all, right=df_p1, left_on='t', right_on='time')
        # 2.7e-12*exp(360/T) — presumably the generic MCM RO2+NO rate constant;
        # TODO confirm against the mechanism.
        ro2_no = (2.7 * 10 ** (-12) * np.exp(360 / df_p_all['TEMP'])
                  * df_p_all['RO2'] * df_p_all['NO'] / MOLEC_PER_PPB * 3600)
        df_p_all['p'] = df_p_all['rate'] + ro2_no
        df_p_all['HO2NO'] = df_p_all['rate'].values
        df_p_all['RO2NO'] = ro2_no
        # Loss: 2*k(NO3+VOC)[NO3][VOC] + k(O3+VOC)[O3][VOC] over constrained VOCs
        df_sp = pd.read_csv(CSTFILE, header=None)
        df_l_all = df_e[['t', 'TEMP']].copy()
        df_l_all['L'] = 0  # running total loss rate
        df_l_all['VOCsNO3'] = 0
        df_l_all['VOCsO3'] = 0
        for sp in df_sp[0].values:
            if sp in ['CO', 'NO2']:
                continue
            df_vocs_o3 = df_l[df_l['reaction'].apply(
                lambda x: x.startswith(f'{sp}+O3=') or x.startswith(f'O3+{sp}='))]
            df_vocs_no3 = df_l[df_l['reaction'].apply(
                lambda x: x.startswith(f'{sp}+NO3=') or x.startswith(f'NO3+{sp}='))]
            for num in df_vocs_o3['reactionNumber'].unique():
                each = df_vocs_o3[df_vocs_o3['reactionNumber'] == num]
                # a reaction is listed once per consumed species; keep one set of rows
                each = each[each['speciesName'] == each['speciesName'].unique()[0]]
                df_l_all['L'] = df_l_all['L'].values + each['rate'].values
                df_l_all['VOCsO3'] = df_l_all['VOCsO3'].values + each['rate'].values
            if not df_vocs_no3.empty:
                print(f'{sp}反应存在')
                for num in df_vocs_no3['reactionNumber'].unique():
                    each = df_vocs_no3[df_vocs_no3['reactionNumber'] == num]
                    each = each[each['speciesName'] == each['speciesName'].unique()[0]]
                    # NO3 pathway is weighted x2, as in the original formula
                    df_l_all['L'] = df_l_all['L'].values + 2 * each['rate'].values
                    df_l_all['VOCsNO3'] = df_l_all['VOCsNO3'].values + 2 * each['rate'].values
        # Fixed-number loss reactions: O1D+H2O(16), O3+OH(17), HO2+O3(21), NO2+OH(26).
        # TODO: if the mechanism file changes, re-check these reactionNumbers
        # (they normally do not change with the mechanism file).
        maps = {'16': 'O1DH2O', '17': 'O3OH', '26': 'NO2OH', '21': 'HO2O3'}
        for num in [16, 17, 21, 26]:
            df_l_tmp = df_l[df_l['reactionNumber'] == num]
            df_lx = df_l_tmp[df_l_tmp['speciesName'] == df_l_tmp['speciesName'].unique()[0]]
            df_l_all['L'] = df_l_all['L'].values + df_lx['rate'].values
            df_l_all[maps[str(num)]] = df_lx['rate'].values
        df_p_l = pd.merge(df_p_all,
                          df_l_all[['t', 'L', 'VOCsNO3', 'VOCsO3', 'O1DH2O',
                                    'O3OH', 'NO2OH', 'HO2O3']], on='t')
        df_p_l['NET'] = df_p_l['p'] - df_p_l['L']
        df_p_l['RIR'] = np.nan  # filled in once all cases are loaded
        df_p_l['taskId'] = run_id
        df_p_l['caseName'] = case
        df_t = pd.concat([df_t, df_p_l[['t', 'p', 'L', 'NET', 'RIR', 'taskId', 'caseName']]])
        df_each_reac = pd.concat(
            [df_each_reac,
             df_p_l[['t', 'HO2NO', 'RO2NO', 'VOCsNO3', 'VOCsO3', 'O1DH2O',
                     'O3OH', 'NO2OH', 'HO2O3', 'taskId', 'caseName']]])
    # Convert model seconds to Beijing time.
    yyyy, mm, dd = get_ymd()

    def to_bjt(x):
        return datetime(yyyy, mm, dd) + timedelta(hours=round(x / 3600 - 16))

    df_t['t'] = df_t['t'].apply(to_bjt)
    df_each_reac['t'] = df_each_reac['t'].apply(to_bjt)
    df_t.columns = out_cols
    # RIR against the base case; precursor cut fraction is 10%.
    df_rir_base = df_t[df_t['caseName'] == 'base']
    for case in df_t['caseName'].unique():
        df_rir_case = df_t[df_t['caseName'] == case].copy()  # avoid SettingWithCopyWarning
        # RIR = (P_base - P_case) / P_base / 0.10; rows align by shared index
        df_rir_case['RIR'] = ((df_rir_base['PROD(ppb/h)'] - df_rir_case['PROD(ppb/h)'])
                              / df_rir_base['PROD(ppb/h)'] / 0.1)
        df_result = pd.concat([df_result, df_rir_case])
    result_dir = os.path.join(os.path.dirname(rir_dir), 'result')
    df_result.to_csv(os.path.join(result_dir, str(run_id) + '_rir.csv'),
                     index=False, encoding='gbk')
    df_each_reac.to_csv(os.path.join(result_dir, str(run_id) + '_each_reac.csv'),
                        index=False, encoding='gbk')


def ekma_plot(ekma_dir, ekma_cases, run_id):
    """Build the EKMA isopleth plot from the ekma case matrix.

    Case names are expected to look like ``NOX<p>_VOC<q>`` where <p>/<q> are
    the percentage scalings extracted below.

    Side effects:
        Writes ``$COMBINE/<run_id>_ekma.png``, ``<run_id>-ekma.nc`` and
        ``<run_id>-ekma_jianbai.xlsx``.
    """
    yyyy, mm, dd = get_ymd()
    max_o3 = []
    ekma_cases_new = ekma_cases.copy()
    for f in ekma_cases:
        f_path = os.path.join(ekma_dir, f, 'output', 'speciesConcentrations.output')
        if not os.path.exists(f_path) or os.stat(f_path).st_size < 1.0:
            ekma_cases_new.remove(f)
            continue
        df_i = pd.read_csv(f_path, sep=r'\s+')
        df_i = df_i.iloc[:24]  # first simulated day only
        df_i['t'] = df_i['t'].apply(
            lambda x: datetime(yyyy, mm, dd) + timedelta(hours=round(x / 3600 - 16)))
        df_i['hour'] = df_i['t'].apply(lambda x: x.hour)
        df_i = df_i.groupby('hour').mean()
        max_o3.append(df_i['O3'].max())
    # extract the percentage number from e.g. "NOX20" / "VOC80"
    r_nox = pd.DataFrame([i.split('_')[0] for i in ekma_cases_new])[0].str.extract(r'(\d+)')
    r_avocs = pd.DataFrame([i.split('_')[1] for i in ekma_cases_new])[0].str.extract(r'(\d+)')
    # NOTE(review): divisor 2.5e10 differs from the 2.46e10 used elsewhere — confirm.
    max_o3 = [i / 2.5e10 for i in max_o3]
    # NOTE: interp2d is deprecated in SciPy >=1.10 (removed in 1.14);
    # migrate to RegularGridInterpolator/bisplrep when SciPy is upgraded.
    fekma = interpolate.interp2d([int(i) for i in r_avocs[0]],
                                 [int(i) for i in r_nox[0]],
                                 np.reshape(max_o3, (-1, 1)), kind='cubic')
    xnew = np.arange(0, 200, 1)
    ynew = np.arange(0, 300, 1)
    znew = fekma(xnew, ynew)
    # Observations
    df_real = pd.read_csv(os.environ.get('DATANM'))
    # columns from the 8th on are measurements: force numeric, mask negatives
    df_real[df_real.columns[7:]] = df_real[df_real.columns[7:]].apply(
        pd.to_numeric, errors='coerce')
    lim_nan = df_real[df_real.columns[7:]].apply(lambda x: x < 0)
    df_real[lim_nan] = np.nan
    df_real['Date'] = pd.to_datetime(df_real['开始时间'], format="%Y-%m-%d %H:%M:%S")
    df_real['YMD'] = df_real['Date'].apply(lambda x: x.strftime("%Y-%m"))
    # gap-fill (bfill/ffill replace the deprecated fillna(method=...) /
    # interpolate(method='bfill'/'ffill') calls — same result)
    df_real = df_real.bfill()
    df_real[df_real.columns[7:]] = df_real[df_real.columns[7:]].bfill()
    df_real[df_real.columns[7:]] = df_real[df_real.columns[7:]].ffill()
    # map constrained MCM species to observation column names
    df_map = pd.read_csv(os.environ.get('ALLMAP'), sep=r"\s+")
    df_cst = pd.read_csv(CSTFILE, header=None)
    lim_cst1 = df_map['MCM'].apply(lambda x: x in df_cst[0].values)
    lim_cst2 = df_map['MCM'].apply(lambda x: x not in ['NO', 'NO2', 'NOXINJ', 'CH4', 'PAN', 'CO'])
    df_var_voc = df_map['VARIABLE'][lim_cst1 & lim_cst2]
    df_var_nox = df_map['VARIABLE'][df_map['MCM'].apply(lambda x: x in ['NO', 'NO2'])]
    datelist = df_real['YMD'].unique()
    # Mean daytime (hours 9-16) VOC/NOx over all dates.
    # BUG FIX: the original re-bound the loop variable to datelist[0] here,
    # so every iteration used the first date only.
    all_day_voc = 0
    all_day_nox = 0
    for ymd in datelist:
        df_day = df_real[df_real['YMD'] == ymd]
        all_day_voc += df_day[df_var_voc.values].iloc[9:17].mean().sum()
        all_day_nox += df_day[df_var_nox.values].iloc[9:17].mean().sum()
    c_vocs = np.asarray([int(i) * all_day_voc / len(datelist) / 100 for i in r_avocs[0]])
    c_nox_real = np.asarray([int(i) * all_day_nox / len(datelist) / 100 for i in r_nox[0]])
    # rescale grid axes from percent to observed concentration units
    xnew = np.arange(0, 200, 1) * max(c_vocs) / 200
    ynew = np.arange(0, 300, 1) * max(c_nox_real) / 300
    Xnew, Ynew = np.meshgrid(xnew, ynew)
    fig = plt.figure(figsize=(12, 10))
    fig.add_subplot(111)
    levels = np.arange(0, znew.max() + 10, 10)
    line = plt.contour(Xnew, Ynew, znew, levels=levels, cmap='jet', linewidths=1.5)

    def fmt(x):
        """Contour-label formatter: append a 'ppb' unit."""
        return f'{x:.0f} ppb'

    plt.clabel(line, inline=True, fontsize=10, colors='gray', fmt=fmt)
    # scatter one observed point per date and collect rows for the Excel sheet
    records = []
    for ymd in datelist:
        df_day = df_real[df_real['YMD'] == ymd]
        voc = df_day[df_var_voc.values].iloc[9:17].mean().sum()
        nox = df_day[df_var_nox.values].iloc[9:17].mean().sum()
        plt.scatter(voc, nox, 20, color='k')
        plt.text(voc * 1.01, nox * 1.01, ymd, fontsize=12)
        records.append({'data': ymd, 'vocs': voc, 'nox': nox})
    df = pd.DataFrame(records, columns=['data', 'vocs', 'nox'])
    plt.xlabel('AVOC/ ppb', fontsize=14)
    plt.ylabel('NOx/ ppb', fontsize=14)
    fig.text(0.22, 0.05, '模型:XIANEMC1011 制图:XIANEMC1011',
             fontsize=10, color='grey', alpha=1, ha='center', va='center', rotation=0)
    out_dir = os.path.join(os.environ.get('COMBINE'))
    plt.savefig(os.path.join(out_dir, run_id + '_ekma.png'), dpi=128, bbox_inches='tight')
    # NetCDF dump of the interpolated isopleth surface
    nc_file = nc.Dataset(f'{out_dir}/{run_id}-ekma.nc', 'w', format='NETCDF4')
    nc_file.createDimension('vocs', len(xnew))
    nc_file.createDimension('nox', len(ynew))
    vocs = nc_file.createVariable('vocs', 'f4', ('vocs'))
    nox = nc_file.createVariable('nox', 'f4', ('nox'))
    ekma = nc_file.createVariable('ekma', 'f4', ('nox', 'vocs'))
    vocs[:] = xnew
    nox[:] = ynew
    ekma[:] = znew
    nc_file.close()
    # context manager saves and closes (replaces the private writer._save())
    with pd.ExcelWriter(f'{out_dir}/{run_id}-ekma_jianbai.xlsx', engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='sheet1', index=True)


if __name__ == '__main__':
    # CLI form: pass tasks as one space-separated argument, e.g.
    #   task = sys.argv[1].split(' ')
    task = 'base rir ekma'.split(' ')
    main_entry(task)
08-30
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值