import os
import pygrib
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import re
from tqdm import tqdm
import warnings
import sys
# Silence pyproj UserWarnings so they do not clutter the console output.
warnings.filterwarnings("ignore", category=UserWarning, module="pyproj")

# ====================== Configuration ======================
# Root directory of the EC data; it is scanned for 8-digit date sub-folders.
EC_ROOT = r"G:\data\2024_ecdata_final"
# Excel workbook with the station table (columns 场站名 / 经度 / 纬度 are read).
STATION_FILE = r"C:\Users\pc\Desktop\wf_historical_data\场站信息表.xlsx"

# Output goes into a folder on the current user's Desktop.
DESKTOP_PATH = os.path.join(os.path.expanduser("~"), "Desktop")
OUTPUT_DIR = os.path.join(DESKTOP_PATH, "风电场EC数据输出")

# Levels extracted from the A3D files (used as `<level>hPa` in column names).
LEVELS = [1000, 975, 950, 925, 850]

# Output column label -> candidate GRIB identifiers (long name, short name),
# grouped by the file type (A1D / A3D) the variable is read from.
VARIABLES = {
    "A1D": {
        "U风分量(m/s)": ["100 metre U wind component", "u100"],
        "V风分量(m/s)": ["100 metre V wind component", "v100"],
    },
    "A3D": {
        "位势高度": ["Geopotential height", "gh"],
        "温度": ["Temperature", "t"],
        "相对湿度": ["Relative humidity", "r"],
    },
}
# ====================== Helper functions ======================
def utc_to_beijing(utc_time: datetime) -> datetime:
    """Convert a UTC datetime to Beijing time by adding eight hours.

    Args:
        utc_time: Datetime interpreted as UTC. No tzinfo handling is done;
            the value is simply shifted.

    Returns:
        ``utc_time`` shifted forward by 8 hours.
    """
    return utc_time + timedelta(hours=8)
def parse_ec_filename(filename, folder_date):
    """Parse the generation and forecast times encoded in an EC file name.

    The name carries no year, e.g. ``'A1D01241200012502001'``: characters
    3-8 are the generation ``MMDDHH`` and characters 11-16 the forecast
    ``MMDDHH``. The year is inferred from ``folder_date``.

    Args:
        filename: EC file name.
        folder_date: Date of the enclosing folder (``datetime``; only its
            year, month and day are used).

    Returns:
        ``(gen_time, fcst_time)`` as naive datetimes (UTC per the file
        naming used by the caller).

    Raises:
        ValueError: If the name is too short, the time fields are not
            numeric, or they do not form a valid date.
    """
    gen_str = filename[3:9]
    fcst_str = filename[11:17]
    if len(gen_str) != 6 or len(fcst_str) != 6 or not (gen_str + fcst_str).isdigit():
        raise ValueError(f"unexpected EC file name format: {filename!r}")

    gen_month, gen_day, gen_hour = (int(gen_str[i:i + 2]) for i in (0, 2, 4))
    fcst_month, fcst_day, fcst_hour = (int(fcst_str[i:i + 2]) for i in (0, 2, 4))

    # Pick the generation year that puts the run closest to the folder date.
    # This is correct whether the folder is named after the run date or after
    # a forecast date, including folders next to a year boundary.
    anchor = datetime(folder_date.year, folder_date.month, folder_date.day)
    candidates = []
    for year in (anchor.year - 1, anchor.year, anchor.year + 1):
        try:
            candidates.append(datetime(year, gen_month, gen_day, gen_hour))
        except ValueError:
            # The date does not exist in this year (e.g. Feb 29).
            continue
    if not candidates:
        raise ValueError(f"invalid generation time in file name: {filename!r}")
    gen_time = min(candidates, key=lambda t: abs(t - anchor))

    # A forecast is never valid before its run; if it appears to be, the
    # forecast falls in the following year (December run, January forecast).
    fcst_time = datetime(gen_time.year, fcst_month, fcst_day, fcst_hour)
    if fcst_time < gen_time:
        fcst_time = datetime(gen_time.year + 1, fcst_month, fcst_day, fcst_hour)
    return gen_time, fcst_time
def extract_value(grbs, possible_names, station_lon, station_lat, level=None):
    """Return a GRIB field's value at the grid point nearest to a station.

    Each entry of ``possible_names`` is tried first as the GRIB ``name`` and
    then as the ``shortName``; the first message that yields a value wins.

    Args:
        grbs: Open GRIB file object providing ``select(**keys)``.
        possible_names: Candidate identifiers for the variable.
        station_lon: Station longitude in degrees.
        station_lat: Station latitude in degrees.
        level: Optional level to match; ``None`` places no level constraint.

    Returns:
        The value rounded to 2 decimals, or ``None`` if no candidate matched
        or a matching message could not be read.
    """
    # Only constrain the level when the caller asked for one.
    level_filter = {} if level is None else {"level": level}

    for candidate in possible_names:
        for key in ("name", "shortName"):
            # pygrib's select() raises ValueError when nothing matches instead
            # of returning an empty list, so each key needs its own attempt;
            # chaining two select() calls with `or` never reaches the second.
            try:
                matches = grbs.select(**{key: candidate}, **level_filter)
            except Exception:
                # Best effort: a failed lookup just means "try the next one".
                continue
            if not matches:
                continue
            try:
                message = matches[0]
                lats, lons = message.latlons()
                # Wrap the longitude difference into [-180, 180) so a grid
                # stored as 0..360 still matches a station given as -180..180.
                dlon = (lons - station_lon + 180.0) % 360.0 - 180.0
                dist = dlon ** 2 + (lats - station_lat) ** 2
                j, i = np.unravel_index(dist.argmin(), dist.shape)
                return round(float(message.values[j, i]), 2)
            except Exception:
                # Unreadable message: fall through to the next candidate.
                continue
    return None
# ====================== Main processing ======================
def _a3d_column_name(var_type, level):
    """Return the CSV column name for an A3D variable at one level."""
    unit_suffix = {'位势高度': '(gpm)', '温度': '(K)', '相对湿度': '(%)'}
    return f"{var_type}_{level}hPa" + unit_suffix.get(var_type, '')


def _output_columns():
    """Return the fixed, ordered list of columns used for every CSV row."""
    columns = ['生成日期', '预测日期', '场站名', '经度', '纬度']
    columns.extend(VARIABLES['A1D'])
    columns.extend(
        _a3d_column_name(var_type, level)
        for level in LEVELS
        for var_type in VARIABLES['A3D']
    )
    # Error columns are always present so that rows with and without errors
    # share one layout and stay aligned with the header when appended.
    columns.extend(['A1D_错误', 'A3D_错误'])
    return columns


def _group_batch_files(folder_path, filenames, folder_date):
    """Group EC files of one folder by forecast time (Beijing time key)."""
    file_groups = {}
    for filename in filenames:
        try:
            gen_time, fcst_time = parse_ec_filename(filename, folder_date)
        except ValueError as e:
            print(f"解析文件{filename}出错: {str(e)}")
            continue
        gen_time_bj = utc_to_beijing(gen_time)
        fcst_time_bj = utc_to_beijing(fcst_time)
        fcst_key = fcst_time_bj.strftime('%Y%m%d%H')
        group = file_groups.setdefault(fcst_key, {
            'A1D': None,
            'A3D': None,
            'gen_time_bj': gen_time_bj,
            'fcst_time_bj': fcst_time_bj,
        })
        # The first three characters ('A1D' / 'A3D') select the slot.
        group[filename[:3]] = os.path.join(folder_path, filename)
    return file_groups


def _fill_rows_from_grib(grib_path, rows, targets, label):
    """Open one GRIB file once and fill every station row from it.

    ``targets`` is a list of ``(column, possible_names, level)`` tuples.
    If the file cannot be processed, the error text is stored in the
    ``<label>_错误`` column of every row instead of aborting the run.
    """
    try:
        with pygrib.open(grib_path) as grbs:
            for row in rows:
                for column, possible_names, level in targets:
                    row[column] = extract_value(
                        grbs, possible_names, row['经度'], row['纬度'], level=level
                    )
    except Exception as e:
        # Deliberately broad: one unreadable file must not stop the batch.
        print(f"处理{label}文件{grib_path}出错: {str(e)}")
        for row in rows:
            row[f"{label}_错误"] = str(e)


def _extract_group_rows(files, stations):
    """Build one output row per station for a single forecast-time group."""
    gen_label = files['gen_time_bj'].strftime('%Y-%m-%d %H:%M:%S')
    fcst_label = files['fcst_time_bj'].strftime('%Y-%m-%d %H:%M:%S')
    rows = [
        {
            '生成日期': gen_label,
            '预测日期': fcst_label,
            '场站名': station['场站名'],
            '经度': station['经度'],
            '纬度': station['纬度'],
        }
        for _, station in stations.iterrows()
    ]
    a1d_targets = [
        (var_name, possible_names, None)
        for var_name, possible_names in VARIABLES['A1D'].items()
    ]
    a3d_targets = [
        (_a3d_column_name(var_type, level), possible_names, level)
        for level in LEVELS
        for var_type, possible_names in VARIABLES['A3D'].items()
    ]
    _fill_rows_from_grib(files['A1D'], rows, a1d_targets, 'A1D')
    _fill_rows_from_grib(files['A3D'], rows, a3d_targets, 'A3D')
    return rows


def _append_rows(output_file, rows, columns):
    """Append rows to a station CSV, writing the header only on creation."""
    frame = pd.DataFrame(rows).reindex(columns=columns)
    is_new_file = not os.path.exists(output_file)
    frame.to_csv(
        output_file,
        mode='w' if is_new_file else 'a',
        header=is_new_file,
        index=False,
        encoding='utf-8-sig',
    )


def process_ec_data():
    """Extract EC forecast values for every station into per-station CSVs.

    Reads the station table from ``STATION_FILE``, walks the 8-digit date
    folders under ``EC_ROOT``, keeps the files whose generation hour is 12,
    pairs A1D/A3D files by forecast time and, for each complete pair, writes
    one row per station to ``OUTPUT_DIR/<station>/<station>_EC_data.csv``.

    Existing CSV files are appended to, so rerunning over the same input
    adds duplicate rows. Any unexpected error is printed with its traceback
    rather than raised.
    """
    try:
        # Load the station table.
        stations = pd.read_excel(STATION_FILE, engine='openpyxl')
        print(f"成功读取{len(stations)}个场站信息")

        os.makedirs(OUTPUT_DIR, exist_ok=True)
        print(f"输出目录: {OUTPUT_DIR}")

        # One output directory and CSV path per station.
        station_files = {}
        for station_name in stations['场站名']:
            station_dir = os.path.join(OUTPUT_DIR, str(station_name))
            os.makedirs(station_dir, exist_ok=True)
            station_files[station_name] = os.path.join(
                station_dir, f"{station_name}_EC_data.csv"
            )

        columns = _output_columns()

        # Date folders are named YYYYMMDD; process them chronologically.
        date_folders = sorted(
            d for d in os.listdir(EC_ROOT)
            if re.match(r'^\d{8}$', d) and os.path.isdir(os.path.join(EC_ROOT, d))
        )
        print(f"找到{len(date_folders)}个日期文件夹")

        total_processed = 0
        for date_folder in tqdm(date_folders, desc="处理日期文件夹"):
            folder_path = os.path.join(EC_ROOT, date_folder)
            try:
                folder_date = datetime.strptime(date_folder, "%Y%m%d")
            except ValueError:
                # Eight digits that do not form a real date.
                print(f"跳过无效文件夹: {date_folder}")
                continue

            # Keep only A1D/A3D files from the 12 o'clock run (chars 7-8).
            ec_files_12 = [
                f for f in os.listdir(folder_path)
                if f.startswith(('A1D', 'A3D')) and len(f) > 9 and f[7:9] == "12"
            ]
            if not ec_files_12:
                print(f"文件夹 {date_folder} 中没有12点批次数据,跳过")
                continue

            file_groups = _group_batch_files(folder_path, ec_files_12, folder_date)
            if not file_groups:
                print(f"文件夹 {date_folder} 中没有有效的12点批次文件组,跳过")
                continue

            # Collect this folder's rows per output file and write each file
            # once, instead of reopening the CSV for every single row.
            pending = {}
            group_count = 0
            for fcst_key in sorted(file_groups):
                files = file_groups[fcst_key]
                if not files['A1D'] or not files['A3D']:
                    print(f"文件组{fcst_key}缺少A1D或A3D文件,跳过")
                    continue
                group_count += 1  # count only groups that were extracted
                for row in _extract_group_rows(files, stations):
                    output_file = station_files[row['场站名']]
                    pending.setdefault(output_file, []).append(row)

            for output_file, rows in pending.items():
                _append_rows(output_file, rows, columns)

            total_processed += 1
            print(f"处理日期 {date_folder} 完成,提取了 {group_count} 个文件组")

        print(f"\n所有数据处理完成!共处理 {total_processed} 个日期文件夹")
        print(f"数据已保存到: {OUTPUT_DIR}")

        # List where each station's data ended up.
        print("\n各风电场数据文件:")
        for station_name, file_path in station_files.items():
            print(f"{station_name}: {file_path}")
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing the
        # console window the script is usually run from.
        print(f"\n程序运行出错: {str(e)}")
        import traceback
        traceback.print_exc()
# ====================== Script entry point ======================
if __name__ == "__main__":
    print("开始处理EC数据...")
    process_ec_data()
    input("按Enter键退出...")  # keep the console window open until Enter is pressed