前言
由于ERA5官网的更新,现在网上能找到的很多ERA5下载脚本都已经无法使用,因此根据官网最新的接口更新了一版
CDSAPI设置
直接参考最新版CDSAPI配置教程 点击蓝色字直接跳转
完整代码
import os
import cdsapi
from calendar import monthrange
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Union, Generator
class ERA5Downloader:
    """Download ERA5 files from the Climate Data Store with a thread pool.

    The downloader is driven by a configuration dictionary. Keys read by
    this class:

    * ``dataset`` -- dataset name passed to ``cdsapi.Client.retrieve``.
    * ``variables`` -- list of variable names to request.
    * ``start_year`` / ``end_year`` -- inclusive range of years.
    * ``months`` -- iterable of month numbers.
    * ``temporal_resolution`` -- ``"hourly"``, ``"daily"`` or ``"monthly"``.
    * ``output_dir`` -- directory that receives the downloaded files.
    * ``product_type`` -- optional, defaults to ``"reanalysis"``.
    * ``format`` -- optional, defaults to ``"netcdf"``.
    * ``level`` -- optional, defaults to ``["1000"]``; only used when the
      dataset name contains ``"pressure-levels"``.

    One file is produced per day for ``"daily"`` and one file per month
    otherwise.
    """

    def __init__(self, config: Dict):
        """Store the configuration and create the worker pool."""
        self.config = config
        # Cap the number of parallel downloads at 4.
        self.max_workers = min(4, os.cpu_count() or 1)
        self.executor = ThreadPoolExecutor(max_workers=self.max_workers)

    def build_request(
        self,
        year: Union[int, List[int]],
        month: Union[int, List[int]],
        day: Union[int, List[int]],
        time: Union[str, List[str]],
        variables: List[str]
    ) -> Dict:
        """Build the request dictionary for one retrieval.

        Args:
            year: A single year or a list of years.
            month: A single month number or a list of month numbers.
            day: A single day number or a list of day numbers; ignored when
                the configured resolution is ``"monthly"``.
            time: A single ``"HH:MM"`` string or a list of them; ignored
                when the configured resolution is ``"monthly"``.
            variables: Variable names to request.

        Returns:
            The request dictionary to hand to ``cdsapi.Client.retrieve``.
        """
        request = {
            "product_type": self.config.get("product_type", "reanalysis"),
            "variable": variables,
            "year": str(year) if isinstance(year, int) else [str(y) for y in year],
            "month": f"{month:02d}" if isinstance(month, int) else [f"{m:02d}" for m in month],
            "format": self.config.get("format", "netcdf"),
        }

        # Pressure-level datasets select their levels through the
        # "pressure_level" request key; the configuration key stays "level".
        if "pressure-levels" in self.config["dataset"]:
            request["pressure_level"] = self.config.get("level", ["1000"])

        if self.config["temporal_resolution"] == "monthly":
            request["day"] = "01"
            request["time"] = "00:00"
        else:
            # Zero-pad a single day the same way a list of days is padded.
            request["day"] = f"{day:02d}" if isinstance(day, int) else [f"{d:02d}" for d in day]
            request["time"] = time if isinstance(time, list) else [time]
        return request

    def generate_dates(self) -> Generator:
        """Yield one ``(year, month, day)`` tuple per output file.

        For ``"daily"`` every calendar day of each configured month is
        yielded. For any other resolution a single tuple per month is
        yielded with ``day`` set to 1 as a placeholder.
        """
        start_year = self.config["start_year"]
        end_year = self.config["end_year"]
        for year in range(start_year, end_year + 1):
            for month in self.config["months"]:
                if self.config["temporal_resolution"] == "daily":
                    _, num_days = monthrange(year, month)
                    for day in range(1, num_days + 1):
                        yield year, month, day
                else:
                    yield year, month, 1

    def generate_filename(self, year: int, month: int, day: Union[int, None] = None) -> str:
        """Return the output file name for the given date.

        The day is part of the name only when the configured resolution is
        ``"daily"``.
        """
        base = f"{self.config['dataset']}_{year}"
        if self.config["temporal_resolution"] == "daily":
            return f"{base}_{month:02d}_{day:02d}.nc"
        return f"{base}_{month:02d}.nc"

    def download_task(self, request: Dict, filepath: str) -> None:
        """Retrieve one request into ``filepath``.

        Retrieval errors are reported on stdout instead of being raised so
        that one failed file does not stop the remaining downloads.
        """
        # A client is created per task rather than shared between threads.
        client = cdsapi.Client()
        try:
            client.retrieve(self.config["dataset"], request, filepath)
            print(f"成功下载: {filepath}")
        except Exception as e:
            print(f"下载失败: {filepath} - {str(e)}")

    def _request_days(self, year: int, month: int, day: int) -> Union[int, List[int]]:
        """Return the day(s) to request for one output file.

        Hourly files are written per month, so they must cover every day of
        that month instead of only the placeholder day from
        ``generate_dates``.
        """
        if self.config["temporal_resolution"] == "hourly":
            _, num_days = monthrange(year, month)
            return list(range(1, num_days + 1))
        return day

    def _iter_tasks(self) -> Generator:
        """Yield ``(request, output_path)`` for every file to download.

        Each output path is yielded at most once, so a repeated month in
        the configuration cannot start two downloads into the same file.
        """
        hourly = self.config["temporal_resolution"] == "hourly"
        planned = set()
        for year, month, day in self.generate_dates():
            filename = self.generate_filename(year, month, day)
            output_path = os.path.join(self.config["output_dir"], filename)
            if output_path in planned:
                continue
            planned.add(output_path)

            time_params = [f"{h:02d}:00" for h in range(24)] if hourly else "00:00"
            request = self.build_request(
                year=year,
                month=month,
                day=self._request_days(year, month, day),
                time=time_params,
                variables=self.config["variables"]
            )
            yield request, output_path

    def parallel_download(self) -> None:
        """Submit every planned download to the pool and wait for all of them."""
        os.makedirs(self.config["output_dir"], exist_ok=True)
        futures = [
            self.executor.submit(self.download_task, request, output_path)
            for request, output_path in self._iter_tasks()
        ]

        # Surface anything a task raised outside its own error handling.
        for future in as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"任务异常: {str(e)}")

    def __del__(self):
        """Shut the worker pool down, if it was ever created."""
        # __init__ may not have run to completion, so the attribute can be missing.
        executor = getattr(self, "executor", None)
        if executor is not None:
            executor.shutdown(wait=True)
if __name__ == "__main__":
    # Example configuration (adjust to your needs).
    CONFIG = {
        "dataset": "reanalysis-era5-single-levels",
        "variables": ["10m_u_component_of_wind", "10m_v_component_of_wind"],
        "start_year": 2022,
        "end_year": 2022,
        "months": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],  # months to download
        "temporal_resolution": "hourly",  # hourly/daily/monthly
        "output_dir": r"D:\work\优快云/",
        "product_type": "reanalysis",
        "format": "netcdf",
        # Pressure-level datasets additionally need:
        # "level": ["1000", "850"],
    }
    downloader = ERA5Downloader(CONFIG)
    downloader.parallel_download()
代码解析
"dataset"
可以从你下载的官网上直接获取
以ERA5 hourly data on single levels from 1940 to present
为例
我们可以先在这个网页上进行初步的选择,然后在页面的最下面找到API request
在这里面就能看到具体的dataset信息
"variables"
:如果不知道要下载的变量对应的variables名称,也可以直接从API request中查到
接下来就是start_year、end_year、months、temporal_resolution、output_dir、product_type、format这几个参数:
其中product_type
和format
一般情况下不需要修改;
start_year
、end_year
和months
就是我们需要的数据的起止年份以及月份信息;
temporal_resolution
稍微有一点点绕,这个参数取决于所选择的数据库,比如本次选择的数据库是ERA5 hourly data on single levels from 1940 to present
那么这个参数就是hourly
,如果你选择的数据库是ERA5 monthly averaged data on single levels from 1940 to present
那么这个参数就是monthly
。
"start_year": 2022,
"end_year": 2022,
"months": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], # 指定需要下载的月份
"temporal_resolution": "hourly", # hourly/daily/monthly
"output_dir": r"D:\work\优快云/",
"product_type": "reanalysis",
"format": "netcdf",