# 下载静止卫星水汽通量和红外波段的代码
# 1.代码
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 12 15:29:53 2025
@author: heklong
"""
import os
import requests
import concurrent.futures
from typing import Dict, List
# Root URL of the remote archive; download_one() requests files from
# "{BASE_URL}/{year}/{fname}".
BASE_URL = "https://www.ncei.noaa.gov/data/geostationary-ir-channel-brightness-temperature-gridsat-b1/access"
def _to_remote_name(name: str) -> str:
    """Map one missing-list entry to the file name requested from the server.

    Entries that already end in ``.nc`` are returned unchanged.  Otherwise
    everything from the first underscore on (the per-variable suffix such as
    ``_irwin_cdr`` or ``_irwvp``) is dropped and ``.v02r01.nc`` is appended.
    """
    if name.endswith(".nc"):
        return name
    stem = name.split("_", 1)[0]
    return f"{stem}.v02r01.nc"


def load_missing_from_txt(txt_path: str) -> Dict[int, List[str]]:
    """Read a list of missing files and group the remote file names by year.

    Each non-empty line that does not start with ``#`` is expected to look
    like ``GRIDSAT-B1.2018.03.23.06_irwvp`` (or ``..._irwin_cdr``, or an
    already complete ``GRIDSAT-B1.2018.03.23.06.v02r01.nc``).  The year is
    taken from the second dot-separated field; lines where that field is
    missing or not an integer are skipped.

    Args:
        txt_path: Path of the UTF-8 text file holding one entry per line.

    Returns:
        A dict mapping year -> list of remote file names, in first-seen
        order and without duplicates.

    Raises:
        OSError: If ``txt_path`` cannot be opened.
    """
    year_map: Dict[int, List[str]] = {}
    seen = set()
    # "utf-8-sig" reads plain UTF-8 too, but drops a leading BOM so that it
    # does not end up glued to the first file name.
    with open(txt_path, "r", encoding="utf-8-sig") as f:
        for line in f:
            name = line.strip()
            if not name or name.startswith("#"):
                continue
            try:
                year = int(name.split(".")[1])
            except (IndexError, ValueError):
                # No second field, or it is not a number: not a usable entry.
                continue
            fixed_name = _to_remote_name(name)
            # Several variable entries of one timestamp map to the same
            # remote file; queue that file only once.
            if (year, fixed_name) in seen:
                continue
            seen.add((year, fixed_name))
            year_map.setdefault(year, []).append(fixed_name)
    return year_map
def ensure_year_dir(out_root: str, year: int) -> str:
    """Return the per-year folder ``<out_root>/<year>``, creating it if needed.

    Args:
        out_root: Root output directory.
        year: Year used as the sub-folder name.

    Returns:
        The path of the (now existing) year folder.
    """
    year_folder = os.path.join(out_root, str(year))
    # exist_ok=True: an already existing folder is not an error.
    os.makedirs(year_folder, exist_ok=True)
    return year_folder
def download_one(year: int, fname: str, out_root: str, timeout=30, chunk=1024*1024):
    """Download one missing file into ``<out_root>/<year>/<fname>``.

    The file is skipped when the target path already exists.  Data is first
    streamed into ``<target>.part`` and only renamed to the final name after
    the transfer finished, so an interrupted download never leaves a
    truncated file that a later run would mistake for a complete one.
    Progress and failures are reported with ``print``; nothing is raised.

    Args:
        year: Year of the file; selects both the remote and local sub-folder.
        fname: Remote file name.
        out_root: Root output directory.
        timeout: Timeout in seconds passed to ``requests.get``.
        chunk: Chunk size in bytes used while streaming the response body.
    """
    url = f"{BASE_URL}/{year}/{fname}"
    tmp_path = None
    # Broad catch on purpose: this runs as a thread-pool task, and an
    # exception escaping here would only be stored in the future.
    try:
        save_dir = ensure_year_dir(out_root, year)
        out_path = os.path.join(save_dir, fname)

        if os.path.exists(out_path):
            print(f"[SKIP] 已存在: {out_path}")
            return

        tmp_path = out_path + ".part"
        with requests.get(url, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                print(f"[FAIL] {fname} HTTP {r.status_code}: {url}")
                return

            # An HTML answer is an index or error page, not a data file.
            ctype = r.headers.get("Content-Type", "")
            if "text/html" in ctype.lower():
                print(f"[FAIL] {fname} 不是数据文件: {url}")
                return

            expected = r.headers.get("Content-Length")
            encoded = r.headers.get("Content-Encoding")
            written = 0
            with open(tmp_path, "wb") as f:
                for chunk_bytes in r.iter_content(chunk_size=chunk):
                    if chunk_bytes:
                        f.write(chunk_bytes)
                        written += len(chunk_bytes)

            # Content-Length counts bytes on the wire, so it is only
            # comparable when the body was not transfer-compressed.
            if (
                expected is not None
                and expected.isdigit()
                and not encoded
                and written != int(expected)
            ):
                raise OSError(
                    f"incomplete download ({written} of {expected} bytes)"
                )

        # Atomic rename: the final name only ever refers to a complete file.
        os.replace(tmp_path, out_path)
        print(f"[OK] {fname}")
    except Exception as e:
        print(f"[ERR] {fname}: {e}")
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best effort: the partial file may never have been created.
                pass
def download_missing_list(out_root: str, year_to_files: Dict[int, List[str]], max_workers=6):
    """Download every file of the missing list using a thread pool.

    Args:
        out_root: Root output directory handed to ``download_one``.
        year_to_files: Mapping year -> list of remote file names.
        max_workers: Number of worker threads.
    """
    future_to_name = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as ex:
        for year, files in year_to_files.items():
            for fname in files:
                fut = ex.submit(download_one, year, fname, out_root)
                future_to_name[fut] = fname

        for fut in concurrent.futures.as_completed(future_to_name):
            # download_one prints its own status, but an exception that
            # escapes it is only stored in the future; fetch the result so
            # such a failure is reported instead of being dropped silently.
            try:
                fut.result()
            except Exception as e:
                print(f"[ERR] {future_to_name[fut]}: {e}")
if __name__ == "__main__":
    # Root folder for the downloaded files.
    # NOTE(review): ensure_year_dir() appends the year to this root, so with
    # a root that already ends in "2017" the files are stored in
    # H:\Gridsat\2017\<year>\ - confirm that this nesting is intended.
    output_path = r"H:\Gridsat\2017"

    # Text file listing the missing files, one entry per line.
    txt_missing = r"H:\Gridsat\2017\missing_2017.txt"

    missing_files = load_missing_from_txt(txt_missing)
    download_missing_list(output_path, missing_files, max_workers=2)
# import os
# import requests
# from bs4 import BeautifulSoup
# import concurrent.futures
# from typing import Dict, List
# BASE_URL = "https://www.ncei.noaa.gov/data/geostationary-ir-channel-brightness-temperature-gridsat-b1/access"
# # ============ 方式 A:在代码中直接给出缺失清单 ============
# # 每个年份对应一个文件名列表(文件名需与站点一致)
# MISSING: Dict[int, List[str]] = {
# 2018: [
# # 例子:把你的缺失文件名填到这里(含 .nc 后缀)
# "GRIDSAT-B1.2018.01.21.15.v02r01.nc",
# # "GRIDSAT-B1.2018.01.01.03.v02r01.nc",
# ],
# 2019: [
# "GRIDSAT-B1.2019.01.01.00.v02r01.nc",
# ]
# }
# # ============ 方式 B:从 txt 清单读取(可选) ============
# # txt 每行一个文件名(例如:GRIDSAT-B1.2018.01.01.00.v02r01.nc)
# # 如果你想用 txt,把上面的 MISSING 留空或删掉对应年份,然后启用下面的函数
# def load_missing_from_txt(txt_path: str) -> Dict[int, List[str]]:
# year_map: Dict[int, List[str]] = {}
# with open(txt_path, "r", encoding="utf-8") as f:
# for line in f:
# name = line.strip()
# if not name or name.startswith("#"):
# continue
# # 尝试从文件名中提取年份(GRIDSAT-B1.YYYY.MM.DD.HH.v02r01.nc)
# try:
# year = int(name.split(".")[1])
# except Exception:
# continue
# year_map.setdefault(year, []).append(name)
# return year_map
# def ensure_year_dir(out_root: str, year: int) -> str:
# save_dir = os.path.join(out_root, str(year))
# os.makedirs(save_dir, exist_ok=True)
# return save_dir
# def download_one(year: int, fname: str, out_root: str, timeout=30, chunk=1024*1024):
# """下载单个缺失文件(存在则跳过)"""
# save_dir = ensure_year_dir(out_root, year)
# url = f"{BASE_URL}/{year}/{fname}"
# out_path = os.path.join(save_dir, fname)
# if os.path.exists(out_path):
# print(f"[SKIP] 已存在: {out_path}")
# return
# try:
# with requests.get(url, stream=True, timeout=timeout) as r:
# if r.status_code != 200:
# print(f"[FAIL] {fname} HTTP {r.status_code}: {url}")
# return
# # 可选:简单检查一下目录页是否返回了 HTML(不是二进制文件)
# ctype = r.headers.get("Content-Type", "")
# if "text/html" in ctype.lower():
# print(f"[FAIL] {fname} 看起来不是数据文件(Content-Type={ctype}): {url}")
# return
# with open(out_path, "wb") as f:
# for chunk_bytes in r.iter_content(chunk_size=chunk):
# if chunk_bytes:
# f.write(chunk_bytes)
# print(f"[OK] {fname}")
# except Exception as e:
# print(f"[ERR] {fname}: {e}")
# def download_missing_list(out_root: str, year_to_files: Dict[int, List[str]], max_workers=4):
# """并发下载缺失清单"""
# tasks = []
# with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as ex:
# for year, files in year_to_files.items():
# for fname in files:
# tasks.append(ex.submit(download_one, year, fname, out_root))
# for fut in concurrent.futures.as_completed(tasks):
# # 已在 download_one 里处理打印,这里不需要额外处理
# pass
# if __name__ == "__main__":
# # 输出目录
# output_path = r"H:\Gridsat"
# # —— 如果你要从 txt 读取,取消下一行注释并指定清单路径,然后把 MISSING 按需合并/替换 ——
# # txt_missing = r"H:\Gridsat\missing_irwin_cdr_2018_2019.txt"
# # missing_from_txt = load_missing_from_txt(txt_missing)
# # MISSING = missing_from_txt # 或者把两者合并:MISSING.update(missing_from_txt)
# # 开始下载缺失
# download_missing_list(output_path, MISSING, max_workers=4)