# -*- coding: utf-8 -*-
"""整合气象数据获取与处理系统
功能:
1. 在5分钟整点(0分、5分、10分等)获取实况气象数据
2. 将5分钟数据按月保存到Excel(存储在"5分钟级数据"文件夹)
3. 将5分钟数据插值为分钟级数据
4. 计算每分钟的太阳位置(高度角、方位角)和地外辐照度(保留两位小数)
5. 将分钟级数据按月保存到独立Excel(存储在"分钟级数据"文件夹)
"""
import urllib, urllib3, sys, uuid
import ssl
import json
from openpyxl import Workbook, load_workbook
from datetime import datetime, timedelta
import warnings
from urllib3.exceptions import InsecureRequestWarning
import time
import os
import pandas as pd
import numpy as np
from scipy.interpolate import CubicSpline
import math
import openpyxl
from openpyxl.styles import PatternFill, Font, Alignment
import traceback
# 强制刷新标准输出 - 确保实时输出
sys.stdout = sys.__stdout__
sys.stderr = sys.__stderr__
if hasattr(sys.stdout, 'reconfigure'):
sys.stdout.reconfigure(line_buffering=True) # 确保实时输出
else:
# 对于不支持reconfigure的环境
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1)
# 禁用SSL警告
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
# 创建绕过SSL验证的连接池
http = urllib3.PoolManager(cert_reqs='CERT_NONE')
# API配置
host = 'http://aliv18.data.moji.com'
condition_path = '/whapi/json/alicityweather/condition'
method = 'POST'
appcode = '2c642e12da774c918fa78ac56b5b4c50'
# 保定地理位置
LOCATION_NAME = "保定"
LONGITUDE = 115.480 # 东经
LATITUDE = 38.855 # 北纬
# 定义API请求头
API_HEADERS = {
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Authorization': 'APPCODE ' + appcode
}
# 实况数据API参数
CONDITION_TOKEN = '50b53ff8dd7d9fa320d3d3ca32cf8ed1'
CITY_ID = '1819' # 保定城市ID
# 创建数据存储目录
FIVEMIN_DIR = "5分钟级数据"
MINUTELY_DIR = "分钟级数据"
os.makedirs(FIVEMIN_DIR, exist_ok=True)
os.makedirs(MINUTELY_DIR, exist_ok=True)
def log_message(message):
"""记录日志到屏幕 - 确保实时输出"""
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
log_line = f"[{timestamp}] {message}"
# 直接输出到控制台,强制刷新
print(log_line, flush=True)
# 同时写入日志文件以便调试
with open("weather_system.log", "a", encoding="utf-8") as log_file:
log_file.write(log_line + "\n")
log_file.flush() # 立即刷新文件写入
def safe_save(wb, filename):
"""安全保存Excel文件,避免占用问题"""
log_message(f"正在保存文件: {filename}")
# 确保目标目录存在
os.makedirs(os.path.dirname(filename), exist_ok=True)
for attempt in range(5): # 增加尝试次数
try:
# 使用绝对路径避免相对路径问题
abs_filename = os.path.abspath(filename)
temp_file = abs_filename.replace(".xlsx", f"_temp_{attempt}.xlsx")
log_message(f"尝试保存临时文件: {temp_file}")
wb.save(temp_file)
# 替换原文件
if os.path.exists(abs_filename):
os.remove(abs_filename)
os.rename(temp_file, abs_filename)
log_message(f"文件保存成功: {abs_filename}")
return True
except (PermissionError, OSError) as e:
log_message(f"文件保存失败 ({attempt+1}/5): {str(e)}")
time.sleep(3)
except Exception as e:
log_message(f"文件保存时发生意外错误: {str(e)}")
traceback.print_exc()
time.sleep(3)
return False
def get_condition_data():
"""获取实况天气数据"""
try:
log_message(f"正在获取{LOCATION_NAME}实况天气数据...")
condition_bodys = {'token': CONDITION_TOKEN, 'cityId': CITY_ID}
post_data_condition = urllib.parse.urlencode(condition_bodys).encode('utf-8')
condition_url = host + condition_path
response_condition = http.request('POST', condition_url, body=post_data_condition, headers=API_HEADERS)
content_condition = response_condition.data.decode('utf-8')
condition_data = json.loads(content_condition)
log_message(f"API响应内容: {json.dumps(condition_data, ensure_ascii=False)[:200]}...")
if condition_data.get('code') != 0:
log_message(f"实况API请求失败: 代码 {condition_data.get('code')}, 消息: {condition_data.get('msg')}")
return None
condition_info = condition_data['data']['condition']
current_time = datetime.now()
# 对齐到最近的5分钟
aligned_minute = (current_time.minute // 5) * 5
aligned_time = current_time.replace(minute=aligned_minute, second=0, microsecond=0)
time_str = aligned_time.strftime("%Y-%m-%d %H:%M")
# 计算风速(m/s)并保留一位小数
raw_wind_speed = condition_info.get('windSpeed', '')
wind_speed_mps = ''
if raw_wind_speed and raw_wind_speed.replace('.', '', 1).isdigit():
try:
wind_speed_mps = round(float(raw_wind_speed) / 3.6, 1) # 保留一位小数
except:
wind_speed_mps = ''
# 处理温度值,确保保留一位小数
temp_value = condition_info.get('temp', '')
if temp_value and temp_value.replace('.', '', 1).isdigit():
try:
temp_value = round(float(temp_value), 1) # 温度保留一位小数
except:
pass
# 构建实况数据行
condition_row = [
time_str,
temp_value, # 温度保留一位小数
condition_info.get('condition', ''),
condition_info.get('humidity', ''),
condition_info.get('windDir', ''),
condition_info.get('windLevel', ''),
raw_wind_speed,
wind_speed_mps, # 风速保留一位小数
condition_info.get('precip', '0.0'),
condition_info.get('pressure', '')
]
log_message(f"成功获取实况数据: {time_str}")
log_message(f" 温度: {temp_value}℃, 天气状况: {condition_info.get('condition', '')}, 湿度: {condition_info.get('humidity', '')}%")
return condition_row
except Exception as e:
log_message(f"获取实况数据时出错: {str(e)}")
traceback.print_exc()
return None
def normalize_time(time_str):
"""统一时间格式为 YYYY-MM-DD HH:MM"""
try:
# 如果已经是字符串,尝试解析
if isinstance(time_str, str):
# 尝试解析为日期时间
if len(time_str) == 16: # 格式为 "YYYY-MM-DD HH:MM"
return time_str
elif len(time_str) == 19: # 包含秒数 "YYYY-MM-DD HH:MM:00"
return time_str[:16]
elif ' ' not in time_str: # 只有日期部分
return time_str + " 00:00"
else: # 其他格式,尝试转换
dt = datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S")
return dt.strftime("%Y-%m-%d %H:%M")
# 如果是datetime对象
elif isinstance(time_str, datetime):
return time_str.strftime("%Y-%m-%d %H:%M")
except Exception:
pass
# 无法识别的格式,原样返回
return str(time_str)
def update_fivemin_excel(condition_row):
"""更新5分钟级Excel文件"""
try:
current_month = datetime.now().strftime("%Y%m")
# 确保目录存在
os.makedirs(FIVEMIN_DIR, exist_ok=True)
filename = os.path.join(os.path.abspath(FIVEMIN_DIR), f"{LOCATION_NAME}_5分钟级数据_{current_month}.xlsx")
wb = None
ws = None
existing_data = {} # 存储时间点 -> 行号的映射
headers = []
log_message(f"正在更新5分钟数据文件: {filename}")
if os.path.exists(filename):
for attempt in range(3):
try:
log_message(f"尝试加载现有5分钟数据文件... (尝试 {attempt+1}/3)")
wb = load_workbook(filename)
ws = wb.active
if ws.max_row > 0:
headers = [cell.value for cell in ws[1]]
# 读取现有数据到字典 (时间 -> 行号)
for row_idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True), start=2):
if not row or not row[0]: continue
time_key = normalize_time(row[0])
if time_key in existing_data:
log_message(f"警告: 发现重复时间点: {time_key} (行 {row_idx})")
existing_data[time_key] = row_idx
log_message(f"成功加载现有5分钟数据文件: {filename} (共 {len(existing_data)} 条记录)")
break
except PermissionError:
if attempt < 2:
log_message(f"文件被占用,等待5秒后重试 ({attempt+1}/3)")
time.sleep(5)
else:
log_message("无法打开文件,创建新工作簿")
wb = Workbook()
ws = wb.active
ws.title = f"{LOCATION_NAME}5分钟级数据"
headers = ["时间", "温度(℃)", "天气状况", "湿度(%)", "风向", "风力等级",
"原始风速(km/h)", "风速(m/s)", "降水量(mm)", "气压(hPa)"]
ws.append(headers)
else:
wb = Workbook()
ws = wb.active
ws.title = f"{LOCATION_NAME}5分钟极数据"
headers = ["时间", "温度(℃)", "天气状况", "湿度(%)", "风向", "风力等级",
"原始风速(km/h)", "风速(m/s)", "降水量(mm)", "气压(hPa)"]
ws.append(headers)
log_message(f"创建新的5分钟数据文件: {filename}")
if not headers:
headers = ["时间", "温度(℃)", "天气状况", "湿度(%)", "风向", "风力等级",
"原始风速(km/h)", "风速(m/s)", "降水量(mm)", "气压(hPa)"]
# 处理实况数据
if condition_row:
current_time = condition_row[0]
normalized_current = normalize_time(current_time)
if normalized_current in existing_data:
row_idx = existing_data[normalized_current]
# 更新所有字段(覆盖旧数据)
for col_idx in range(1, len(condition_row) + 1):
new_value = condition_row[col_idx - 1]
ws.cell(row=row_idx, column=col_idx, value=new_value)
log_message(f"更新5分钟时间点: {normalized_current} (行 {row_idx})")
else:
ws.append(condition_row)
# 更新字典,记录新行号
existing_data[normalized_current] = ws.max_row
log_message(f"新增5分钟时间点: {normalized_current} (行 {ws.max_row})")
# 保存文件
if safe_save(wb, filename):
log_message(f"5分钟数据保存成功: {filename}")
return filename
else:
log_message(f"5分钟数据保存失败")
return None
except Exception as e:
log_message(f"更新5分钟数据文件时出错: {str(e)}")
traceback.print_exc()
return None
def calculate_solar_parameters(dt, longitude, latitude):
"""计算太阳高度角、方位角和地外辐照度(高度角和方位角保留两位小数)"""
try:
# 将时间转换为UTC+0
utc_time = dt - timedelta(hours=8)
n = utc_time.timetuple().tm_yday # 年中的第几天
# 计算太阳赤纬 (δ)
delta = 23.45 * math.sin(math.radians(360 * (284 + n) / 365))
delta_rad = math.radians(delta)
# 计算时角 (ω)
utc_hour = utc_time.hour + utc_time.minute/60.0
omega = (utc_hour - 12) * 15 + longitude
omega_rad = math.radians(omega)
# 纬度转弧度
phi_rad = math.radians(latitude)
# 计算太阳高度角 (α)
sin_alpha = math.sin(phi_rad) * math.sin(delta_rad) + math.cos(phi_rad) * math.cos(delta_rad) * math.cos(omega_rad)
alpha = math.degrees(math.asin(sin_alpha))
# 计算太阳方位角 (γ)
sin_gamma = -math.cos(delta_rad) * math.sin(omega_rad) / math.cos(math.radians(alpha))
cos_gamma = (math.sin(delta_rad) * math.cos(phi_rad) -
math.cos(delta_rad) * math.sin(phi_rad) * math.cos(omega_rad)) / math.cos(math.radians(alpha))
gamma = math.degrees(math.atan2(sin_gamma, cos_gamma))
gamma = (gamma + 360) % 360 # 转换为0-360度范围
# 计算地外辐照度 (W/m²)
g0 = 1367 * (1 + 0.033 * math.cos(math.radians(360 * n / 365))) * sin_alpha
g0 = max(0, g0) # 确保非负
# 保留两位小数
return round(alpha, 2), round(gamma, 2), round(g0, 1)
except Exception as e:
log_message(f"计算太阳参数错误: {str(e)}")
return None, None, None
def process_minutely_data(fivemin_file):
"""处理分钟级数据并添加太阳参数"""
try:
log_message("="*50)
log_message(f"开始生成分钟级数据...")
# 检查文件是否存在
if not os.path.exists(fivemin_file):
log_message(f"错误: 5分钟数据文件不存在: {fivemin_file}")
return None
# 读取5分钟数据
log_message(f"读取5分钟数据文件: {fivemin_file}")
try:
fivemin_df = pd.read_excel(fivemin_file)
log_message(f"读取5分钟数据成功,共 {len(fivemin_df)} 行")
except Exception as e:
log_message(f"读取5分钟数据文件失败: {str(e)}")
return None
# 查找时间列
time_col = next((col for col in fivemin_df.columns if '时间' in col), None)
if not time_col:
log_message("未找到时间列")
return None
# 转换时间格式并处理重复值
fivemin_df[time_col] = pd.to_datetime(fivemin_df[time_col], errors='coerce')
# 删除无效行
original_count = len(fivemin_df)
fivemin_df = fivemin_df.dropna(subset=[time_col])
if original_count > len(fivemin_df):
log_message(f"删除了 {original_count - len(fivemin_df)} 行无效时间数据")
# 处理重复时间点 - 保留最后一个
duplicate_mask = fivemin_df.duplicated(subset=[time_col], keep='last')
if duplicate_mask.any():
log_message(f"发现 {duplicate_mask.sum()} 个重复时间点,保留最后一个")
fivemin_df = fivemin_df[~duplicate_mask]
# 按时间排序
fivemin_df = fivemin_df.sort_values(by=time_col)
fivemin_df.set_index(time_col, inplace=True)
# 创建分钟级时间索引
start_time = fivemin_df.index.min()
end_time = fivemin_df.index.max()
log_message(f"时间范围: {start_time} 至 {end_time}")
minutely_index = pd.date_range(start=start_time, end=end_time, freq='1min')
minutely_df = pd.DataFrame(index=minutely_index)
log_message(f"将生成 {len(minutely_index)} 条分钟级数据")
# 插值处理 - 确保时间戳严格递增
base_time = start_time
fivemin_timestamps = (fivemin_df.index - base_time).total_seconds()
# 检查时间戳是否严格递增
if not (np.diff(fivemin_timestamps) > 0).all():
log_message("时间戳不是严格递增,尝试修复...")
# 重新排序
fivemin_df = fivemin_df.sort_index()
fivemin_timestamps = (fivemin_df.index - base_time).total_seconds()
minutely_timestamps = (minutely_df.index - base_time).total_seconds()
# 对连续参数进行插值
log_message("\n开始数据插值处理...")
for param in ['风速(m/s)', '温度(℃)', '湿度(%)', '气压(hPa)']:
if param in fivemin_df.columns:
param_data = fivemin_df[param].dropna()
if len(param_data) >= 3:
log_message(f"对 {param} 使用三次样条插值 (数据点: {len(param_data)})")
cs = CubicSpline(fivemin_timestamps, param_data.values)
interpolated_values = cs(minutely_timestamps)
# 对温度和风速保留一位小数
if param in ['温度(℃)', '风速(m/s)']:
interpolated_values = np.round(interpolated_values, 1)
minutely_df[param] = interpolated_values
else:
log_message(f"对 {param} 使用前向填充 (有效点不足: {len(param_data)})")
minutely_df[param] = fivemin_df[param].resample('1min').ffill()
# 处理降水量
if '降水量(mm)' in fivemin_df.columns:
log_message("处理降水量数据")
# 将5分钟降水量转换为每分钟降水量
precip_min = fivemin_df['降水量(mm)'].resample('1min').asfreq().ffill()
minutely_df['降水量(mm)'] = precip_min / 5.0 # 平均分配到每分钟
# 处理文本数据
for param in ['天气状况', '风向']:
if param in fivemin_df.columns:
log_message(f"处理 {param} 数据")
minutely_df[param] = fivemin_df[param].resample('1min').ffill()
# 数值列保留适当的小数位数
# 温度和风速保留一位小数
if '温度(℃)' in minutely_df.columns:
minutely_df['温度(℃)'] = minutely_df['温度(℃)'].round(1)
if '风速(m/s)' in minutely_df.columns:
minutely_df['风速(m/s)'] = minutely_df['风速(m/s)'].round(1)
# 其他数值列保留一位小数
other_numeric_cols = ['湿度(%)', '气压(hPa)', '降水量(mm)']
for col in other_numeric_cols:
if col in minutely_df.columns:
minutely_df[col] = minutely_df[col].round(1)
# 计算太阳参数
log_message(f"\n开始计算太阳位置和辐照度...")
solar_data = []
total_points = len(minutely_df)
last_progress = -1
start_time = time.time()
for i, dt in enumerate(minutely_df.index):
alpha, gamma, g0 = calculate_solar_parameters(dt, LONGITUDE, LATITUDE)
solar_data.append({
'太阳高度角(度)': alpha,
'太阳方位角(度)': gamma,
'地外辐照度(W/m2)': g0
})
# 显示进度
progress = int((i + 1) / total_points * 100)
if progress != last_progress and progress % 10 == 0:
elapsed = time.time() - start_time
remaining = (total_points - i) * (elapsed / (i+1))
log_message(f"计算进度: {progress}% | 预计剩余时间: {remaining:.1f}秒")
last_progress = progress
solar_df = pd.DataFrame(solar_data, index=minutely_df.index)
minutely_df = pd.concat([minutely_df, solar_df], axis=1)
# 生成输出文件名
current_month = datetime.now().strftime("%Y%m")
filename = f"{LOCATION_NAME}_分钟级数据_{current_month}.xlsx"
filepath = os.path.join(os.path.abspath(MINUTELY_DIR), filename)
# 保存分钟级数据
minutely_df.reset_index(inplace=True)
minutely_df.rename(columns={'index': '时间'}, inplace=True)
# 创建Excel工作簿
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "分钟级数据"
# 写入数据
log_message(f"\n正在写入分钟级数据到Excel...")
for col_idx, col_name in enumerate(minutely_df.columns, 1):
ws.cell(row=1, column=col_idx, value=col_name)
for r_idx, row in enumerate(minutely_df.values, 2):
for c_idx, value in enumerate(row, 1):
ws.cell(row=r_idx, column=c_idx, value=value)
# 应用样式
header_fill = PatternFill(start_color="1F4E78", end_color="1F4E78", fill_type="solid")
header_font = Font(color="FFFFFF", bold=True)
header_alignment = Alignment(horizontal="center", vertical="center")
for cell in ws[1]:
cell.fill = header_fill
cell.font = header_font
cell.alignment = header_alignment
# 设置列宽
log_message("设置列宽...")
for col in ws.columns:
max_length = 0
for cell in col:
try:
if cell.value and len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
adjusted_width = (max_length + 2) * 1.2
ws.column_dimensions[col[0].column_letter].width = min(adjusted_width, 50)
# 保存文件
if safe_save(wb, filepath):
log_message(f"分钟级数据保存成功: {filepath}")
return filepath
else:
log_message(f"分钟级数据保存失败")
return None
except Exception as e:
log_message(f"处理分钟级数据时出错: {str(e)}")
traceback.print_exc()
return None
def get_weather_data():
"""获取天气数据并处理"""
log_message(f"\n获取{LOCATION_NAME}实况天气数据...")
try:
# 获取实况数据
condition_row = get_condition_data()
if not condition_row:
log_message("获取实况数据失败")
return False
# 更新5分钟数据文件
fivemin_file = update_fivemin_excel(condition_row)
if not fivemin_file:
log_message("5分钟数据更新失败")
return False
# 处理分钟级数据(只在5分钟整点处理)
current_minute = datetime.now().minute
if current_minute % 5 == 0: # 0分、5分、10分等
log_message(f"当前时间 {datetime.now().strftime('%H:%M')} 是5分钟整点,开始生成分钟级数据")
minutely_file = process_minutely_data(fivemin_file)
if minutely_file:
log_message(f"\n数据处理完成!")
log_message(f" 5分钟数据 -> {fivemin_file}")
log_message(f" 分钟数据 -> {minutely_file}")
else:
log_message(f"分钟数据处理失败")
else:
log_message(f"跳过分钟级数据处理(非5分钟整点)")
return True
except Exception as e:
log_message(f"获取天气数据时出错: {str(e)}")
traceback.print_exc()
return False
# 主程序循环 - 确保实时输出日志
if __name__ == '__main__':
try:
# 程序启动横幅 - 使用print确保直接输出
print("=" * 70, flush=True)
print(f"{LOCATION_NAME}气象数据自动获取与处理系统", flush=True)
print(f"系统启动时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", flush=True)
print(f"Python版本: {sys.version}", flush=True)
print(f"工作目录: {os.getcwd()}", flush=True)
print(f"5分钟数据存储目录: {os.path.abspath(FIVEMIN_DIR)}", flush=True)
print(f"分钟数据存储目录: {os.path.abspath(MINUTELY_DIR)}", flush=True)
print("=" * 70, flush=True)
print("按 Ctrl+C 停止程序", flush=True)
print("=" * 70, flush=True)
# 初始执行一次
log_message("\n执行初始数据获取...")
success = get_weather_data()
# 主循环
current_month = datetime.now().month
while True:
try:
# 计算到下一个整分钟的时间(确保在0秒执行)
now = datetime.now()
next_minute = now.replace(second=0, microsecond=0) + timedelta(minutes=1)
wait_seconds = (next_minute - now).total_seconds()
if wait_seconds > 0:
wait_minutes = wait_seconds / 60
log_message(f"\n等待 {wait_seconds:.1f} 秒 ({wait_minutes:.2f} 分钟) 到下一个整分钟")
time.sleep(wait_seconds)
# 检查月份变化
new_month = datetime.now().month
if new_month != current_month:
log_message(f"\n月份变更: {current_month} -> {new_month}")
current_month = new_month
# 获取并处理数据
log_message("\n" + "="*70)
log_message(f"开始新一轮数据采集 ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})")
success = get_weather_data()
except KeyboardInterrupt:
log_message("\n程序已停止")
break
except Exception as e:
# 获取详细错误信息
exc_type, exc_value, exc_traceback = sys.exc_info()
error_details = traceback.format_exception(exc_type, exc_value, exc_traceback)
error_msg = ''.join(error_details)
log_message(f"\n主循环错误: {str(e)}")
log_message(f"详细错误信息:\n{error_msg}")
log_message("5分钟后重试...")
time.sleep(300) # 5分钟后重试
# 重新计算下一个整分钟时间
now = datetime.now()
next_minute = now.replace(second=0, microsecond=0) + timedelta(minutes=1)
current_month = datetime.now().month
log_message(f"新的重试时间: {next_minute.strftime('%Y-%m-%d %H:%M:%S')}")
except Exception as e:
# 启动错误处理
print(f"程序启动失败: {str(e)}", flush=True)
traceback.print_exc(file=sys.stdout)把这段程序也用上面那个方法实现在Python程序在运行时在页面显示结果
最新发布