pandas:sort_values排序后concat"失效“问题解决

本文探讨了在使用Pandas进行数据排序后,如何正确地利用concat函数连接已排序的数据框,避免因索引不匹配导致的排序失效问题。通过实例演示了reset_index方法的应用,确保数据框在连接时保持正确的排序状态。

pandas:sort_values排序后concat"失效“问题解决

问题描述:

import pandas as pd
import numpy as np
data1=np.array([[1,1,1],[3,2,2],[2,3,3]])
index=np.arange(1,4,1)#起点,终点+1,步长
columns=list('ABC')
df1=pd.DataFrame(data1,index=index,columns=columns)
print('df1:\n',df1)

数据1

df1.sort_values(inplace=True,by=['A'])#排序
print('排序后的df1(注意index也变化了):\n',df1)

数据1.1

#创建一个df2
df2=pd.DataFrame([4,4,4],index=[1,2,3],columns=['D'])
connect=pd.concat([df1,df2],axis=1,sort=True)
print('希望连接排序后的df1和df2:\n',connect)

connect_data
此时我们发现,刚刚明明排好的数据df1,连接df2时,为什么又复原了?因为concat连接是根据那个索引值进行连接(第一行连接第一行……),索引值在排序的时候也跟着排好了。

我们这么解决:

#直接重置2个索引,让它们一致
df1.reset_index(inplace=True,drop=True)
df2.reset_index(inplace=True,drop=True)
connect=pd.concat([df1,df2],axis=1,sort=True)#0:纵向合并;1;横向合并
print('成功连接排序后的df1和df2:\n',connect)

在这里插入图片描述

# -*- coding: utf-8 -*- """整合气象数据获取与处理系统 功能: 1. 在5分钟整点(0分、5分、10分等)获取实况气象数据 2. 将5分钟数据按月保存到Excel(存储在"5分钟级数据"文件夹) 3. 将5分钟数据插值为分钟级数据 4. 计算每分钟的太阳位置(高度角、方位角)和地外辐照度(保留两位小数) 5. 将分钟级数据按月保存到独立Excel(存储在"分钟级数据"文件夹) """ import urllib, urllib3, sys, uuid import ssl import json from openpyxl import Workbook, load_workbook from datetime import datetime, timedelta import warnings from urllib3.exceptions import InsecureRequestWarning import time import os import pandas as pd import numpy as np from scipy.interpolate import CubicSpline import math import openpyxl from openpyxl.styles import PatternFill, Font, Alignment import traceback # 强制刷新标准输出 - 确保实时输出 sys.stdout = sys.__stdout__ sys.stderr = sys.__stderr__ if hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(line_buffering=True) # 确保实时输出 else: # 对于不支持reconfigure的环境 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1) # 禁用SSL警告 warnings.filterwarnings("ignore", category=InsecureRequestWarning) # 创建绕过SSL验证的连接池 http = urllib3.PoolManager(cert_reqs='CERT_NONE') # API配置 host = 'http://aliv18.data.moji.com' condition_path = '/whapi/json/alicityweather/condition' method = 'POST' appcode = '2c642e12da774c918fa78ac56b5b4c50' # 保定地理位置 LOCATION_NAME = "保定" LONGITUDE = 115.480 # 东经 LATITUDE = 38.855 # 北纬 # 定义API请求头 API_HEADERS = { 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'Authorization': 'APPCODE ' + appcode } # 实况数据API参数 CONDITION_TOKEN = '50b53ff8dd7d9fa320d3d3ca32cf8ed1' CITY_ID = '1819' # 保定城市ID # 创建数据存储目录 FIVEMIN_DIR = "5分钟级数据" MINUTELY_DIR = "分钟级数据" os.makedirs(FIVEMIN_DIR, exist_ok=True) os.makedirs(MINUTELY_DIR, exist_ok=True) def log_message(message): """记录日志到屏幕 - 确保实时输出""" timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') log_line = f"[{timestamp}] {message}" # 直接输出到控制台,强制刷新 print(log_line, flush=True) # 同时写入日志文件以便调试 with open("weather_system.log", "a", encoding="utf-8") as log_file: log_file.write(log_line + "\n") log_file.flush() # 立即刷新文件写入 def safe_save(wb, filename): """安全保存Excel文件,避免占用问题""" log_message(f"正在保存文件: {filename}") # 确保目标目录存在 os.makedirs(os.path.dirname(filename), exist_ok=True) for attempt in range(5): # 增加尝试次数 try: # 使用绝对路径避免相对路径问题 abs_filename = os.path.abspath(filename) temp_file = abs_filename.replace(".xlsx", f"_temp_{attempt}.xlsx") log_message(f"尝试保存临时文件: {temp_file}") wb.save(temp_file) # 替换原文件 if os.path.exists(abs_filename): os.remove(abs_filename) os.rename(temp_file, abs_filename) log_message(f"文件保存成功: {abs_filename}") return True except (PermissionError, OSError) as e: log_message(f"文件保存失败 ({attempt+1}/5): {str(e)}") time.sleep(3) except Exception as e: log_message(f"文件保存时发生意外错误: {str(e)}") traceback.print_exc() time.sleep(3) return False def get_condition_data(): """获取实况天气数据""" try: log_message(f"正在获取{LOCATION_NAME}实况天气数据...") condition_bodys = {'token': CONDITION_TOKEN, 'cityId': CITY_ID} post_data_condition = urllib.parse.urlencode(condition_bodys).encode('utf-8') condition_url = host + condition_path response_condition = http.request('POST', condition_url, body=post_data_condition, headers=API_HEADERS) content_condition = response_condition.data.decode('utf-8') condition_data = json.loads(content_condition) log_message(f"API响应内容: {json.dumps(condition_data, ensure_ascii=False)[:200]}...") if condition_data.get('code') != 0: log_message(f"实况API请求失败: 代码 {condition_data.get('code')}, 消息: {condition_data.get('msg')}") return None condition_info = condition_data['data']['condition'] current_time = datetime.now() # 对齐到最近的5分钟 aligned_minute = (current_time.minute // 5) * 5 aligned_time = current_time.replace(minute=aligned_minute, second=0, microsecond=0) time_str = aligned_time.strftime("%Y-%m-%d %H:%M") # 计算风速(m/s)并保留一位小数 raw_wind_speed = condition_info.get('windSpeed', '') wind_speed_mps = '' if raw_wind_speed and raw_wind_speed.replace('.', '', 1).isdigit(): try: wind_speed_mps = round(float(raw_wind_speed) / 3.6, 1) # 保留一位小数 except: wind_speed_mps = '' # 处理温度值,确保保留一位小数 temp_value = condition_info.get('temp', '') if temp_value and temp_value.replace('.', '', 1).isdigit(): try: temp_value = round(float(temp_value), 1) # 温度保留一位小数 except: pass # 构建实况数据行 condition_row = [ time_str, temp_value, # 温度保留一位小数 condition_info.get('condition', ''), condition_info.get('humidity', ''), condition_info.get('windDir', ''), condition_info.get('windLevel', ''), raw_wind_speed, wind_speed_mps, # 风速保留一位小数 condition_info.get('precip', '0.0'), condition_info.get('pressure', '') ] log_message(f"成功获取实况数据: {time_str}") log_message(f" 温度: {temp_value}℃, 天气状况: {condition_info.get('condition', '')}, 湿度: {condition_info.get('humidity', '')}%") return condition_row except Exception as e: log_message(f"获取实况数据时出错: {str(e)}") traceback.print_exc() return None def normalize_time(time_str): """统一时间格式为 YYYY-MM-DD HH:MM""" try: # 如果已经是字符串,尝试解析 if isinstance(time_str, str): # 尝试解析为日期时间 if len(time_str) == 16: # 格式为 "YYYY-MM-DD HH:MM" return time_str elif len(time_str) == 19: # 包含秒数 "YYYY-MM-DD HH:MM:00" return time_str[:16] elif ' ' not in time_str: # 只有日期部分 return time_str + " 00:00" else: # 其他格式,尝试转换 dt = datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") return dt.strftime("%Y-%m-%d %H:%M") # 如果是datetime对象 elif isinstance(time_str, datetime): return time_str.strftime("%Y-%m-%d %H:%M") except Exception: pass # 无法识别的格式,原样返回 return str(time_str) def update_fivemin_excel(condition_row): """更新5分钟级Excel文件""" try: current_month = datetime.now().strftime("%Y%m") # 确保目录存在 os.makedirs(FIVEMIN_DIR, exist_ok=True) filename = os.path.join(os.path.abspath(FIVEMIN_DIR), f"{LOCATION_NAME}_5分钟级数据_{current_month}.xlsx") wb = None ws = None existing_data = {} # 存储时间点 -> 行号的映射 headers = [] log_message(f"正在更新5分钟数据文件: {filename}") if os.path.exists(filename): for attempt in range(3): try: log_message(f"尝试加载现有5分钟数据文件... (尝试 {attempt+1}/3)") wb = load_workbook(filename) ws = wb.active if ws.max_row > 0: headers = [cell.value for cell in ws[1]] # 读取现有数据到字典 (时间 -> 行号) for row_idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True), start=2): if not row or not row[0]: continue time_key = normalize_time(row[0]) if time_key in existing_data: log_message(f"警告: 发现重复时间点: {time_key} (行 {row_idx})") existing_data[time_key] = row_idx log_message(f"成功加载现有5分钟数据文件: {filename} (共 {len(existing_data)} 条记录)") break except PermissionError: if attempt < 2: log_message(f"文件被占用,等待5秒后重试 ({attempt+1}/3)") time.sleep(5) else: log_message("无法打开文件,创建新工作簿") wb = Workbook() ws = wb.active ws.title = f"{LOCATION_NAME}5分钟级数据" headers = ["时间", "温度(℃)", "天气状况", "湿度(%)", "风向", "风力等级", "原始风速(km/h)", "风速(m/s)", "降水量(mm)", "气压(hPa)"] ws.append(headers) else: wb = Workbook() ws = wb.active ws.title = f"{LOCATION_NAME}5分钟极数据" headers = ["时间", "温度(℃)", "天气状况", "湿度(%)", "风向", "风力等级", "原始风速(km/h)", "风速(m/s)", "降水量(mm)", "气压(hPa)"] ws.append(headers) log_message(f"创建新的5分钟数据文件: {filename}") if not headers: headers = ["时间", "温度(℃)", "天气状况", "湿度(%)", "风向", "风力等级", "原始风速(km/h)", "风速(m/s)", "降水量(mm)", "气压(hPa)"] # 处理实况数据 if condition_row: current_time = condition_row[0] normalized_current = normalize_time(current_time) if normalized_current in existing_data: row_idx = existing_data[normalized_current] # 更新所有字段(覆盖旧数据) for col_idx in range(1, len(condition_row) + 1): new_value = condition_row[col_idx - 1] ws.cell(row=row_idx, column=col_idx, value=new_value) log_message(f"更新5分钟时间点: {normalized_current} (行 {row_idx})") else: ws.append(condition_row) # 更新字典,记录新行号 existing_data[normalized_current] = ws.max_row log_message(f"新增5分钟时间点: {normalized_current} (行 {ws.max_row})") # 保存文件 if safe_save(wb, filename): log_message(f"5分钟数据保存成功: {filename}") return filename else: log_message(f"5分钟数据保存失败") return None except Exception as e: log_message(f"更新5分钟数据文件时出错: {str(e)}") traceback.print_exc() return None def calculate_solar_parameters(dt, longitude, latitude): """计算太阳高度角、方位角和地外辐照度(高度角和方位角保留两位小数)""" try: # 将时间转换为UTC+0 utc_time = dt - timedelta(hours=8) n = utc_time.timetuple().tm_yday # 年中的第几天 # 计算太阳赤纬 (δ) delta = 23.45 * math.sin(math.radians(360 * (284 + n) / 365)) delta_rad = math.radians(delta) # 计算时角 (ω) utc_hour = utc_time.hour + utc_time.minute/60.0 omega = (utc_hour - 12) * 15 + longitude omega_rad = math.radians(omega) # 纬度转弧度 phi_rad = math.radians(latitude) # 计算太阳高度角 (α) sin_alpha = math.sin(phi_rad) * math.sin(delta_rad) + math.cos(phi_rad) * math.cos(delta_rad) * math.cos(omega_rad) alpha = math.degrees(math.asin(sin_alpha)) # 计算太阳方位角 (γ) sin_gamma = -math.cos(delta_rad) * math.sin(omega_rad) / math.cos(math.radians(alpha)) cos_gamma = (math.sin(delta_rad) * math.cos(phi_rad) - math.cos(delta_rad) * math.sin(phi_rad) * math.cos(omega_rad)) / math.cos(math.radians(alpha)) gamma = math.degrees(math.atan2(sin_gamma, cos_gamma)) gamma = (gamma + 360) % 360 # 转换为0-360度范围 # 计算地外辐照度 (W/m²) g0 = 1367 * (1 + 0.033 * math.cos(math.radians(360 * n / 365))) * sin_alpha g0 = max(0, g0) # 确保非负 # 保留两位小数 return round(alpha, 2), round(gamma, 2), round(g0, 1) except Exception as e: log_message(f"计算太阳参数错误: {str(e)}") return None, None, None def process_minutely_data(fivemin_file): """处理分钟级数据并添加太阳参数""" try: log_message("="*50) log_message(f"开始生成分钟级数据...") # 检查文件是否存在 if not os.path.exists(fivemin_file): log_message(f"错误: 5分钟数据文件不存在: {fivemin_file}") return None # 读取5分钟数据 log_message(f"读取5分钟数据文件: {fivemin_file}") try: fivemin_df = pd.read_excel(fivemin_file) log_message(f"读取5分钟数据成功,共 {len(fivemin_df)} 行") except Exception as e: log_message(f"读取5分钟数据文件失败: {str(e)}") return None # 查找时间列 time_col = next((col for col in fivemin_df.columns if '时间' in col), None) if not time_col: log_message("未找到时间列") return None # 转换时间格式并处理重复值 fivemin_df[time_col] = pd.to_datetime(fivemin_df[time_col], errors='coerce') # 删除无效行 original_count = len(fivemin_df) fivemin_df = fivemin_df.dropna(subset=[time_col]) if original_count > len(fivemin_df): log_message(f"删除了 {original_count - len(fivemin_df)} 行无效时间数据") # 处理重复时间点 - 保留最后一个 duplicate_mask = fivemin_df.duplicated(subset=[time_col], keep='last') if duplicate_mask.any(): log_message(f"发现 {duplicate_mask.sum()} 个重复时间点,保留最后一个") fivemin_df = fivemin_df[~duplicate_mask] # 按时间排序 fivemin_df = fivemin_df.sort_values(by=time_col) fivemin_df.set_index(time_col, inplace=True) # 创建分钟级时间索引 start_time = fivemin_df.index.min() end_time = fivemin_df.index.max() log_message(f"时间范围: {start_time} 至 {end_time}") minutely_index = pd.date_range(start=start_time, end=end_time, freq='1min') minutely_df = pd.DataFrame(index=minutely_index) log_message(f"将生成 {len(minutely_index)} 条分钟级数据") # 插值处理 - 确保时间戳严格递增 base_time = start_time fivemin_timestamps = (fivemin_df.index - base_time).total_seconds() # 检查时间戳是否严格递增 if not (np.diff(fivemin_timestamps) > 0).all(): log_message("时间戳不是严格递增,尝试修复...") # 重新排序 fivemin_df = fivemin_df.sort_index() fivemin_timestamps = (fivemin_df.index - base_time).total_seconds() minutely_timestamps = (minutely_df.index - base_time).total_seconds() # 对连续参数进行插值 log_message("\n开始数据插值处理...") for param in ['风速(m/s)', '温度(℃)', '湿度(%)', '气压(hPa)']: if param in fivemin_df.columns: param_data = fivemin_df[param].dropna() if len(param_data) >= 3: log_message(f"对 {param} 使用三次样条插值 (数据点: {len(param_data)})") cs = CubicSpline(fivemin_timestamps, param_data.values) interpolated_values = cs(minutely_timestamps) # 对温度和风速保留一位小数 if param in ['温度(℃)', '风速(m/s)']: interpolated_values = np.round(interpolated_values, 1) minutely_df[param] = interpolated_values else: log_message(f"对 {param} 使用前向填充 (有效点不足: {len(param_data)})") minutely_df[param] = fivemin_df[param].resample('1min').ffill() # 处理降水量 if '降水量(mm)' in fivemin_df.columns: log_message("处理降水量数据") # 将5分钟降水量转换为每分钟降水量 precip_min = fivemin_df['降水量(mm)'].resample('1min').asfreq().ffill() minutely_df['降水量(mm)'] = precip_min / 5.0 # 平均分配到每分钟 # 处理文本数据 for param in ['天气状况', '风向']: if param in fivemin_df.columns: log_message(f"处理 {param} 数据") minutely_df[param] = fivemin_df[param].resample('1min').ffill() # 数值列保留适当的小数位数 # 温度和风速保留一位小数 if '温度(℃)' in minutely_df.columns: minutely_df['温度(℃)'] = minutely_df['温度(℃)'].round(1) if '风速(m/s)' in minutely_df.columns: minutely_df['风速(m/s)'] = minutely_df['风速(m/s)'].round(1) # 其他数值列保留一位小数 other_numeric_cols = ['湿度(%)', '气压(hPa)', '降水量(mm)'] for col in other_numeric_cols: if col in minutely_df.columns: minutely_df[col] = minutely_df[col].round(1) # 计算太阳参数 log_message(f"\n开始计算太阳位置和辐照度...") solar_data = [] total_points = len(minutely_df) last_progress = -1 start_time = time.time() for i, dt in enumerate(minutely_df.index): alpha, gamma, g0 = calculate_solar_parameters(dt, LONGITUDE, LATITUDE) solar_data.append({ '太阳高度角(度)': alpha, '太阳方位角(度)': gamma, '地外辐照度(W/m2)': g0 }) # 显示进度 progress = int((i + 1) / total_points * 100) if progress != last_progress and progress % 10 == 0: elapsed = time.time() - start_time remaining = (total_points - i) * (elapsed / (i+1)) log_message(f"计算进度: {progress}% | 预计剩余时间: {remaining:.1f}秒") last_progress = progress solar_df = pd.DataFrame(solar_data, index=minutely_df.index) minutely_df = pd.concat([minutely_df, solar_df], axis=1) # 生成输出文件名 current_month = datetime.now().strftime("%Y%m") filename = f"{LOCATION_NAME}_分钟级数据_{current_month}.xlsx" filepath = os.path.join(os.path.abspath(MINUTELY_DIR), filename) # 保存分钟级数据 minutely_df.reset_index(inplace=True) minutely_df.rename(columns={'index': '时间'}, inplace=True) # 创建Excel工作簿 wb = openpyxl.Workbook() ws = wb.active ws.title = "分钟级数据" # 写入数据 log_message(f"\n正在写入分钟级数据到Excel...") for col_idx, col_name in enumerate(minutely_df.columns, 1): ws.cell(row=1, column=col_idx, value=col_name) for r_idx, row in enumerate(minutely_df.values, 2): for c_idx, value in enumerate(row, 1): ws.cell(row=r_idx, column=c_idx, value=value) # 应用样式 header_fill = PatternFill(start_color="1F4E78", end_color="1F4E78", fill_type="solid") header_font = Font(color="FFFFFF", bold=True) header_alignment = Alignment(horizontal="center", vertical="center") for cell in ws[1]: cell.fill = header_fill cell.font = header_font cell.alignment = header_alignment # 设置列宽 log_message("设置列宽...") for col in ws.columns: max_length = 0 for cell in col: try: if cell.value and len(str(cell.value)) > max_length: max_length = len(str(cell.value)) except: pass adjusted_width = (max_length + 2) * 1.2 ws.column_dimensions[col[0].column_letter].width = min(adjusted_width, 50) # 保存文件 if safe_save(wb, filepath): log_message(f"分钟级数据保存成功: {filepath}") return filepath else: log_message(f"分钟级数据保存失败") return None except Exception as e: log_message(f"处理分钟级数据时出错: {str(e)}") traceback.print_exc() return None def get_weather_data(): """获取天气数据并处理""" log_message(f"\n获取{LOCATION_NAME}实况天气数据...") try: # 获取实况数据 condition_row = get_condition_data() if not condition_row: log_message("获取实况数据失败") return False # 更新5分钟数据文件 fivemin_file = update_fivemin_excel(condition_row) if not fivemin_file: log_message("5分钟数据更新失败") return False # 处理分钟级数据(只在5分钟整点处理) current_minute = datetime.now().minute if current_minute % 5 == 0: # 0分、5分、10分等 log_message(f"当前时间 {datetime.now().strftime('%H:%M')} 是5分钟整点,开始生成分钟级数据") minutely_file = process_minutely_data(fivemin_file) if minutely_file: log_message(f"\n数据处理完成!") log_message(f" 5分钟数据 -> {fivemin_file}") log_message(f" 分钟数据 -> {minutely_file}") else: log_message(f"分钟数据处理失败") else: log_message(f"跳过分钟级数据处理(非5分钟整点)") return True except Exception as e: log_message(f"获取天气数据时出错: {str(e)}") traceback.print_exc() return False # 主程序循环 - 确保实时输出日志 if __name__ == '__main__': try: # 程序启动横幅 - 使用print确保直接输出 print("=" * 70, flush=True) print(f"{LOCATION_NAME}气象数据自动获取与处理系统", flush=True) print(f"系统启动时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", flush=True) print(f"Python版本: {sys.version}", flush=True) print(f"工作目录: {os.getcwd()}", flush=True) print(f"5分钟数据存储目录: {os.path.abspath(FIVEMIN_DIR)}", flush=True) print(f"分钟数据存储目录: {os.path.abspath(MINUTELY_DIR)}", flush=True) print("=" * 70, flush=True) print("按 Ctrl+C 停止程序", flush=True) print("=" * 70, flush=True) # 初始执行一次 log_message("\n执行初始数据获取...") success = get_weather_data() # 主循环 current_month = datetime.now().month while True: try: # 计算到下一个整分钟的时间(确保在0秒执行) now = datetime.now() next_minute = now.replace(second=0, microsecond=0) + timedelta(minutes=1) wait_seconds = (next_minute - now).total_seconds() if wait_seconds > 0: wait_minutes = wait_seconds / 60 log_message(f"\n等待 {wait_seconds:.1f} 秒 ({wait_minutes:.2f} 分钟) 到下一个整分钟") time.sleep(wait_seconds) # 检查月份变化 new_month = datetime.now().month if new_month != current_month: log_message(f"\n月份变更: {current_month} -> {new_month}") current_month = new_month # 获取并处理数据 log_message("\n" + "="*70) log_message(f"开始新一轮数据采集 ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})") success = get_weather_data() except KeyboardInterrupt: log_message("\n程序已停止") break except Exception as e: # 获取详细错误信息 exc_type, exc_value, exc_traceback = sys.exc_info() error_details = traceback.format_exception(exc_type, exc_value, exc_traceback) error_msg = ''.join(error_details) log_message(f"\n主循环错误: {str(e)}") log_message(f"详细错误信息:\n{error_msg}") log_message("5分钟后重试...") time.sleep(300) # 5分钟后重试 # 重新计算下一个整分钟时间 now = datetime.now() next_minute = now.replace(second=0, microsecond=0) + timedelta(minutes=1) current_month = datetime.now().month log_message(f"新的重试时间: {next_minute.strftime('%Y-%m-%d %H:%M:%S')}") except Exception as e: # 启动错误处理 print(f"程序启动失败: {str(e)}", flush=True) traceback.print_exc(file=sys.stdout)把这段程序也用上面那个方法实现在Python程序在运行时在页面显示结果
07-16
import os import pandas as pd import warnings from collections import defaultdict # 关闭警告 warnings.filterwarnings('ignore') # 文件路径设置 desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop') folder_path = os.path.join(desktop_path, '洛阳') output_path = os.path.join(desktop_path, '故障分析结果') os.makedirs(output_path, exist_ok=True) def smart_column_mapping(df): """智能匹配列名并返回映射字典""" # 创建列名映射字典 col_map = {} # 可能的列名变体(不区分大小写和空格) possible_names = { 'fsu': ['厂家', 'fsu厂家', '生产厂家', '厂商'], 'device': ['设备类型', '设备名称', '设备型号', '类型'], 'alarm': ['告警描述', '告警内容', '告警信息', '告警详情'] } # 遍历实际列名进行匹配 for actual_col in df.columns: lowered = str(actual_col).strip().lower().replace(' ', '') # FSU厂家列匹配 if any(kw in lowered for kw in possible_names['fsu']): col_map['fsu'] = actual_col # 设备类型列匹配 if any(kw in lowered for kw in possible_names['device']): col_map['device'] = actual_col # 告警描述列匹配 if any(kw in lowered for kw in possible_names['alarm']): col_map['alarm'] = actual_col return col_map def process_file(file_path): # 读取Excel(增加处理合并单元格的功能) df = pd.read_excel(file_path, header=None) # 先不跳过任何行 # 自动检测标题行(查找包含关键字的行) header_row = None for i in range(min(5, len(df))): # 检查前5行 if any('站点' in str(cell) for cell in df.iloc[i]) and \ any('设备类型' in str(cell) for cell in df.iloc[i]): header_row = i break # 重新读取文件,指定标题行 if header_row is not None: df = pd.read_excel(file_path, header=header_row) print(f"检测到标题行: {header_row+1}") else: print(f"警告: 无法确定标题行,使用默认读取") df = pd.read_excel(file_path, skiprows=2) # 原始逻辑 # 打印实际列名用于调试 print(f"文件列名: {list(df.columns)}") # 获取列映射 col_map = smart_column_mapping(df) print(f"列映射: {col_map}") if not all(key in col_map for key in ['fsu', 'device', 'alarm']): missing = [key for key in ['fsu', 'device', 'alarm'] if key not in col_map] print(f"缺少必要列: {missing}") return None # 更灵活的筛选条件 device_mask = df[col_map['device']].astype(str).str.contains('开关电源|电源柜|电源设备') alarm_mask = df[col_map['alarm']].astype(str).str.contains('交流输入故障|AC输入故障|市电故障') filtered_df = df[device_mask & alarm_mask] # 打印匹配结果用于调试 print(f"找到匹配记录: {len(filtered_df)}条") # 提取必要字段 result_cols = [col_map['fsu'], col_map['device'], col_map['alarm']] return filtered_df[result_cols] if not filtered_df.empty else None # 主处理流程 all_results = [] fault_counts = defaultdict(int) for file in os.listdir(folder_path): if file.lower().endswith(('.xlsx', '.xls')): full_path = os.path.join(folder_path, file) print(f"\n{'='*40}") print(f"处理文件: {file}") try: result = process_file(full_path) if result is not None: all_results.append(result) # 统计厂家故障次数 for manufacturer in result[smart_column_mapping(result)['fsu']]: fault_counts[manufacturer] += 1 print(f"✅ 成功提取{len(result)}条记录") except Exception as e: print(f"处理失败: {str(e)}") # 结果输出 if all_results: final_df = pd.concat(all_results) detail_file = os.path.join(output_path, '故障详情.xlsx') final_df.to_excel(detail_file, index=False) print(f"\n导出详情到: {detail_file}") # 厂家统计 stats_df = pd.DataFrame({ '厂家': list(fault_counts.keys()), '故障次数': list(fault_counts.values()) }).sort_values('故障次数', ascending=False) stats_file = os.path.join(output_path, '厂家统计.xlsx') stats_df.to_excel(stats_file, index=False) print(f"导出统计到: {stats_file}") # 打印示例数据 print("\n示例数据:") print(final_df.head(3)) else: print("\n⚠️ 所有文件均未找到匹配数据") print("\n处理完成!") 处理文件: 6.12.xls 检测到标题行: 3 文件列名: ['序号', '地市', '告警编号', '区域', '站点', '机房', '机房类型', '设备', '设备类型', '业务类型', '告警标准编码', 'FSU厂家', '告警量', '告警级别', '告警时间', '告警接收时间', '消除时间', '消除接收时间', '告警时长(分钟)', '告警值', '告警描述', '告警编码', '告警逻辑分类', '告警逻辑子类', '确认人', '确认时间', '确认说明', '告警标准化名称', '告警标题', '屏 蔽类型'] 列映射: {'device': '屏蔽类型', 'fsu': 'FSU厂家', 'alarm': '告警描述'} 找到匹配记录: 0条,为什么没找到匹配记录,请解决问题
08-30
import baostock as bs import pandas as pd import numpy as np import matplotlib.pyplot as plt from datetime import datetime, timedelta # ======================== # 1. 初始化 Baostock # ======================== bs.login() # ======================== # 2. 参数设置 # ======================== START_DATE = "2018-01-01" END_DATE = "2023-12-31" FACTORS = { 'pb': -1, # 价值因子:市净率越低越好 → 负向 'roe': 1, # 质量因子:ROE越高越好 → 正向 'momentum': 1 # 技术因子:过去60日收益率 → 正向 } WEIGHTS = [0.3, 0.4, 0.3] # 对应三个因子权重(可调整) TOP_N_PERCENT = 0.1 # 选前10% TRANSACTION_COST = 0.0003 # 单边千三 # 存储中间结果 portfolio_values = [] # 组合净值序列 dates_recorded = [] benchmark_values = [] # 沪深300基准 # ======================== # 3. 工具函数定义 # ======================== def get_all_stock_codes(start_date): """获取指定时间点存在的所有A股股票代码""" rs = bs.query_stock_basic() data_list = [] while (rs.error_code == '0') & rs.next(): data_list.append(rs.get_row_data()) df = pd.DataFrame(data_list, columns=rs.fields) # 过滤A股(排除科创板、B股等) df = df[df['code'].str.startswith(('sh.6', 'sz.0', 'sz.3'))] return df['code'].tolist() def is_new_stock(code, trade_date, days=360): """判断是否为上市不足days天的次新股""" info = bs.query_stock_basic(code=code) if info.error_code != '0': return True info_data = info.get_row_data() if not info_data: return True ipoDate = info_data[5] # 上市日期 if not ipoDate: return True ipo = datetime.strptime(ipoDate, "%Y-%m-%d") td = datetime.strptime(trade_date, "%Y-%m-%d") return (td - ipo).days < days def get_trading_calendar(start_date, end_date): """获取交易日历(月首个交易日)""" rs = bs.query_trade_dates(start_date=start_date, end_date=end_date) data = [] while rs.next(): data.append(rs.get_row_data()) df = pd.DataFrame(data, columns=rs.fields) df = df[df['is_trading_day'] == '1'] df['calendar_date'] = pd.to_datetime(df['calendar_date']) # 提取每月第一个交易日 first_days = df.resample('M', on='calendar_date').first()['calendar_date'] return first_days.dt.strftime('%Y-%m-%d').tolist() def get_historical_data(code, start, end, freq="d"): """获取个股历史行情""" rs = bs.query_history_k_data_plus( code, "date,open,high,low,close,volume", start_date=start, end_date=end, frequency=freq) data = [] while rs.next(): data.append(rs.get_row_data()) if not data: return None df = pd.DataFrame(data, columns=rs.fields) df['close'] = pd.to_numeric(df['close']) df['open'] = pd.to_numeric(df['open']) return df def get_fundamental_data(code, date): """获取某日附近最新财报中的 ROE 和 PB""" year = int(date[:4]) quarter_map = {"01": "1", "02": "1", "03": "1", "04": "2", "05": "2", "06": "2", "07": "3", "08": "3", "09": "3", "10": "4", "11": "4", "12": "4"} quarter = quarter_map[date[5:7]] # 查询季度财报 rs = bs.query_performance_data(code, year, int(quarter)) data = [] while rs.next(): data.append(rs.get_row_data()) if not data: return None df = pd.DataFrame(data, columns=rs.fields) latest = df.iloc[0] try: pb = float(latest['PB']) if latest['PB'] else np.nan roe = float(latest['roeAvg']) if latest['roeAvg'] else np.nan return {'pb': pb, 'roe': roe} except: return None def winsorize_series(series, limits=(0.01, 0.01)): """去极值:缩尾处理""" return series.clip(lower=series.quantile(limits[0]), upper=series.quantile(1-limits[1])) def zscore_normalize(series): """Z-Score标准化""" return (series - series.mean()) / series.std() # ======================== # 4. 主循环:按月调仓 # ======================== # 获取交易日历(每月第一个交易日) rebalance_dates = get_trading_calendar(START_DATE, END_DATE) # 初始资金 cash = 1_000_000 holdings = {} # {code: shares} # 获取沪深300基准数据 bf_benchmark = get_historical_data("sh.000300", START_DATE, END_DATE) bf_benchmark['close'] = pd.to_numeric(bf_benchmark['close']) bf_benchmark.set_index('date', inplace=True) initial_benchmark_value = float(bf_benchmark.loc[rebalance_dates[0]]['close']) current_benchmark_value = initial_benchmark_value print(f"开始回测:{START_DATE} 至 {END_DATE}") for i, rebalance_date in enumerate(rebalance_dates): print(f"\n🔄 调仓日:{rebalance_date}") # 更新基准 try: current_benchmark_value = float(bf_benchmark.loc[rebalance_date]['close']) benchmark_values.append(current_benchmark_value / initial_benchmark_value) except: if benchmark_values: benchmark_values.append(benchmark_values[-1]) else: benchmark_values.append(1.0) # 下一个交易日开盘买入 next_day = (datetime.strptime(rebalance_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d") # 获取当前全市场股票 stock_codes = get_all_stock_codes(rebalance_date) factor_data = [] for code in stock_codes: # 清洗条件1:剔除ST/*ST stock_info = bs.query_stock_basic(code=code) if stock_info.error_code == '0' and stock_info.get_row_data(): name = stock_info.get_row_data()[2] if 'ST' in name: continue # 清洗条件2:剔除次新股 if is_new_stock(code, rebalance_date): continue # 清洗条件3:检查是否停牌(当日是否有成交量?简单判断) price_df = get_historical_data(code, rebalance_date, next_day, "d") if price_df is None or len(price_df) == 0: continue if float(price_df.iloc[0]['volume']) == 0: continue open_price = float(price_df.iloc[0]['open']) # 获取基本面因子 fundamental = get_fundamental_data(code, rebalance_date) if not fundamental: continue pb = fundamental['pb'] roe = fundamental['roe'] # 动量因子:过去60日收益率 hist_prices = get_historical_data(code, (datetime.strptime(rebalance_date, "%Y-%m-%d") - timedelta(days=70)).strftime("%Y-%m-%d"), rebalance_date, "d") if hist_prices is None or len(hist_prices) < 20: continue momentum = (float(hist_prices.iloc[-1]['close']) - float(hist_prices.iloc[0]['close'])) / float(hist_prices.iloc[0]['close']) factor_data.append({ 'code': code, 'open_price': open_price, 'pb': pb, 'roe': roe, 'momentum': momentum }) # 构造成DataFrame factor_df = pd.DataFrame(factor_data) if len(factor_df) == 0: print("⚠️ 当前周期无有效股票,跳过...") portfolio_values.append(cash / 1_000_000) dates_recorded.append(rebalance_date) continue # 因子处理:去极值 + Z-Score标准化 for col, weight in zip(['pb', 'roe', 'momentum'], WEIGHTS): if col not in factor_df.columns or factor_df[col].isna().all(): continue # 缩尾 factor_df[col] = winsorize_series(factor_df[col]) # Z-Score factor_df[col] = zscore_normalize(factor_df[col]) # 注意:PB 是负向因子,需反转符号以匹配“越大越好” factor_df['pb'] *= -1 # 综合得分 factor_df['score'] = ( WEIGHTS[0] * factor_df['pb'] + WEIGHTS[1] * factor_df['roe'] + WEIGHTS[2] * factor_df['momentum'] ) # 排序并选择前10% factor_df.sort_values(by='score', ascending=False, inplace=True) top_n = max(1, int(len(factor_df) * TOP_N_PERCENT)) selected_stocks = factor_df.head(top_n).copy() # ======================== # 5. 交易执行(T+1日开盘) # ======================== total_value_before = cash + sum(holding_shares * holdings[stock_code] for stock_code, holding_shares in holdings.items() if stock_code in price_df['code'].values) # 先卖出不在目标清单中的持仓 sell_value = 0 to_remove = [] for code in holdings: if code not in selected_stocks['code'].values: # 查找对应价格 pos = price_df[price_df['code'] == code] if len(pos) > 0: price = float(pos.iloc[0]['open']) sell_value += price * holdings[code] del holdings[code] to_remove.append(code) # 扣除交易费 sell_cost = sell_value * TRANSACTION_COST cash += sell_value - sell_cost # 计算每只股票的投资金额(等权) num_stocks = len(selected_stocks) if num_stocks == 0: portfolio_values.append(cash / 1_000_000) dates_recorded.append(rebalance_date) continue invest_per_stock = cash / num_stocks # 买入目标股票 for _, row in selected_stocks.iterrows(): code = row['code'] price = row['open_price'] shares = invest_per_stock / price # 扣除买入手续费 buy_cost = invest_per_stock * TRANSACTION_COST cash -= (invest_per_stock + buy_cost) holdings[code] = holdings.get(code, 0) + shares # 更新总资产 total_value_after = cash + sum( float(get_historical_data(code, next_day, next_day)['open']) * shares for code, shares in holdings.items() if get_historical_data(code, next_day, next_day) is not None ) portfolio_values.append(total_value_after / 1_000_000) dates_recorded.append(rebalance_date) print(f"📈 持仓更新完成,持有 {len(holdings)} 只股票") # ======================== # 6. 绩效评估 # ======================== dates_recorded = pd.to_datetime(dates_recorded) df_result = pd.DataFrame({ 'strategy': portfolio_values, 'benchmark': benchmark_values }, index=dates_recorded) # 补充缺失日期的净值(前向填充) all_days = pd.date_range(start=START_DATE, end=END_DATE, freq='D') df_full = df_result.reindex(all_days, method='ffill') # 年化收益率 ret_s = df_full['strategy'].iloc[-1] / df_full['strategy'].iloc[0] - 1 ann_ret_s = (df_full['strategy'].iloc[-1] / df_full['strategy'].iloc[0]) ** (252 / len(df_full)) - 1 ret_b = df_full['benchmark'].iloc[-1] / df_full['benchmark'].iloc[0] - 1 ann_ret_b = (df_full['benchmark'].iloc[-1] / df_full['benchmark'].iloc[0]) ** (252 / len(df_full)) - 1 # 波动率与夏普比率(无风险利率设为0) returns_s = df_full['strategy'].pct_change().dropna() volatility_s = returns_s.std() * np.sqrt(252) sharpe_ratio = ann_ret_s / volatility_s if volatility_s != 0 else 0 # 最大回撤 cummax = df_full['strategy'].cummax() max_drawdown = (cummax - df_full['strategy']).max() / cummax.max() # 信息比率(超额收益波动率) excess_returns = returns_s - df_full['benchmark'].pct_change().dropna() info_ratio = excess_returns.mean() * 252 / (excess_returns.std() * np.sqrt(252)) # 输出结果 print("\n" + "="*50) print("📊 策略绩效报告") print("="*50) print(f"回测周期: {START_DATE} ~ {END_DATE}") print(f"年化收益率: {ann_ret_s:.2%}") print(f"基准年化收益: {ann_ret_b:.2%}") print(f"夏普比率: {sharpe_ratio:.2f}") print(f"最大回撤: {max_drawdown:.2%}") print(f"信息比率: {info_ratio:.2f}") print(f"最终净值: {df_full['strategy'].iloc[-1]:.2f}") # ======================== # 7. 可视化 # ======================== plt.figure(figsize=(12, 6)) plt.plot(df_full.index, df_full['strategy'], label='多因子策略', linewidth=2) plt.plot(df_full.index, df_full['benchmark'], label='沪深300', linestyle='--', alpha=0.8) plt.title('多因子选股策略 vs 沪深300') plt.xlabel('时间') plt.ylabel('净值') plt.legend() plt.grid(True, alpha=0.3) plt.tight_layout() plt.show() # ======================== # 8. 登出 Baostock # ======================== bs.logout() 对上面代码进行纠错
最新发布
12-02
import os import re import sys import tkinter as tk from tkinter import ttk, Frame, Scrollbar, Canvas from tkinter import scrolledtext, messagebox import pandas as pd import numpy as np from glob import glob import seaborn as sns from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk import matplotlib.pyplot as plt from matplotlib.figure import Figure import matplotlib as mpl import matplotlib.font_manager as fm from datetime import datetime def configure_chinese_font(): # 尝试寻找可用中文字体 chinese_fonts = [] chinese_fonts = [ "SimHei", "Microsoft YaHei", "SimSun", "KaiTi", "FangSong", "Arial Unicode MS", ] # 检查系统中有哪些字体可用 available_fonts = [f.name for f in fm.fontManager.ttflist] selected_font = None # 寻找第一个可用的中文字体 for font in chinese_fonts: if font in available_fonts: selected_font = font break # 如果找不到任何中文字体,使用默认字体并警告 if selected_font is None: print(f"警告: 系统中未找到以下中文: {', '.join(chinese_fonts)}") print("将使用系统默认字体,中文可能无法正确显示") selected_font = fm.FontProperties().get_name() # 设置matplotlib使用选定的字体 plt.rcParams["font.family"] = selected_font plt.rcParams["axes.unicode_minus"] = False print(f"已选择字体: {selected_font} 用于中文显示") def find_batch_folders(path, batch_number): target_dir = "4WCSVLog" batch_folders = [] for root, dirs, files in os.walk(path): path_components = os.path.normpath(root).split(os.sep) if target_dir in path_components and batch_number in dirs: full_path = os.path.join(root, batch_number) batch_folders.append(full_path) return batch_folders def get_csv_files_path1(folder): all_files = glob(os.path.join(folder, "*-SVP1B*.csv")) return [f for f in all_files if "HEAD" not in f] def get_csv_files_path2(folder): return glob(os.path.join(folder, "*-SVP1B*.csv")) def parse_svp1b(filename): match = re.search(r"svp1b([0-9A-Z])([0-9])([0-9])", filename, re.IGNORECASE) if match: piece, line, file_num = match.groups() piece_num = ( 10 + ord(piece.upper()) - ord("A") if piece.isalpha() else int(piece) ) return piece_num, int(line), int(file_num) return None def read_csv_file(file, start_row=0, start_col=0): df = pd.read_csv(file, header=None, skiprows=start_row) return df.iloc[:, start_col:] if start_col > 0 else df def merge_files(files, path_type): piece_dict = {} for file in files: svp_info = parse_svp1b(os.path.basename(file).lower()) if not svp_info: continue piece, line, file_num = svp_info key = (piece, line) df = read_csv_file( file, start_row=(2 if path_type == 1 else 4), start_col=(0 if path_type == 1 else 18), ) if df.empty: continue if key not in piece_dict: piece_dict[key] = [] piece_dict[key].append((file_num, df)) sorted_keys = sorted(piece_dict.keys(), key=lambda x: (x[0], x[1])) global_col_counter = 1 final_dfs = [] for key in sorted_keys: file_dfs = sorted(piece_dict[key], key=lambda x: x[0]) base_df = None for file_num, df in file_dfs: if base_df is None: global_col_counter = 1 new_columns = [] for _ in df.columns: new_columns.append(f"net{global_col_counter}") global_col_counter += 1 df.columns = new_columns base_df = df else: new_columns = [] for _ in df.columns: new_columns.append(f"net{global_col_counter}") global_col_counter += 1 df.columns = new_columns base_df = pd.concat([base_df, df], axis=1) # 横向拼接 if base_df is not None: final_dfs.append(base_df) return ( pd.concat(final_dfs, axis=0, ignore_index=True) if final_dfs else pd.DataFrame() ) def collect_batch_data(batch_list): batch_data_dict = {} path1 = r"\\10.127.1.248\c1tst\7750" path2 = r"\\10.127.1.248\c1tst\7755" for idx, batch_number in enumerate(batch_list, 1): print(f"=== 处理第 {idx}/{len(batch_list)} 个批次:{batch_number} ===") batch_data = pd.DataFrame() folders1 = find_batch_folders(path1, batch_number) if folders1: for folder1 in folders1: files = get_csv_files_path1(folder1) if files: current_data = merge_files(files, path_type=1) batch_data = pd.concat( [batch_data, current_data], axis=0, ignore_index=True ) print(f"从{folder1}找到 {len(files)} 个数据文件") else: folders2 = find_batch_folders(path2, batch_number) if folders2: for folder2 in folders2: files = get_csv_files_path2(folder2) if files: current_data = merge_files(files, path_type=2) batch_data = pd.concat( [batch_data, current_data], axis=0, ignore_index=True ) print(f"从{folder2}找到 {len(files)} 个数据文件") else: print(f"批次 {batch_number}:未找到对应文件夹,跳过\n") continue if not batch_data.empty: batch_data_dict[batch_number] = batch_data print(f"批次 {batch_number} 数据合并完成,共 {len(batch_data)} 行\n") else: print(f"批次 {batch_number}:无有效数据,跳过\n") return batch_data_dict class RedirectText: def __init__(self, text_widget): self.text_widget = text_widget def write(self, string): self.text_widget.insert(tk.END, string) self.text_widget.see(tk.END) def flush(self): pass class NetPlotViewer: """高级NET图表查看器,支持分页和无限滚动""" def __init__(self, root, batch_data_dict, batch_list, net_list): self.root = root self.batch_data_dict = batch_data_dict self.batch_list = batch_list self.net_list = net_list # 预先计算并缓存所有NET的数据 self.net_data_dict = self._precompute_net_data() # 记录无效的NET self.invalid_nets = [net for net in net_list if net not in self.net_data_dict] # 打印无效NET警告 if self.invalid_nets: print(f"警告: 以下NET在批次数据中不存在: {', '.join(self.invalid_nets)}") # 只保留有效的NET self.valid_nets = [net for net in net_list if net in self.net_data_dict] # 分页配置 self.per_page = 10 self.current_page = 0 self.total_pages = max( 1, (len(self.valid_nets) + self.per_page - 1) // self.per_page ) # 无限滚动配置 self.loaded_count = 0 self.load_step = 10 self.loading = False self.view_mode = "pagination" # 或 "infinite_scroll" # 创建主窗口 self.create_main_window() # 居中显示窗口 self.center_window() def center_window(self): """使窗口在屏幕上居中显示""" self.plot_window.update_idletasks() # 确保窗口尺寸已更新 width = self.plot_window.winfo_width() height = self.plot_window.winfo_height() screen_width = self.plot_window.winfo_screenwidth() screen_height = self.plot_window.winfo_screenheight() x = (screen_width - width) // 2 y = (screen_height - height) // 2 # 设置窗口位置居中 self.plot_window.geometry(f"+{x}+{y}") def _precompute_net_data(self): """预先计算并缓存所有NET的数据""" net_data_dict = {} for net in self.net_list: net_data = pd.DataFrame() for batch in self.batch_list: if ( batch in self.batch_data_dict and net in self.batch_data_dict[batch].columns ): net_data[f"{batch}"] = self.batch_data_dict[batch][net] # 只添加有数据的NET if not net_data.empty: net_data_dict[net] = net_data return net_data_dict def create_main_window(self): """创建主窗口和控件""" self.plot_window = tk.Toplevel(self.root) self.plot_window.title("批次数据分布箱线图") self.plot_window.geometry("1200x800") # 添加无效NET警告 if self.invalid_nets: warning_frame = tk.Frame( self.plot_window, bg="#ffebee", bd=1, relief=tk.SOLID ) warning_frame.pack(fill=tk.X, padx=10, pady=5) warning_label = tk.Label( warning_frame, text=f"警告: 以下NET在批次数据中不存在: {', '.join(self.invalid_nets)}", fg="#d32f2f", bg="#ffebee", font=("SimHei", 10, "bold"), ) warning_label.pack(padx=10, pady=5) # 添加有效NET数量信息 # info_frame = tk.Frame(self.plot_window, bg="#e3f2fd", bd=1, relief=tk.SOLID) # info_frame.pack(fill=tk.X, padx=10, pady=5) # info_label = tk.Label( # info_frame, # text=f"显示 {len(self.valid_nets)} 个有效的NET中的图表", # fg="#1565c0", # bg="#e3f2fd", # font=("SimHei", 10), # ) # info_label.pack(padx=10, pady=5) # 创建顶部控制面板 self.create_control_panel() # 创建内容区域 self.create_content_area() # 初始显示 self.update_display() self.plot_window.update_idletasks() self.center_window() def create_control_panel(self): """创建顶部控制面板""" control_frame = tk.Frame( self.plot_window, bd=1, relief=tk.RIDGE, padx=10, pady=10 ) control_frame.pack(fill=tk.X, padx=10, pady=(10, 5)) # 视图模式选择 tk.Label(control_frame, text="视图模式:", font=("SimHei", 10)).pack( side=tk.LEFT, padx=5 ) self.view_mode_var = tk.StringVar( value="分页视图" if self.view_mode == "pagination" else "无限滚动" ) view_mode_menu = ttk.Combobox( control_frame, textvariable=self.view_mode_var, values=["分页视图", "无限滚动"], width=12, state="readonly", ) view_mode_menu.pack(side=tk.LEFT, padx=5) view_mode_menu.bind("<<ComboboxSelected>>", self.change_view_mode) # 分页控制 self.pagination_frame = tk.Frame(control_frame) self.pagination_frame.pack(side=tk.LEFT, padx=20) # 分页控件 self.prev_btn = tk.Button( self.pagination_frame, text="上一页", command=lambda: self.change_page(-1), state=tk.DISABLED, ) self.prev_btn.pack(side=tk.LEFT, padx=5) self.page_label = tk.Label( self.pagination_frame, text="1/1", font=("SimHei", 10) ) self.page_label.pack(side=tk.LEFT, padx=5) self.next_btn = tk.Button( self.pagination_frame, text="下一页", command=lambda: self.change_page(1), state=tk.NORMAL if self.total_pages > 1 else tk.DISABLED, ) self.next_btn.pack(side=tk.LEFT, padx=5) # 跳转控件 tk.Label(self.pagination_frame, text="跳转到:", font=("SimHei", 9)).pack( side=tk.LEFT, padx=(20, 0) ) self.page_entry = tk.Entry(self.pagination_frame, width=4, font=("SimHei", 9)) self.page_entry.pack(side=tk.LEFT, padx=2) self.page_entry.insert(0, "1") self.goto_btn = tk.Button( self.pagination_frame, text="跳转", command=self.goto_page, font=("SimHei", 9), ) self.goto_btn.pack(side=tk.LEFT, padx=2) # 每页数量选择 tk.Label(self.pagination_frame, text="每页数量:", font=("SimHei", 9)).pack( side=tk.LEFT, padx=(20, 0) ) self.per_page_var = tk.StringVar(value=str(self.per_page)) self.per_page_dropdown = ttk.Combobox( self.pagination_frame, textvariable=self.per_page_var, values=["1", "5", "10", "20", "50"], width=4, state="readonly", ) self.per_page_dropdown.pack(side=tk.LEFT, padx=2) self.per_page_dropdown.bind("<<ComboboxSelected>>", self.change_per_page) # 关闭按钮 close_btn = tk.Button( control_frame, text="关闭窗口", command=self.plot_window.destroy, bg="#f44336", fg="white", font=("SimHei", 10), ) close_btn.pack(side=tk.RIGHT, padx=10) # 状态标签 self.status_label = tk.Label( control_frame, text="", font=("SimHei", 9), fg="#666" ) self.status_label.pack(side=tk.RIGHT, padx=10) def create_content_area(self): """创建内容显示区域""" # 创建滚动框架 self.scroll_frame = tk.Frame(self.plot_window) self.scroll_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=(0, 10)) # 添加水平和垂直滚动条 self.hscrollbar = tk.Scrollbar(self.scroll_frame, orient=tk.HORIZONTAL) self.vscrollbar = tk.Scrollbar(self.scroll_frame, orient=tk.VERTICAL) # 创建画布用于滚动 self.canvas = tk.Canvas( self.scroll_frame, yscrollcommand=self.vscrollbar.set, xscrollcommand=self.hscrollbar.set, ) # 配置滚动条 self.vscrollbar.config(command=self.canvas.yview) self.hscrollbar.config(command=self.canvas.xview) # 布局 self.hscrollbar.pack(side=tk.BOTTOM, fill=tk.X) self.vscrollbar.pack(side=tk.RIGHT, fill=tk.Y) self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) # 创建内容框架 self.content_frame = tk.Frame(self.canvas) self.canvas_frame = self.canvas.create_window( (0, 0), window=self.content_frame, anchor="nw" ) # 绑定配置事件 self.content_frame.bind("<Configure>", self.on_frame_configure) self.canvas.bind("<Configure>", self.on_canvas_configure) # 绑定鼠标滚轮事件 self.canvas.bind_all("<MouseWheel>", self.on_mousewheel) def on_frame_configure(self, event=None): """更新滚动区域""" self.canvas.configure(scrollregion=self.canvas.bbox("all")) # 检查是否需要加载更多(无限滚动模式) if self.view_mode == "infinite_scroll": self.check_load_more() # 动态调整列宽 self.content_frame.update_idletasks() self.canvas.itemconfig( self.canvas_frame, width=self.content_frame.winfo_width() ) def on_canvas_configure(self, event): """调整内容框架宽度以适应画布""" self.canvas.itemconfig(self.canvas_frame, width=event.width) def on_mousewheel(self, event): """处理鼠标滚轮事件""" self.canvas.yview_scroll(int(-1 * (event.delta / 120)), "units") # 检查是否需要加载更多(无限滚动模式) if self.view_mode == "infinite_scroll": self.check_load_more() def change_view_mode(self, event=None): """切换视图模式""" new_mode = ( "pagination" if self.view_mode_var.get() == "分页视图" else "infinite_scroll" ) if new_mode != self.view_mode: self.view_mode = new_mode self.update_display() # 更新分页控件的可见性 if self.view_mode == "pagination": self.pagination_frame.pack(side=tk.LEFT, padx=20) else: self.pagination_frame.pack_forget() def change_per_page(self, event=None): """更改每页显示数量""" try: new_per_page = int(self.per_page_var.get()) if new_per_page != self.per_page: self.per_page = new_per_page self.total_pages = max( 1, (len(self.valid_nets) + self.per_page - 1) // self.per_page ) self.current_page = 0 self.update_display() except ValueError: pass def change_page(self, delta): """翻页""" new_page = max(0, min(self.total_pages - 1, self.current_page + delta)) if new_page != self.current_page: self.current_page = new_page self.update_display() def goto_page(self): """跳转到指定页码""" try: page_num = int(self.page_entry.get()) - 1 if 0 <= page_num < self.total_pages: self.current_page = page_num self.update_display() else: messagebox.showerror("错误", f"页码必须在 1 到 {self.total_pages} 之间") except ValueError: messagebox.showerror("错误", "请输入有效的页码数字") def check_load_more(self): """检查是否需要加载更多(无限滚动模式)""" if self.view_mode != "infinite_scroll" or self.loading: return # 获取滚动位置 scroll_position = self.canvas.yview() # 如果滚动到底部附近(90%位置),加载更多 if scroll_position[1] > 0.9 and self.loaded_count < len(self.valid_nets): self.load_more() def load_more(self): """加载更多图表(无限滚动模式)""" if self.loading or self.loaded_count >= len(self.valid_nets): return self.loading = True # 显示加载状态 self.status_label.config( text=f"正在加载数据... ({self.loaded_count}/{len(self.valid_nets)})" ) self.plot_window.update() start_idx = self.loaded_count end_idx = min(self.loaded_count + self.load_step, len(self.valid_nets)) current_nets = self.valid_nets[start_idx:end_idx] # 创建加载动画 loading_label = tk.Label( self.content_frame, text="加载中...", font=("SimHei", 10, "italic"), fg="#666", ) loading_label.grid( row=self.content_frame.grid_size()[1], column=0, sticky="ew", pady=10 ) # 短暂延迟,让用户看到加载提示 self.plot_window.after( 50, lambda: self._create_charts(current_nets, loading_label) ) def _create_charts(self, nets, loading_label): """创建图表并移除加载提示""" # 移除加载提示 loading_label.destroy() # 创建图表 for net in nets: self.create_net_chart(net) self.loaded_count += len(nets) self.loading = False # 更新状态 self.status_label.config( text=f"已加载 {self.loaded_count} 个NET,共 {len(self.valid_nets)} 个" ) # 检查是否全部加载完成 if self.loaded_count >= len(self.valid_nets): self.status_label.config( text=f"全部加载完成,共 {len(self.valid_nets)} 个NET", fg="green" ) def create_net_chart(self, net): """为单个NET创建图表""" net_data = self.net_data_dict.get(net, pd.DataFrame()) if net_data.empty: return # 获取当前内容框架中的行数 row_idx = self.content_frame.grid_size()[1] # 创建图表框架 frame = tk.Frame(self.content_frame, bd=2, relief=tk.GROOVE, padx=10, pady=10) frame.grid(row=row_idx, column=0, sticky="nsew", padx=5, pady=5) # # 添加NET标签 # net_label = tk.Label(frame, text=f" {net}", font=("SimHei", 12, "bold")) # net_label.pack(anchor=tk.W) # # 添加批次信息标签 # batch_info = ", ".join(net_data.columns) # batch_label = tk.Label( # frame, text=f"{batch_info}", font=("SimHei", 9), fg="#666" # ) # batch_label.pack(anchor=tk.W) # 创建图表 fig = Figure(figsize=(10, 4), dpi=100) ax = fig.add_subplot(111) # 准备数据 long_data = net_data.melt(var_name="批號", value_name="测量值") long_data["批次序号"] = long_data["批號"].str.extract(r"(\d+)").astype(int) long_data = long_data.sort_values("批次序号") # 绘制箱线图 sns.boxplot( x="批號", y="测量值", data=long_data, ax=ax, boxprops=dict(facecolor="lightblue", alpha=0.7), medianprops=dict(color="red", linewidth=2), ) # 设置图表样式 ax.set_title(f"{net} ", fontsize=12) ax.set_xlabel("", fontsize=10) ax.set_ylabel("NET值", fontsize=10) ax.tick_params(axis="x", labelsize=8, rotation=30) ax.grid(axis="y", linestyle="--", alpha=0.7) fig.tight_layout() # 标注统计值 all_values = long_data["测量值"].dropna() if not all_values.empty: data_range = all_values.max() - all_values.min() for i, col in enumerate(net_data.columns): col_values = net_data[col].dropna() if col_values.empty: continue q1 = col_values.quantile(0.25) median = col_values.median() q3 = col_values.quantile(0.75) text_offset = data_range * 0.03 ax.text(i, q1 - text_offset, f"Q1: {q1:.2f}", ha="center", fontsize=8) ax.text( i, median, f"Med: {median:.2f}", ha="center", fontsize=8, color="red", ) ax.text(i, q3 + text_offset, f"Q3: {q3:.2f}", ha="center", fontsize=8) # 嵌入图表到Tkinter canvas_plot = FigureCanvasTkAgg(fig, master=frame) canvas_plot.draw() canvas_plot.get_tk_widget().pack(fill=tk.X) # 添加工具栏 toolbar = NavigationToolbar2Tk(canvas_plot, frame) toolbar.update() canvas_plot.get_tk_widget().pack(fill=tk.X) def update_display(self): """根据当前视图模式更新显示""" # 清除当前内容 for widget in self.content_frame.winfo_children(): widget.destroy() # 重置无限滚动计数器 self.loaded_count = 0 if self.view_mode == "pagination": # 分页模式 page_idx = self.current_page start_idx = page_idx * self.per_page end_idx = min((page_idx + 1) * self.per_page, len(self.valid_nets)) current_nets = self.valid_nets[start_idx:end_idx] # 更新标题和页码标签 self.plot_window.title( f"批次数据分布箱线图 ({page_idx + 1}/{self.total_pages})" ) self.page_label.config(text=f"{page_idx + 1}/{self.total_pages}") # 更新按钮状态 self.prev_btn.config(state=tk.NORMAL if page_idx > 0 else tk.DISABLED) self.next_btn.config( state=tk.NORMAL if page_idx < self.total_pages - 1 else tk.DISABLED ) # 创建当前页的图表 for net in current_nets: self.create_net_chart(net) # 更新状态 self.status_label.config( text=f"显示 {start_idx+1}-{end_idx} 个NET,共 {len(self.valid_nets)} 个" ) # 居中窗口 self.center_window() else: # 无限滚动模式 self.plot_window.title("批次数据分布箱线图 (无限滚动模式)") self.status_label.config( text=f"已加载 0 个NET,共 {len(self.valid_nets)} 个" ) # 初始加载第一组图表 self.load_more() # 居中窗口 self.center_window() class BatchDataManager: """批次数据管理器,负责数据收集和缓存""" def __init__(self): self.batch_data_dict = {} self.collected_batches = set() self.current_batch_list = [] def collect_data(self, batch_list): """收集批次数据并缓存""" # 检查哪些批次尚未收集 new_batches = [b for b in batch_list if b not in self.collected_batches] if new_batches: # 只收集新增批次的数据 new_data = collect_batch_data(new_batches) self.batch_data_dict.update(new_data) self.collected_batches.update(new_batches) print(f"已收集 {len(new_batches)} 个新批次的数据") # 更新当前批次列表 self.current_batch_list = batch_list # 打印所有NET信息 self.print_net_info() return self.batch_data_dict def get_net_names(self): """获取当前批次的所有NET名称""" net_names = set() for batch in self.current_batch_list: if batch in self.batch_data_dict: net_names.update(self.batch_data_dict[batch].columns) # 转换为列表并按数字排序 net_list = sorted( net_names, key=lambda x: int(x[3:]) if x.startswith("net") and x[3:].isdigit() else 0, ) return net_list def print_net_info(self): """打印所有批次中的NET信息""" print("\n===== 批次中的NET信息 =====") total_nets = set() # 用于统计所有批次中的唯一NET for batch in self.current_batch_list: if batch in self.batch_data_dict: df = self.batch_data_dict[batch] net_columns = [col for col in df.columns if col.startswith("net")] # 添加NET到总集合 total_nets.update(net_columns) print( f"批次 {batch} 包含 {len(net_columns)} 个NET: {', '.join(net_columns[:10])}{'...' if len(net_columns) > 10 else ''}" ) else: print(f"批次 {batch} 无可用数据") # 打印总NET数量 print(f"批次总共包含 {len(total_nets)} 个不同的NET") print("=========================\n") def get_available_nets(self, batch_list): """获取特定批次中可用的NET""" available_nets = set() for batch in batch_list: if batch in self.batch_data_dict: available_nets.update(self.batch_data_dict[batch].columns) return [net for net in available_nets if net.startswith("net")] def process_batches(): """处理批次数据""" input_text = batch_entry.get().strip() if not input_text: messagebox.showwarning("输入警告", "请输入批次号后再处理!") return batch_list = re.split(r"[,\s]+", input_text) batch_list = [b for b in batch_list if b] if not batch_list: messagebox.showwarning("输入警告", "未识别到有效批次号!") return process_btn.config(state=tk.DISABLED) log_text.delete(1.0, tk.END) print(f"开始处理批次:{', '.join(batch_list)} - {datetime.now()}\n") # 收集批次数据 data_manager.collect_data(batch_list) # 获取所有NET名称 all_net_names = data_manager.get_net_names() net_entry.delete(0, tk.END) if all_net_names: net_entry.insert(0, ", ".join(all_net_names[:5])) # 默认显示前5个NET print(f"批次中包含的NET总数: {len(all_net_names)}") else: print("警告: 批次中未找到任何NET数据") # 启用显示图表按钮 show_plot_btn.config(state=tk.NORMAL) process_btn.config(state=tk.NORMAL) print(f"批次数据收集完成!现在可以输入NET列表并点击'显示图表'按钮 {datetime.now()}") def show_plots(): """显示NET图表""" # 解析NET输入 net_text = net_entry.get().strip() net_list = [] if net_text: raw_nets = re.split(r"[,\s]+", net_text) raw_nets = [n.strip() for n in raw_nets if n.strip()] for net in raw_nets: if net.startswith("net") and net[3:].isdigit(): net_list.append(net) elif net.isdigit(): net_list.append(f"net{net}") else: print(f"警告:无效NET格式 '{net}',已忽略") net_list = list(dict.fromkeys(net_list)) net_list.sort(key=lambda x: int(x[3:])) print(f"已解析NET列表(排序后):{', '.join(net_list)}") else: # 默认显示前20个NET net_list = [f"net{i}" for i in range(1, 21)] print("未指定NET编号,将显示前20个NET") if not net_list: messagebox.showwarning("输入警告", "未识别到有效的NET编号!") return # 检查NET是否在批次中 available_nets = data_manager.get_available_nets(data_manager.current_batch_list) invalid_nets = [net for net in net_list if net not in available_nets] if invalid_nets: print(f"警告: 以下NET在批次数据中不存在: {', '.join(invalid_nets)}") # 只保留有效的NET valid_nets = [net for net in net_list if net in available_nets] if not valid_nets: messagebox.showwarning("输入警告", "没有有效的NET可显示!") return print(f"将显示 {len(valid_nets)} 个有效的NET") # 创建NET图表查看器 NetPlotViewer( root, data_manager.batch_data_dict, data_manager.current_batch_list, valid_nets ) def create_gui(): global root, batch_entry, log_text, process_btn, net_entry, show_plot_btn, data_manager # 初始化数据管理器 data_manager = BatchDataManager() root = tk.Tk() root.title("電測阻值箱線圖") root.geometry("800x650") root.resizable(True, True) # 批次号输入区域 input_frame = tk.Frame(root, padx=10, pady=10) input_frame.pack(fill=tk.X) tk.Label( input_frame, text="批次号输入(多个批次用逗号/空格分隔):", font=("SimHei", 10) ).pack(side=tk.LEFT) batch_entry = tk.Entry(input_frame, width=50, font=("SimHei", 10)) batch_entry.pack(side=tk.LEFT, padx=5) batch_entry.insert(0, "25AL90070800,25AL90070400") # NET编号输入 net_frame = tk.Frame(root, padx=10, pady=5) net_frame.pack(fill=tk.X) tk.Label( net_frame, text="NET编号输入(多个用逗号/空格分隔):", font=("SimHei", 10), ).pack(side=tk.LEFT) net_entry = tk.Entry(net_frame, width=50, font=("SimHei", 10)) net_entry.pack(side=tk.LEFT, padx=5) net_entry.insert(0, "1,2,3,4,5") # 按钮 btn_frame = tk.Frame(root, padx=10, pady=5) btn_frame.pack(fill=tk.X) process_btn = tk.Button( btn_frame, text="收集批次数据", command=process_batches, bg="#4CAF50", fg="white", font=("SimHei", 10), ) process_btn.pack(side=tk.LEFT, padx=5) show_plot_btn = tk.Button( btn_frame, text="显示图表", command=show_plots, bg="#2196F3", fg="white", font=("SimHei", 10), state=tk.DISABLED, # 初始禁用 ) show_plot_btn.pack(side=tk.LEFT, padx=5) clear_btn = tk.Button( btn_frame, text="清空日志", command=lambda: log_text.delete(1.0, tk.END), bg="#f44336", fg="white", font=("SimHei", 10), ) clear_btn.pack(side=tk.LEFT, padx=5) # 日志 log_frame = tk.Frame(root, padx=10, pady=5) log_frame.pack(fill=tk.BOTH, expand=True) tk.Label(log_frame, text="处理日志:", font=("SimHei", 10)).pack(anchor=tk.W) log_text = scrolledtext.ScrolledText(log_frame, wrap=tk.WORD, font=("SimHei", 9)) log_text.pack(fill=tk.BOTH, expand=True) sys.stdout = RedirectText(log_text) root.mainloop() if __name__ == "__main__": configure_chinese_font() create_gui() 我現在想要將net畫圖的界面居中,現有程式的居中沒有起效。其外,我想要單個net可以水平移動
11-29
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值