.bin 文件用excel文件打开_如何用最少的python代码合并多个Excel文件

假如我们有三个打印设备分配订单的Excel 文件,见下图

b197826fc514d812c70c0aa8e75fb97e.png

打印机设备维修订单

现在要汇总一下所有地区的订单。

以下操作都认为你已经安装了python 3 的基础工具

第1步 确认和安装广联的扩展包

a. 确认是否安装了xlrd ,没有的话安装命令pip install xlrd安装。

b. 确认是否安装了xlrd ,安装命令是 pip install xlrd

c. 确认是否安装了pandas ,安装命令是 pip install pandas

第2步 打开python文件 录入以下代码

import os

import pandas as pd

excels = [pd.read_excel(fname) for fname in ["repairDevice1.xlsx","repairDevice2.xlsx","repairDevice3.xlsx"]]

df = pd.concat(excels)

df.to_excel('allDeviceOrder.xlsx', index=False)

这个时候在目录下面就生成了一个allDeviceOrder.xlsx的Excel文件,这个是合并完的文件,自动去除标题

ca433f2c68a030dbfd8f421c1d776c19.png

合并后的excel

如果你有100个excel 那要如何处理呢?别急这里只要把这段代码

excels = [pd.read_excel(fname) for fname in ["repairDevice1.xlsx","repairDevice2.xlsx","repairDevice3.xlsx"]]

调整为excels = [pd.read_excel(fname) for fname fname in os.listdir('./') if 'xlsx' in fname]即可

# 使用此指令前,请确保安装必要的Python库,例如使用以下命令安装: # pip install pandas openpyxl import pandas as pd import os from collections import defaultdict import openpyxl from openpyxl.styles import PatternFill, Font, Alignment, Border, Side from openpyxl.utils import get_column_letter from typing import * try: from xbot.app.logging import trace as print except: from xbot import print def process_excel_by_batch_with_config(order_file, config_file, output_file): """ title: 按工艺批次整理Excel数据(订单优化版) description: 对表格内数据进行整理,按相同工艺分组,严格控制每批货品数量不超过配置表中的"每批最多数量",确保相同工艺下的相同订单号必须排在一起。输出结果按批次号排序,只对批次汇总行应用颜色标记,使表格更加简洁清晰。 inputs: - order_file (file): 订单数据Excel文件,eg: "订单数据.xlsx" - config_file (file): 工艺配置Excel文件,包含每批最大数量,eg: "工艺配置.xlsx" - output_file (str): 处理后保存的Excel文件路径,eg: "订单数据_批次处理.xlsx" outputs: - output_file (str): 处理后的Excel文件路径,eg: "订单数据_批次处理.xlsx" """ # 检查输入文件是否存在 for file_path in [order_file, config_file]: if not os.path.exists(file_path): raise FileNotFoundError(f"输入文件 '{file_path}' 不存在") # 检查输出路径是否有效 output_dir = os.path.dirname(output_file) if output_dir and not os.path.exists(output_dir): raise FileNotFoundError(f"输出目录 '{output_dir}' 不存在") # 读取订单Excel文件 try: df_orders = pd.read_excel(order_file) except Exception as e: raise ValueError(f"读取订单Excel文件失败: {str(e)}") # 读取配置Excel文件 try: df_config = pd.read_excel(config_file) except Exception as e: raise ValueError(f"读取配置Excel文件失败: {str(e)}") # 检查订单文件中的工艺列 craft_column_name = None possible_craft_columns = ['工艺', '工艺名称'] for col in possible_craft_columns: if col in df_orders.columns: craft_column_name = col break if craft_column_name is None: raise ValueError(f"订单Excel文件中缺少工艺列,请确保文件包含 '工艺' 或 '工艺名称' 列") # 检查配置文件中的工艺列 config_craft_column = None for col in possible_craft_columns: if col in df_config.columns: config_craft_column = col break if config_craft_column is None: raise ValueError(f"配置Excel文件中缺少工艺列,请确保文件包含 '工艺' 或 '工艺名称' 列") # 检查必要的列是否存在 required_order_columns = [craft_column_name, '货品数量', '物流单编号'] for col in required_order_columns: if col not in df_orders.columns: raise ValueError(f"订单Excel文件中缺少必要的列: '{col}'") required_config_columns = [config_craft_column, '一波最少数量', '每批最多数量'] for col in required_config_columns: if col not in df_config.columns: raise ValueError(f"配置Excel文件中缺少必要的列: '{col}'") # 检查是否有订单编号列 has_order_id = '订单编号' in df_orders.columns if not has_order_id: print("警告: 订单Excel文件中缺少'订单编号'列,将无法按订单号分组") # 创建工艺配置字典 craft_config = {} for _, row in df_config.iterrows(): craft_name = row[config_craft_column] min_qty = row['一波最少数量'] max_qty = row['每批最多数量'] craft_config[craft_name] = {'min': min_qty, 'max': max_qty} # 默认配置,当工艺在配置表中找不到时使用 default_config = {'min': 5, 'max': 30} # 按工艺分组并计算批次 def _calculate_batches(): # 创建一个新的DataFrame用于存储结果 result_df = df_orders.copy() # 检查是否已存在批次列,如果存在则删除 if '批次' in result_df.columns: result_df = result_df.drop(columns=['批次']) # 添加批次列 batch_col_idx = df_orders.columns.get_loc('物流单编号') + 1 result_df.insert(batch_col_idx, '批次', '') # 按工艺分组处理 craft_groups = defaultdict(list) if has_order_id: # 首先按工艺和订单号分组 craft_order_groups = defaultdict(lambda: defaultdict(list)) for idx, row in df_orders.iterrows(): craft = row[craft_column_name] order_id = row['订单编号'] qty = row['货品数量'] craft_order_groups[craft][order_id].append((idx, qty)) # 将订单作为整体添加到工艺组 for craft, order_groups in craft_order_groups.items(): for order_id, items in order_groups.items(): indices = [item[0] for item in items] total_qty = sum(item[1] for item in items) craft_groups[craft].append((indices, total_qty, order_id)) else: # 如果没有订单编号列,直接按工艺分组 for idx, row in df_orders.iterrows(): craft = row[craft_column_name] qty = row['货品数量'] craft_groups[craft].append(([idx], qty, None)) # 为每个工艺组分配批次 batch_colors = { 0: 'FFFF00', # 黄色 1: 'FF99CC', # 粉色 2: 'CCFFFF', # 浅蓝色 3: 'CCFFCC', # 浅绿色 4: 'FFCC99', # 橙色 } batch_info = {} # 存储每行的批次信息 {index: (batch_name, color)} batch_summary = {} # 存储每个批次的汇总信息 {batch_name: (total_qty, color, craft)} for craft, items in craft_groups.items(): # 获取该工艺的配置,如果不存在则使用默认配置 # 先尝试完全匹配 config = craft_config.get(craft, None) # 如果完全匹配失败,尝试部分匹配 if config is None: for craft_name, cfg in craft_config.items(): if craft_name in craft or craft in craft_name: config = cfg break # 如果仍然没有匹配,使用默认配置 if config is None: config = default_config min_qty = config['min'] max_qty = config['max'] # 对订单按数量排序,优先处理大订单 sorted_items = sorted(items, key=lambda x: x[1], reverse=True) batch_num = 1 batches = [] # 存储所有批次 [(batch_indices, batch_qty)] current_batch_indices = [] current_batch_qty = 0 for order_indices, order_qty, order_id in sorted_items: # 如果当前订单的数量已经超过最大批次数量,单独作为一个批次 if order_qty > max_qty: batches.append((order_indices, order_qty)) continue # 尝试将订单添加到现有批次中 added_to_existing = False # 尝试填充现有批次 for i, (batch_indices, batch_qty) in enumerate(batches): if batch_qty + order_qty <= max_qty: # 更新批次 batches[i] = (batch_indices + order_indices, batch_qty + order_qty) added_to_existing = True break if not added_to_existing: # 如果添加当前订单会超过最大批次数量,先完成当前批次 if current_batch_qty + order_qty > max_qty and current_batch_indices: batches.append((current_batch_indices, current_batch_qty)) current_batch_indices = order_indices current_batch_qty = order_qty else: # 添加到当前批次 current_batch_indices.extend(order_indices) current_batch_qty += order_qty # 处理最后一个批次 if current_batch_indices: batches.append((current_batch_indices, current_batch_qty)) # 分配批次号和颜色 for i, (batch_indices, batch_qty) in enumerate(batches): if batch_qty >= min_qty: # 只处理达到最小数量的批次 color_idx = i % len(batch_colors) color = batch_colors[color_idx] batch_name = f"{craft}-第{batch_num}批" for idx in batch_indices: batch_info[idx] = (batch_name, color) # 记录批次汇总信息 batch_summary[batch_name] = (batch_qty, color, craft) batch_num += 1 else: # 对于不满足最小数量的批次,尝试合并到其他批次 # 如果没有其他批次或无法合并,仍然创建一个新批次 if batches and i > 0: # 尝试合并到前一个批次 prev_batch_indices, prev_batch_qty = batches[i-1] if prev_batch_qty + batch_qty <= max_qty: # 可以合并 color_idx = (i-1) % len(batch_colors) color = batch_colors[color_idx] batch_name = f"{craft}-第{batch_num-1}批" for idx in batch_indices: batch_info[idx] = (batch_name, color) # 更新批次汇总信息 batch_summary[batch_name] = (prev_batch_qty + batch_qty, color, craft) else: # 无法合并,创建新批次 color_idx = i % len(batch_colors) color = batch_colors[color_idx] batch_name = f"{craft}-第{batch_num}批" for idx in batch_indices: batch_info[idx] = (batch_name, color) # 记录批次汇总信息 batch_summary[batch_name] = (batch_qty, color, craft) batch_num += 1 else: # 没有其他批次,创建新批次 color_idx = i % len(batch_colors) color = batch_colors[color_idx] batch_name = f"{craft}-第{batch_num}批" for idx in batch_indices: batch_info[idx] = (batch_name, color) # 记录批次汇总信息 batch_summary[batch_name] = (batch_qty, color, craft) batch_num += 1 return result_df, batch_info, batch_summary # 计算批次 result_df, batch_info, batch_summary = _calculate_batches() # 将批次信息填入DataFrame for idx, (batch_name, _) in batch_info.items(): result_df.loc[idx, '批次'] = batch_name # 按批次排序,确保同一批次的订单排在一起 # 如果有订单编号列,则按批次、订单编号排序,确保相同订单号排在一起 if has_order_id: result_df = result_df.sort_values(by=['批次', '订单编号', craft_column_name]) else: result_df = result_df.sort_values(by=['批次', craft_column_name]) # 保存到Excel文件,并设置不同批次的颜色和汇总行 try: # 先将DataFrame保存到Excel result_df.to_excel(output_file, index=False) # 然后使用openpyxl添加汇总行和设置样式 workbook = openpyxl.load_workbook(output_file) worksheet = workbook.active # 获取列数 num_cols = len(result_df.columns) # 获取批次列的索引 batch_col_idx = result_df.columns.get_loc('批次') + 1 # Excel列从1开始 qty_col_idx = result_df.columns.get_loc('货品数量') + 1 # Excel列从1开始 craft_col_idx = result_df.columns.get_loc(craft_column_name) + 1 # Excel列从1开始 # 获取物流单编号和订单编号列的索引(如果存在) logistics_col_idx = result_df.columns.get_loc('物流单编号') + 1 if '物流单编号' in result_df.columns else None order_col_idx = result_df.columns.get_loc('订单编号') + 1 if has_order_id else None # 检查是否有商家编码列 merchant_code_col_idx = None possible_merchant_code_columns = ['商家编码', '商品编码', '货号'] for col in possible_merchant_code_columns: if col in result_df.columns: merchant_code_col_idx = result_df.columns.get_loc(col) + 1 break # 设置单元格样式 - 只用于汇总行 def _set_summary_cell_style(cell, color): cell.fill = PatternFill(start_color=color, end_color=color, fill_type="solid") cell.font = Font(bold=True) cell.alignment = Alignment(horizontal='center', vertical='center') thin_border = Border( left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin') ) cell.border = thin_border # 设置普通单元格样式 - 只添加边框,不添加颜色 def _set_normal_cell_style(cell): cell.alignment = Alignment(vertical='center', wrap_text=True) thin_border = Border( left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin') ) cell.border = thin_border # 设置标题行样式 header_fill = PatternFill(start_color="D9D9D9", end_color="D9D9D9", fill_type="solid") header_font = Font(bold=True) header_alignment = Alignment(horizontal='center', vertical='center') header_border = Border( left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin') ) for col_idx in range(1, num_cols + 1): cell = worksheet.cell(row=1, column=col_idx) cell.fill = header_fill cell.font = header_font cell.alignment = header_alignment cell.border = header_border # 为所有数据行添加边框 for row_idx in range(2, worksheet.max_row + 1): for col_idx in range(1, num_cols + 1): cell = worksheet.cell(row=row_idx, column=col_idx) _set_normal_cell_style(cell) # 找出每个批次的最后一行 batch_last_rows = {} current_batch = None current_batch_last_row = None for row_idx in range(2, worksheet.max_row + 1): batch_cell = worksheet.cell(row=row_idx, column=batch_col_idx) if batch_cell.value: if batch_cell.value != current_batch: if current_batch: batch_last_rows[current_batch] = current_batch_last_row current_batch = batch_cell.value current_batch_last_row = row_idx # 添加最后一个批次 if current_batch: batch_last_rows[current_batch] = current_batch_last_row # 按批次添加汇总行 # 记录已插入的行数,用于调整后续行的索引 inserted_rows = 0 # 按批次顺序处理 for batch_name, last_row in sorted(batch_last_rows.items()): if batch_name in batch_summary: batch_qty, color, craft = batch_summary[batch_name] # 在批次的最后一行后面插入汇总行 adjusted_last_row = last_row + inserted_rows worksheet.insert_rows(adjusted_last_row + 1) inserted_rows += 1 # 设置汇总行 summary_row = adjusted_last_row + 1 # 设置工艺列 craft_summary_cell = worksheet.cell(row=summary_row, column=craft_col_idx) craft_summary_cell.value = craft _set_summary_cell_style(craft_summary_cell, color) # 设置批次列 batch_summary_cell = worksheet.cell(row=summary_row, column=batch_col_idx) batch_summary_cell.value = f"{batch_name}汇总" _set_summary_cell_style(batch_summary_cell, color) # 设置数量列 qty_summary_cell = worksheet.cell(row=summary_row, column=qty_col_idx) qty_summary_cell.value = batch_qty _set_summary_cell_style(qty_summary_cell, color) # 设置其他列 for col_idx in range(1, num_cols + 1): if col_idx != batch_col_idx and col_idx != qty_col_idx and col_idx != craft_col_idx: cell = worksheet.cell(row=summary_row, column=col_idx) _set_summary_cell_style(cell, color) # 自动调整列宽 column_widths = {} # 首先计算每列的最大宽度 for col_idx in range(1, num_cols + 1): column = get_column_letter(col_idx) max_length = 0 for row_idx in range(1, worksheet.max_row + 1): cell = worksheet.cell(row=row_idx, column=col_idx) if cell.value: cell_length = len(str(cell.value)) if cell_length > max_length: max_length = cell_length # 设置列宽,但对特定列进行特殊处理 if col_idx == logistics_col_idx or col_idx == order_col_idx or col_idx == merchant_code_col_idx: # 物流单号、订单编号和商家编码列设置更宽一些,确保完整显示 adjusted_width = max(max_length + 2, 20) else: # 其他列根据内容自动调整,但设置最小和最大宽度 adjusted_width = min(max(max_length + 2, 10), 30) worksheet.column_dimensions[column].width = adjusted_width # 设置物流单号、订单编号和商家编码列的数字格式为文本格式,确保完整显示 text_format_columns = [logistics_col_idx, order_col_idx, merchant_code_col_idx] for col_idx in text_format_columns: if col_idx: for row_idx in range(2, worksheet.max_row + 1): cell = worksheet.cell(row=row_idx, column=col_idx) if cell.value: cell.number_format = '@' # 设置为文本格式 # 冻结首行 worksheet.freeze_panes = 'A2' # 保存修改后的Excel文件 workbook.save(output_file) except Exception as e: raise ValueError(f"保存Excel文件失败: {str(e)}") return output_file
09-07
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Towin_robot.py 多股票量化策略总报告生成器(修复版) 功能:解决PDF黑色块问题+自动识别表头+内存流传图+同目录字体加载+PDF防乱码 """ import tushare as ts # 交易日历 from sklearn.linear_model import LogisticRegression from sklearn.metrics import roc_auc_score import joblib, pickle, datetime as dt import sys import os import re import time import warnings from io import BytesIO from datetime import datetime import numpy as np import pandas as pd import matplotlib.pyplot as plt # ReportLab相关(中文字体支持核心) from reportlab.pdfgen import canvas from reportlab.lib.pagesizes import A4 from reportlab.lib import colors from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image from reportlab.lib.utils import ImageReader from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.ttfonts import TTFont # PDF合并 from PyPDF2 import PdfMerger # 屏蔽警告 warnings.filterwarnings("ignore") # ------------------------------------------------- # 核心修复:中文字体初始化(适配脚本同目录下的simhei.ttf) # ------------------------------------------------- def init_chinese_font(): """初始化中文字体(优先加载脚本同目录下的simhei.ttf)""" font_name = "SimHei" script_dir = os.path.dirname(os.path.abspath(__file__)) font_path = os.path.join(script_dir, "simhei.ttf") if not os.path.exists(font_path): print(f"❌ 未找到字体文件:{font_path}") print(f" 请将simhei.ttf字体文件放在与脚本同一目录下(当前目录:{script_dir})") sys.exit(1) try: pdfmetrics.registerFont(TTFont(font_name, font_path)) print(f"✅ 成功注册中文字体:{font_name}(路径:{font_path})") return font_name except Exception as e: print(f"❌ 字体注册失败:{str(e)}") sys.exit(1) # ------------------------------------------------- # Matplotlib图表中文字体设置(防图表乱码) # ------------------------------------------------- def init_matplotlib_font(font_name): """初始化Matplotlib字体(确保图表中文正常显示)""" plt.rcParams.update({ 'font.sans-serif': [font_name, 'DejaVu Sans'], 'axes.unicode_minus': False, 'figure.dpi': 100, 'savefig.facecolor': 'white' }) print("✅ Matplotlib图表字体初始化完成") # ------------------------------------------------- # 工具函数:证券代码提取+Excel文件获取 # ------------------------------------------------- def extract_stock_code(filename: str) -> str: """从文件名提取6位证券代码,失败则生成唯一哈希标识""" pattern = r'\((\d{6})\)' match = re.search(pattern, filename) if match: return match.group(1) else: import hashlib filename_clean = filename.replace(".xlsx", "").strip() return hashlib.md5(filename_clean.encode()).hexdigest()[:6] def get_all_excel_files(folder_path: str = ".") -> list: """获取当前文件夹下所有有效.xlsx文件(排除临时文件)""" excel_files = [ os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".xlsx") and not f.startswith("~$") ] if not excel_files: print("⚠️ 未在当前文件夹找到Excel文件,程序退出!") sys.exit() return excel_files # ------------------------------------------------- # 1. 数据读取(自动识别表头+清洗) # ------------------------------------------------- def load_data(file_path: str) -> tuple[pd.DataFrame, str, str]: filename = os.path.basename(file_path).replace(".xlsx", "") stock_name = re.sub(r'\(\d{6}\)', '', filename).strip() stock_code = extract_stock_code(filename) print(f"\n🔍 正在处理 {filename},自动探测表头...") header_row = None for test_row in range(5): try: temp_df = pd.read_excel(file_path, header=test_row, nrows=1, engine='openpyxl') temp_cols = [str(col).strip() for col in temp_df.columns] has_date = any("日期" in col for col in temp_cols) has_close = any("收盘" in col for col in temp_cols) if has_date and has_close: header_row = test_row print(f"✅ 自动识别表头行:第{header_row + 1}行(Excel中显示行号)") break except Exception: continue if header_row is None: print(f"❌ {filename} 未找到有效表头,跳过该文件") return None, None, None try: df = pd.read_excel(file_path, header=header_row, engine='openpyxl') df.columns = [re.sub(r'[\s_\-]+', '', str(col)).strip() for col in df.columns] print(f"📋 清洗后列名:{list(df.columns)}") except Exception as e: print(f"❌ 读取 {filename} 失败:{str(e)},跳过") return None, None, None col_mapping = {} date_candidates = [col for col in df.columns if "日期" in col] for col in date_candidates: try: pd.to_datetime(df[col], errors="raise") col_mapping["date"] = col break except Exception: continue if "date" not in col_mapping: print(f"❌ {filename} 无有效日期列,跳过") return None, None, None close_candidates = [col for col in df.columns if "收盘" in col] for col in close_candidates: try: pd.to_numeric(df[col], errors="raise") col_mapping["close"] = col break except Exception: continue if "close" not in col_mapping: print(f"❌ {filename} 无有效收盘价列,跳过") return None, None, None vol_candidates = [col for col in df.columns if any(k in col for k in ["成交", "量"])] if vol_candidates: for col in vol_candidates: try: pd.to_numeric(df[col], errors="raise") col_mapping["vol"] = col break except Exception: continue col_mapping["vol"] = col_mapping.get("vol") selected_cols = ["date", "close"] + (["vol"] if col_mapping["vol"] else []) df_clean = df[[col_mapping[col] for col in selected_cols]].copy() df_clean.columns = selected_cols df_clean["date"] = pd.to_datetime(df_clean["date"], errors="coerce") df_clean = df_clean.dropna(subset=["date", "close"]).set_index("date").sort_index() if "vol" not in df_clean.columns: df_clean["vol"] = 0 df_clean["has_vol"] = False else: df_clean["vol"] = pd.to_numeric(df_clean["vol"], errors="coerce").fillna(0) df_clean["has_vol"] = (df_clean["vol"] > 0).any() print(f"📊 数据预处理完成:{len(df_clean)} 条有效数据") return df_clean, stock_name, stock_code # ------------------------------------------------- # 2. 技术指标计算 # ------------------------------------------------- def calculate_tech_indicators(df: pd.DataFrame) -> pd.DataFrame: df_copy = df.copy() df_copy["slope10"] = (df_copy["close"] - df_copy["close"].shift(10)) / df_copy["close"].shift(10) * 100 / 10 df_copy["slope10_ok"] = df_copy["slope10"].between(0.5, 2) df_copy["ma5"] = df_copy["close"].rolling(5).mean() df_copy["ma10"] = df_copy["close"].rolling(10).mean() df_copy["gold_cross"] = (df_copy["ma5"] > df_copy["ma10"]) & (df_copy["ma5"].shift(1) <= df_copy["ma10"].shift(1)) ema12 = df_copy["close"].ewm(span=12, adjust=False).mean() ema26 = df_copy["close"].ewm(span=26, adjust=False).mean() df_copy["dif"] = ema12 - ema26 df_copy["dea"] = df_copy["dif"].ewm(span=9, adjust=False).mean() df_copy["macd"] = 2 * (df_copy["dif"] - df_copy["dea"]) df_copy["macd_inc"] = (df_copy["macd"] > 0) & (df_copy["macd"] > df_copy["macd"].shift(1)) if df_copy["has_vol"].iloc[0]: df_copy["ma5vol"] = df_copy["vol"].rolling(5).mean() df_copy["vol_ratio_ok"] = df_copy.apply( lambda x: 1.2 <= (x["vol"] / x["ma5vol"]) <= 1.5 if x["ma5vol"] > 0 else False, axis=1 ) else: df_copy["vol_ratio_ok"] = True df_copy["buy_signal"] = df_copy["slope10_ok"] & df_copy["gold_cross"] & df_copy["macd_inc"] & df_copy[ "vol_ratio_ok"] return df_copy # ------------------------------------------------- # 3. 策略回测 # ------------------------------------------------- def backtest_strategy(df: pd.DataFrame, stock_code: str, init_cash: float = 1_000_000) -> tuple[pd.DataFrame, float]: cash = init_cash position = {} trade_log = [] for dt, row in df.iterrows(): current_close = row["close"] current_slope = row["slope10"] if position: code = list(position.keys())[0] pos = position[code] buy_price = pos["buy_price"] shares = pos["shares"] profit_rate = (current_close - buy_price) / buy_price if profit_rate >= 0.15: sell_amount = current_close * shares cash += sell_amount trade_log.append( [dt.strftime("%Y-%m-%d"), code, "止盈全平", round(current_close, 2), shares, round(sell_amount, 2)]) del position[code] elif 0.08 <= profit_rate < 0.15: sell_shares = shares // 2 sell_amount = current_close * sell_shares cash += sell_amount trade_log.append([dt.strftime("%Y-%m-%d"), code, "止盈半平", round(current_close, 2), sell_shares, round(sell_amount, 2)]) position[code]["shares"] -= sell_shares stop_loss_rate = 0.03 if (0.5 <= pos["buy_slope"] < 1) else 0.02 if current_close <= buy_price * (1 - stop_loss_rate): sell_amount = current_close * shares cash += sell_amount trade_log.append( [dt.strftime("%Y-%m-%d"), code, "止损", round(current_close, 2), shares, round(sell_amount, 2)]) del position[code] if row["buy_signal"] and not position: max_buy_amount = min(cash * 0.05, cash * 0.15) if max_buy_amount < current_close: continue buy_shares = int(max_buy_amount / current_close) buy_amount = current_close * buy_shares cash -= buy_amount position[stock_code] = {"shares": buy_shares, "buy_price": current_close, "buy_slope": current_slope} trade_log.append([dt.strftime("%Y-%m-%d"), stock_code, "买入", round(current_close, 2), buy_shares, round(buy_amount, 2)]) if position: code = list(position.keys())[0] pos = position[code] sell_amount = df["close"].iloc[-1] * pos["shares"] cash += sell_amount trade_log.append( [df.index[-1].strftime("%Y-%m-%d"), code, "期末清仓", round(df["close"].iloc[-1], 2), pos["shares"], round(sell_amount, 2)]) log_cols = ["日期", "证券代码", "操作类型", "价格(元)", "股数", "金额(元)"] log_df = pd.DataFrame(trade_log, columns=log_cols) if trade_log else pd.DataFrame(columns=log_cols) return log_df, round(cash, 2) # ------------------------------------------------- # 4. 图表生成(修复黑色块核心优化) # ------------------------------------------------- def plot_quant_analysis(df: pd.DataFrame, stock_name: str, stock_code: str) -> BytesIO: """生成4合一分析图表,返回内存流(修复背景与渲染)""" # 创建图表对象并设置背景 fig = plt.figure(figsize=(16, 12), facecolor='white', edgecolor='none') fig.suptitle(f"{stock_name}({stock_code}) 量化分析图", fontsize=16, fontweight="bold", y=0.98) # 创建子图 ax1 = fig.add_subplot(2, 2, 1) ax2 = fig.add_subplot(2, 2, 2) ax3 = fig.add_subplot(2, 2, 3) ax4 = fig.add_subplot(2, 2, 4) # 子图1:价格+买入信号+均线 ax1.plot(df.index, df["close"], color="#1f77b4", linewidth=1.5, label="收盘价") buy_points = df[df["buy_signal"]] if not buy_points.empty: ax1.scatter(buy_points.index, buy_points["close"], color="red", s=80, marker="o", edgecolors="white", linewidth=1.5, label="买入信号") ax1.plot(df.index, df["ma5"], color="#ff7f0e", linewidth=1, label="5日均线") ax1.plot(df.index, df["ma10"], color="#2ca02c", linewidth=1, label="10日均线") ax1.set_title("价格走势与买入信号", fontsize=12) ax1.set_ylabel("价格(元)") ax1.legend(fontsize=10) ax1.grid(alpha=0.3) ax1.set_facecolor('white') # 子图背景强制白色 # 子图2:10日斜率 ax2.plot(df.index, df["slope10"], color="#d62728", linewidth=1.5, label="10日斜率") ax2.axhline(y=0.5, color="orange", linestyle="--", alpha=0.7, label="最佳下界(0.5%)") ax2.axhline(y=2, color="orange", linestyle="--", alpha=0.7, label="最佳上界(2%)") ax2.fill_between(df.index, 0.5, 2, color="orange", alpha=0.1, label="最佳中速区间") ax2.set_title("10日斜率变化", fontsize=12) ax2.set_ylabel("斜率(%)") ax2.legend(fontsize=10) ax2.grid(alpha=0.3) ax2.set_facecolor('white') # 子图背景强制白色 # 子图3:MACD ax3.plot(df.index, df["dif"], color="#1f77b4", linewidth=1.5, label="DIF") ax3.plot(df.index, df["dea"], color="#ff7f0e", linewidth=1.5, label="DEA") macd_pos = df[df["macd"] > 0] macd_neg = df[df["macd"] <= 0] ax3.bar(macd_pos.index, macd_pos["macd"], color="red", alpha=0.5, label="MACD红柱") ax3.bar(macd_neg.index, macd_neg["macd"], color="green", alpha=0.5, label="MACD绿柱") ax3.set_title("MACD指标", fontsize=12) ax3.set_ylabel("MACD值") ax3.legend(fontsize=10) ax3.grid(alpha=0.3) ax3.set_facecolor('white') # 子图背景强制白色 # 子图4:成交量(修复无数据时的黑色块) ax4.set_facecolor('white') # 子图背景强制白色 if df["has_vol"].iloc[0]: ax4.bar(df.index, df["vol"], color="#2ca02c", alpha=0.6, label="成交量") ax4.set_title("成交量变化", fontsize=12) ax4.set_ylabel("成交量") ax4.legend(fontsize=10) else: # 无数据时,用白色背景+黑色文本,避免黑色块 ax4.text(0.5, 0.5, "无成交量数据", ha="center", va="center", transform=ax4.transAxes, fontsize=12, color="black", fontweight="bold") ax4.set_title("成交量变化", fontsize=12) ax4.grid(alpha=0.3) # 优化渲染:避免边缘裁剪 plt.tight_layout() plt.subplots_adjust(top=0.93) # 生成图像流:降低dpi避免流过大,确保完整 img_stream = BytesIO() fig.savefig( img_stream, dpi=150, # 降低dpi减少内存占用,避免流损坏 bbox_inches="tight", format="png", facecolor='white', # 再次确认背景色 edgecolor='none', # 去除图像边缘 transparent=False # 禁用透明 ) img_stream.seek(0) # 重置流指针 plt.close(fig) # 关闭图表释放内存 print(f"📊 图表生成成功(内存流大小:{img_stream.getbuffer().nbytes / 1024:.2f} KB)") return img_stream # ------------------------------------------------- # 5. 生成PDF子模块(修复图像显示) # ------------------------------------------------- def generate_sub_pdf(df_indicators: pd.DataFrame, log_df: pd.DataFrame, stock_name: str, stock_code: str, final_cash: float, font_name: str) -> str: """生成单股票PDF子模块""" current_dir = os.getcwd() sub_pdf_path = os.path.join(current_dir, f"temp_{stock_code}_sub.pdf") styles = getSampleStyleSheet() # 创建自定义样式确保字体正确应用(修复小标题字体问题) custom_heading1 = ParagraphStyle( name='CustomHeading1', parent=styles['Heading1'], fontName=font_name, fontSize=16, spaceAfter=15 ) custom_heading2 = ParagraphStyle( name='CustomHeading2', parent=styles['Heading2'], fontName=font_name, fontSize=12, spaceAfter=6 ) custom_normal = ParagraphStyle( name='CustomNormal', parent=styles['Normal'], fontName=font_name, fontSize=10, spaceAfter=6 ) elements = [] # 子模块标题 title = Paragraph(f"<b>{stock_name}({stock_code}) 量化分析子模块</b>", custom_heading1) title.alignment = 1 elements.append(title) elements.append(Spacer(1, 10)) # 交易记录(修复表格字体) elements.append(Paragraph("<b>1. 交易记录</b>", custom_heading2)) elements.append(Spacer(1, 8)) if log_df.empty: elements.append(Paragraph("⚠️ 无符合条件的买入信号,未产生交易记录", custom_normal)) else: table_data = [log_df.columns.tolist()] for _, row in log_df.iterrows(): row_data = row.tolist() row_data[3] = round(row_data[3], 2) row_data[5] = round(row_data[5], 2) table_data.append(row_data) col_widths = [100, 100, 120, 100, 80, 120] table = Table(table_data, colWidths=col_widths, repeatRows=1) # 修复表格样式:所有单元格应用字体,避免黑色块 table_style = TableStyle([ ("BACKGROUND", (0, 0), (-1, 0), colors.lightblue), ("TEXTCOLOR", (0, 0), (-1, -1), colors.black), # 所有文本强制黑色 ("ALIGN", (0, 0), (-1, -1), "CENTER"), ("FONTNAME", (0, 0), (-1, -1), font_name), # 所有单元格用指定字体 ("FONTSIZE", (0, 0), (-1, -1), 10), ("FONTWEIGHT", (0, 0), (-1, 0), "BOLD"), ("GRID", (0, 0), (-1, -1), 1, colors.black), ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.lightgrey]), ]) table.setStyle(table_style) elements.append(table) elements.append(Spacer(1, 20)) # 量化分析图表(修复图像尺寸) elements.append(Paragraph("<b>2. 量化分析图表</b>", custom_heading2)) elements.append(Spacer(1, 8)) try: img_stream = plot_quant_analysis(df_indicators, stock_name, stock_code) img_data = img_stream.getvalue() # 直接获取二进制数据 # 使用ReportLab的ImageReader处理内存数据 img_reader = ImageReader(BytesIO(img_data)) # 计算合适的图像尺寸(A4页面宽度减去边距) page_width = A4[0] - 60 # 左右各30mm边距 img_width = page_width img_height = img_width * 0.75 # 保持4:3比例 img = Image( img_reader, width=img_width, height=img_height, kind='direct' # 直接嵌入图像数据 ) img.hAlign = "CENTER" elements.append(img) except Exception as e: error_msg = f"⚠️ 图表生成失败:{str(e)}" elements.append(Paragraph(error_msg, custom_normal)) print(f"❌ {error_msg}") finally: if 'img_stream' in locals(): img_stream.close() # 确保关闭内存流 elements.append(Spacer(1, 20)) # 子模块小结 elements.append(Paragraph("<b>3. 子模块小结</b>", custom_heading2)) elements.append(Spacer(1, 8)) buy_count = df_indicators["buy_signal"].sum() init_cash = 1_000_000 profit = final_cash - init_cash profit_rate = (profit / init_cash) * 100 summary_texts = [ f"• 识别买入信号数:{buy_count} 个", f"• 初始资金:{init_cash:,.2f} 元", f"• 最终资金:{final_cash:,.2f} 元", f"• 绝对收益:{profit:,.2f} 元({profit_rate:.2f}%)", f"• 数据时间范围:{df_indicators.index.min().strftime('%Y-%m-%d')} ~ {df_indicators.index.max().strftime('%Y-%m-%d')}" ] for text in summary_texts: elements.append(Paragraph(text, custom_normal)) # 生成子PDF try: doc = SimpleDocTemplate( sub_pdf_path, pagesize=A4, rightMargin=30, leftMargin=30, topMargin=30, bottomMargin=20, defaultFontName=font_name # 设置默认字体 ) doc.build(elements) print(f"✅ 生成 {stock_name} 子模块PDF:{os.path.basename(sub_pdf_path)}") return sub_pdf_path except Exception as e: print(f"❌ 子PDF生成失败:{str(e)}") raise # ------------------------------------------------- # 6. 合并PDF报告(修复合并问题) # ------------------------------------------------- def merge_to_total_pdf(sub_pdf_paths: list, stock_info_list: list, font_name: str, total_pdf_name: str = "多股票量化策略总报告.pdf") -> None: """合并所有子PDF为总报告""" current_dir = os.getcwd() total_pdf_path = os.path.join(current_dir, total_pdf_name) cover_dir_pdf = os.path.join(current_dir, "temp_cover_dir.pdf") styles = getSampleStyleSheet() # 创建自定义样式 custom_heading1 = ParagraphStyle( name='CustomHeading1', parent=styles['Heading1'], fontName=font_name, fontSize=18, spaceAfter=20 ) custom_heading2 = ParagraphStyle( name='CustomHeading2', parent=styles['Heading2'], fontName=font_name, fontSize=14, spaceAfter=12 ) custom_normal = ParagraphStyle( name='CustomNormal', parent=styles['Normal'], fontName=font_name, fontSize=11, spaceAfter=8 ) elements = [] # 封面 cover_title = Paragraph("<b>多股票中速上升段量化策略总报告</b>", custom_heading1) cover_title.alignment = 1 elements.append(cover_title) elements.append(Spacer(1, 30)) report_date = Paragraph(f"报告生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", custom_normal) report_date.alignment = 1 elements.append(report_date) elements.append(Spacer(1, 10)) stock_count = Paragraph(f"分析股票数量:{len(stock_info_list)} 只", custom_normal) stock_count.alignment = 1 elements.append(stock_count) elements.append(Spacer(1, 60)) # 股票列表(修复表格字体) elements.append(Paragraph("<b>参与分析的股票列表</b>", custom_heading2)) elements.append(Spacer(1, 8)) stock_table_data = [["股票名称", "证券代码"]] + stock_info_list table = Table(stock_table_data, colWidths=[250, 100], repeatRows=1) table.setStyle(TableStyle([ ("BACKGROUND", (0, 0), (-1, 0), colors.lightblue), ("TEXTCOLOR", (0, 0), (-1, -1), colors.black), ("ALIGN", (0, 0), (-1, -1), "CENTER"), ("FONTNAME", (0, 0), (-1, -1), font_name), # 所有单元格用指定字体 ("FONTSIZE", (0, 0), (-1, -1), 10), ("FONTWEIGHT", (0, 0), (-1, 0), "BOLD"), ("GRID", (0, 0), (-1, -1), 1, colors.black), ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.lightgrey]), ])) elements.append(table) elements.append(Spacer(1, 60)) # 总目录 elements.append(Paragraph("<b>总目录</b>", custom_heading2)) elements.append(Spacer(1, 8)) elements.append(Paragraph("1. 报告说明与风险提示", custom_normal)) for i, (stock_name, stock_code) in enumerate(stock_info_list, 2): elements.append(Paragraph(f"{i}. {stock_name}({stock_code}) 量化分析子模块", custom_normal)) elements.append(Spacer(1, 40)) # 风险提示 elements.append(Paragraph("<b>1. 报告说明与风险提示</b>", custom_heading2)) elements.append(Spacer(1, 8)) note_texts = [ "• 本报告基于历史数据回测,不构成投资建议。", "• 策略核心:10日斜率0.5%-2%+均线金叉+MACD红柱放大+量比1.2-1.5。", "• 仓位规则:单票首仓≤5%,严格止盈止损(8%-15%止盈,2%-3%止损)。", "• 风险提示:市场波动可能导致策略失效,投资需谨慎。" ] for text in note_texts: elements.append(Paragraph(text, custom_normal)) # 生成封面+目录PDF doc = SimpleDocTemplate( cover_dir_pdf, pagesize=A4, rightMargin=30, leftMargin=30, topMargin=30, bottomMargin=20, defaultFontName=font_name # 设置默认字体 ) doc.build(elements) print(f"\n📄 封面+目录PDF生成完成") # 合并PDF前验证子PDF有效性 valid_sub_pdfs = [] for sub_pdf in sub_pdf_paths: if os.path.exists(sub_pdf) and os.path.getsize(sub_pdf) > 1024: # 排除空文件 valid_sub_pdfs.append(sub_pdf) print(f"🔗 待合并PDF:{os.path.basename(sub_pdf)}(大小:{os.path.getsize(sub_pdf) / 1024:.2f} KB)") else: print(f"❌ 跳过损坏的子PDF:{os.path.basename(sub_pdf)}") # 合并PDF merger = PdfMerger() merger.append(cover_dir_pdf) for sub_pdf in valid_sub_pdfs: merger.append(sub_pdf) print(f"✅ 合并PDF:{os.path.basename(sub_pdf)}") merger.write(total_pdf_path) merger.close() print(f"\n🎉 总报告生成完成:{total_pdf_path}") # 确认总PDF正常后删除临时文件 if os.path.exists(total_pdf_path) and os.path.getsize(total_pdf_path) > 1024: os.remove(cover_dir_pdf) for sub_pdf in valid_sub_pdfs: os.remove(sub_pdf) print("🗑️ 临时文件已删除") else: print("⚠️ 总PDF生成异常,保留临时文件以便排查") # ------------------------------------------------- # 主流程 # ------------------------------------------------- def main(): print("=" * 70) print("多股票量化策略总报告生成器(修复版)") print("=" * 70) # 1. 初始化中文字体 font_name = init_chinese_font() # 2. 初始化Matplotlib字体 init_matplotlib_font(font_name) # 3. 获取Excel文件 excel_files = get_all_excel_files() print(f"\n📂 找到 {len(excel_files)} 个Excel文件,处理顺序:") for i, file in enumerate(excel_files, 1): print(f" {i}. {os.path.basename(file)}") # 4. 批量处理 sub_pdf_paths = [] stock_info_list = [] init_cash = 1_000_000 for file in excel_files: print(f"\n" + "-" * 50) print(f"开始处理:{os.path.basename(file)}") print("-" * 50) df_raw, stock_name, stock_code = load_data(file) if df_raw is None: continue df_indicators = calculate_tech_indicators(df_raw) print(f"📈 技术指标计算完成:买入信号数 = {df_indicators['buy_signal'].sum()} 个") log_df, final_cash = backtest_strategy(df_indicators, stock_code, init_cash) print(f"📊 回测完成:最终资金 = {final_cash:,.2f} 元") sub_pdf = generate_sub_pdf(df_indicators, log_df, stock_name, stock_code, final_cash, font_name) sub_pdf_paths.append(sub_pdf) stock_info_list.append((stock_name, stock_code)) # 5. 合并总报告 if sub_pdf_paths: merge_to_total_pdf(sub_pdf_paths, stock_info_list, font_name) else: print("\n⚠️ 无有效子PDF可合并,无法生成总报告!") print("\n" + "=" * 70) print("所有流程执行完毕!") print("=" * 70) # ========================================================= # Towin_robot.py 增量补丁(红♥历史信号 + 蓝♥未来节点 + 最优价) # 使用方式:直接粘到原文件尾部,原重复定义已剔除 # ========================================================= # ---------------- 新增 import ---------------- import matplotlib.dates as mdates from reportlab.platypus import PageBreak # --------------------------------------------------------- # 1. 扩展指标:计算最优买入价 & 未来时间窗口 # --------------------------------------------------------- def enrich_future_nodes(df: pd.DataFrame) -> pd.DataFrame: """ 1) 为历史信号日计算“最优买入价” 优先用 (开盘+最低)/2,缺开盘则用 close*0.98 2) 外推 2025-12-31 前所有「未来潜在节点」 规则:中速 0.5–2 %/日 或 高速 >2 %/日 """ # ---- 最优价 ---- if "open" not in df.columns: df["open"] = df["close"].shift(1).fillna(df["close"] * 0.99) if "low" not in df.columns: df["low"] = df["close"] * 0.97 df["optimal_buy"] = (df["open"] + df["low"]) / 2 # ---- 未来节点 ---- last_day = df.index[-1] future_end = pd.Timestamp("2025-12-31") holiday_keys = pd.date_range(last_day + pd.Timedelta(days=1), future_end, freq="B") latest_slope = df["slope10"].iloc[-1] / 100 base_price = df["close"].iloc[-1] future_prices = [base_price * (1 + latest_slope) ** i for i in range(1, len(holiday_keys) + 1)] future_df = pd.DataFrame(index=holiday_keys, data={"close": future_prices}) future_df["slope10"] = (future_df["close"] - future_df["close"].shift(10)) / future_df["close"].shift(10) * 100 / 10 future_df["future_node"] = False future_df.loc[future_df["slope10"].between(0.5, 2), "future_node"] = True future_df.loc[future_df["slope10"] > 2, "future_node"] = True future_df["optimal_buy"] = future_df["close"] * 0.98 df_future = future_df[future_df["future_node"]].copy() # 合并回主表(仅用于绘图) df["is_future"] = False for dt in df_future.index: df.loc[dt, "is_future"] = True df.loc[dt, "optimal_buy"] = df_future.loc[dt, "optimal_buy"] return df, df_future # --------------------------------------------------------- # 2. 绘图:红♥历史 + 蓝♥未来 + 最优价标注 # --------------------------------------------------------- def plot_quant_analysis_plus(df: pd.DataFrame, stock_name: str, stock_code: str): """返回内存图 + 未来节点df""" df, df_future = enrich_future_nodes(df) fig = plt.figure(figsize=(16, 14), facecolor='white') fig.suptitle(f"{stock_name}({stock_code}) 量化分析图(红♥历史信号 / 蓝♥未来节点)", fontsize=16, y=0.98) ax1 = plt.subplot(3, 2, 1) ax2 = plt.subplot(3, 2, 2) ax3 = plt.subplot(3, 2, 3) ax4 = plt.subplot(3, 2, 4) ax5 = plt.subplot(3, 1, 3) # 1) 价格+红♥ ax1.plot(df.index, df["close"], color="#1f77b4", lw=1.5, label="收盘价") hist_buy = df[df["buy_signal"].fillna(False)] if not hist_buy.empty: ax1.scatter(hist_buy.index, hist_buy["close"], marker="$♥$", s=120, color="red", label="历史买入信号", zorder=5) ax1.plot(df.index, df["ma5"], "#ff7f0e", lw=1, label="5日均线") ax1.plot(df.index, df["ma10"], "#2ca02c", lw=1, label="10日均线") ax1.set_title("收盘价与历史买入信号(红♥)"); ax1.legend(); ax1.grid(alpha=0.3) # 2) 斜率 ax2.plot(df.index, df["slope10"], color="#d62728", lw=1.5) ax2.axhline(0.5, ls="--", c="orange"); ax2.axhline(2, ls="--", c="orange") ax2.fill_between(df.index, 0.5, 2, color="orange", alpha=0.1) ax2.set_title("10 日斜率(中速 0.5–2 %/日)"); ax2.grid(alpha=0.3) # 3) MACD ax3.plot(df.index, df["dif"], "#1f77b4", label="DIF") ax3.plot(df.index, df["dea"], "#ff7f0e", label="DEA") macd_pos = (df["macd"] > 0).fillna(False) ax3.bar(df.index[macd_pos], df["macd"][macd_pos], color="red", alpha=0.5, label="红柱") ax3.bar(df.index[~macd_pos], df["macd"][~macd_pos], color="green", alpha=0.5, label="绿柱") ax3.set_title("MACD"); ax3.legend(); ax3.grid(alpha=0.3) # 4) 成交量 ax4.bar(df.index, df["vol"], color="#2ca02c", alpha=0.6) ax4.set_title("成交量"); ax4.grid(alpha=0.3) # 5) 未来节点(蓝♥) ax5.plot(df.index, df["close"], color="gray", lw=1, alpha=0.7, label="历史收盘") if not hist_buy.empty: ax5.scatter(hist_buy.index, hist_buy["close"], marker="$♥$", s=120, color="red", label="历史买入节点", zorder=6) if not df_future.empty: ax5.scatter(df_future.index, df_future["close"], marker="$♥$", s=120, color="blue", label="未来潜在节点", zorder=6) # 标注最优价(前 10 个) for dt, row in df_future.head(10).iterrows(): ax5.text(dt, row["close"] * 0.95, f"{row['optimal_buy']:.2f}", ha="center", va="top", fontsize=8, color="blue", alpha=0.9) ax5.set_title("未来买入节点预测(蓝♥,含最优价)") ax5.legend(); ax5.grid(alpha=0.3) ax5.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m')) fig.autofmt_xdate() # 内存流 img_stream = BytesIO() fig.savefig(img_stream, dpi=150, bbox_inches="tight", facecolor='white', format="png") img_stream.seek(0) plt.close(fig) return img_stream, df_future # --------------------------------------------------------- # 3. 生成子 PDF(新增“未来节点”一页) # --------------------------------------------------------- def generate_sub_pdf_plus(df_indicators: pd.DataFrame, log_df: pd.DataFrame, stock_name: str, stock_code: str, final_cash: float, font_name: str) -> str: current_dir = os.getcwd() sub_pdf_path = os.path.join(current_dir, f"temp_{stock_code}_sub.pdf") styles = getSampleStyleSheet() for st in [styles['Heading1'], styles['Heading2'], styles['Normal']]: st.fontName = font_name elements = [] # 1) 封面/交易记录/图表 title = Paragraph(f"<b>{stock_name}({stock_code}) 量化分析子模块</b>", styles['Heading1']) title.alignment = 1; elements.extend([title, Spacer(1, 10)]) elements.append(Paragraph("<b>1. 交易记录</b>", styles['Heading2'])) if log_df.empty: elements.append(Paragraph("⚠️ 无交易记录", styles['Normal'])) else: table_data = [log_df.columns.tolist()] + log_df.round(2).values.tolist() tbl = Table(table_data, colWidths=[100, 100, 120, 100, 80, 120], repeatRows=1) tbl.setStyle(TableStyle([ ("BACKGROUND", (0, 0), (-1, 0), colors.lightblue), ("TEXTCOLOR", (0, 0), (-1, -1), colors.black), ("ALIGN", (0, 0), (-1, -1), "CENTER"), ("FONTNAME", (0, 0), (-1, -1), font_name), ("GRID", (0, 0), (-1, -1), 1, colors.black), ])) elements.append(tbl) elements.append(Spacer(1, 20)) # 2) 图表(红蓝双心) elements.append(Paragraph("<b>2. 量化分析图表(红♥历史 / 蓝♥未来)</b>", styles['Heading2'])) img_stream, df_future = plot_quant_analysis_plus(df_indicators, stock_name, stock_code) img = Image(img_stream, width=A4[0] - 60, height=(A4[0] - 60) * 0.75) img.hAlign = "CENTER" elements.append(img) elements.append(PageBreak()) # 3) 未来节点 elements.append(Paragraph("<b>3. 未来潜在买入节点(外推至 2025-12-31)</b>", styles['Heading2'])) if df_future.empty: elements.append(Paragraph("• 未探测到满足中/高速区间的未来节点。", styles['Normal'])) else: future_tbl_data = [["日期", "预估收盘价(元)", "最优买入价(元)", "斜率(%)"]] + \ [[d.strftime("%Y-%m-%d"), f"{r['close']:.2f}", f"{r['optimal_buy']:.2f}", f"{r['slope10']:.2f}"] for d, r in df_future.head(20).iterrows()] ftbl = Table(future_tbl_data, colWidths=[120, 120, 120, 80], repeatRows=1) ftbl.setStyle(TableStyle([ ("BACKGROUND", (0, 0), (-1, 0), colors.lightgreen), ("TEXTCOLOR", (0, 0), (-1, -1), colors.black), ("ALIGN", (0, 0), (-1, -1), "CENTER"), ("FONTNAME", (0, 0), (-1, -1), font_name), ("GRID", (0, 0), (-1, -1), 1, colors.black), ])) elements.append(ftbl) elements.append(Spacer(1, 10)) elements.append(Paragraph(f"• 共探测到 <b>{len(df_future)}</b> 个未来节点,上表展示前 20 个。", styles['Normal'])) # ========== 第七步:胜率 & 蒙特卡洛可信度 & 自动重训 ========== # 1) 计算胜率 win_rate, sig_cnt, avg_ret = calc_win_rate(df_indicators) model_auc = auto_retrain(df_indicators) # 2) 蒙特卡洛外推可信度节点 mask_days = get_trade_days(df_indicators.index[-1].strftime('%Y%m%d'), '20251231') mc_df = monte_carlo_nodes( df_indicators['close'].iloc[-1], df_indicators['close'].pct_change().mean() * 252, df_indicators['close'].pct_change().std() * np.sqrt(252), df_indicators.index[-1], '20251231', mask_days ) # 3) 写进 PDF elements.append(Spacer(1, 12)) elements.append(Paragraph("<b>5. 胜率与蒙特卡洛可信度</b>", styles['Heading2'])) if win_rate is None: elements.append(Paragraph("• 信号样本不足,暂无法计算胜率。", styles['Normal'])) else: elements.append(Paragraph( f"• 历史胜率:<b>{win_rate:.1%}</b> 信号次数:<b>{sig_cnt}</b> 平均收益:<b>{avg_ret:.2%}</b>", styles['Normal'])) if mc_df.empty: elements.append(Paragraph("• 蒙特卡洛外推:未探测到可信度 ≥80% 节点。", styles['Normal'])) else: mc_tbl_data = [["日期", "预估价(元)", "可信度"]] + \ [[d.strftime('%Y-%m-%d'), f"{p:.2f}", f"{c:.0%}"] for d, p, c in mc_df.values] mc_tbl = Table(mc_tbl_data, colWidths=[120, 100, 80], repeatRows=1) mc_tbl.setStyle(TableStyle([ ("BACKGROUND", (0, 0), (-1, 0), colors.lightgreen), ("TEXTCOLOR", (0, 0), (-1, -1), colors.black), ("ALIGN", (0, 0), (-1, -1), "CENTER"), ("FONTNAME", (0, 0), (-1, -1), font_name), ("GRID", (0, 0), (-1, -1), 1, colors.black), ])) elements.append(mc_tbl) # 4) 推送钩子 push_lark(f"{stock_name}({stock_code}) " f"胜率={(win_rate or 0):.1%} " f"信号={sig_cnt or 0} " f"可信度节点={len(mc_df)}") # ========== 第七步结束 ========== # 4) 小结 elements.append(Spacer(1, 20)) elements.append(Paragraph("<b>4. 子模块小结</b>", styles['Heading2'])) buy_cnt = df_indicators["buy_signal"].sum() profit = final_cash - 1_000_000 profit_rate = profit / 1_000_000 * 100 summary = f""" • 历史买入信号:<b>{buy_cnt}</b> 个<br/> • 最终资金:<b>{final_cash:,.2f}</b> 元(收益 <b>{profit_rate:.2f}%</b>)<br/> • 未来节点:<b>{len(df_future)}</b> 个(至 2025-12-31)<br/> • 数据范围:{df_indicators.index.min().strftime('%Y-%m-%d')} ~ {df_indicators.index.max().strftime('%Y-%m-%d')} """ elements.append(Paragraph(summary, styles['Normal'])) # 5) 生成 SimpleDocTemplate(sub_pdf_path, pagesize=A4, rightMargin=30, leftMargin=30, topMargin=30, bottomMargin=20, defaultFontName=font_name).build(elements) print(f"✅ 生成 {stock_name} 子模块PDF(含未来节点):{os.path.basename(sub_pdf_path)}") return sub_pdf_path # ---------- 自动重训 & 监控参数 ---------- ROLL_WINDOW = 240 # 滚动 240 日训练集 RETRAIN_WEEKDAY = 4 # 4=周五 MIN_SIGNAL_COUNT = 20 # 最少信号样本 WIN_THRESHOLD = 0.08 # 8% 以上视为盈利 IC_DECAY_ALERT = 0.30 # IC 衰减 30% 触发警告 MONTE_PATHS = 5000 # 蒙特卡洛路径 MONTE_CONF = 0.80 # 可信度置信水平 WEBHOOK_URL = "" # 飞书机器人 webhook,留空则不推送 #新增工具函数 def get_trade_days(start, end): """本地交易日历(周一到周五,剔除法定节假日简单列表)""" # 2024-2026 主要节假日(可再自行补充) holidays = [ '20241001', '20241002', '20241003', '20241004', '20241007', # 国庆 '20250203', '20250204', '20250205', '20250206', '20250207', # 春节 '20250501', '20250502', '20250609', '20250610', # 劳动、端午 '20251001', '20251002', '20251003', '20251006', '20251007', # 2025 国庆 '20260101', '20260216', '20260217', '20260218', '20260219', '20260220' # 2026 春节 ] # 生成自然工作日 bdays = pd.bdate_range(start, end) # 剔除节假日 hdays = pd.to_datetime(holidays) return bdays.drop(bdays.intersection(hdays)) def calc_win_rate(df_raw, signal_col='buy_signal', ret_col='fwd_ret'): """计算胜率百分比""" df = df_raw.copy() # 未来 5 日收益 df[ret_col] = df['close'].shift(-5) / df['close'] - 1 sig = df[signal_col].fillna(False) if sig.sum() < MIN_SIGNAL_COUNT: return None, None, None win = (df[ret_col] > WIN_THRESHOLD) & sig win_rate = win.sum() / sig.sum() return win_rate, sig.sum(), df[ret_col][sig].mean() def ic_decay(df, factor, ret): """因子 IC 及环比衰减""" df = df.copy() df['factor'] = factor df['ret'] = ret ic = df[['factor', 'ret']].corr().iloc[0, 1] ic_prev = df[['factor', 'ret']].shift(60).corr().iloc[0, 1] decay = abs(ic - ic_prev) / (abs(ic_prev) + 1e-6) return ic, decay #-------------蒙特卡洛外推----------------------------------------------------- def monte_carlo_nodes(last_price, mu, sigma, last_day, end_day, mask_days): """返回可信度节点 DataFrame""" days = len(mask_days) dt = 1/252 paths = np.exp((mu - 0.5*sigma**2)*dt + sigma*np.sqrt(dt)*np.random.normal(0, 1, (MONTE_PATHS, days))) prices = last_price * np.cumprod(paths, axis=1) slopes = (prices[:, 10:] - prices[:, :-10]) / prices[:, :-10] / 10 * 100 # 判断满足斜率区间 hit = ((slopes >= 0.5) & (slopes <= 2)) | (slopes > 2) # 计算首次满足日期可信度 first_hit = np.full(MONTE_PATHS, np.nan) for i in range(MONTE_PATHS): hh = np.where(hit[i])[0] if len(hh) > 0: first_hit[i] = hh[0] conf50 = np.nanpercentile(first_hit, 50) conf80 = np.nanpercentile(first_hit, (1-MONTE_CONF)*100) # 映射回日历 if not np.isnan(conf50): date50 = mask_days[int(conf50)] date80 = mask_days[int(conf80)] price50 = last_price * (1 + mu)**int(conf50) price80 = last_price * (1 + mu)**int(conf80) return pd.DataFrame({'date':[date50, date80], 'price':[price50, price80], 'credib':[0.50, MONTE_CONF]}) else: return pd.DataFrame() # ------------------自动重训函数--------------------------------------- def auto_retrain(df): """滚动重训逻辑回归权重""" today = pd.Timestamp.today() if today.weekday() != RETRAIN_WEEKDAY: return None train = df.tail(ROLL_WINDOW).copy() train['ret5'] = train['close'].shift(-5) / train['close'] - 1 train = train.dropna() if len(train) < MIN_SIGNAL_COUNT: return None X = train[['slope10', 'macd', 'vol_ratio']].fillna(0) y = (train['ret5'] > WIN_THRESHOLD).astype(int) if y.sum() < MIN_SIGNAL_COUNT // 2: return None model = LogisticRegression().fit(X, y) auc = roc_auc_score(y, model.predict_proba(X)[:, 1]) joblib.dump(model, 'signal_model.pkl') print(f"[RETRAIN] {today.date()} 模型重训完成 AUC={auc:.3f}") return auc #----------推送钩子---------------------------------------- def push_lark(msg): if not WEBHOOK_URL: return requests.post(WEBHOOK_URL, json={"msg_type":"text","content":{"text":msg}}) # 4. 替换原主流程入口 # --------------------------------------------------------- def main_plus(): """与原 main() 相同,仅替换子 PDF 生成函数""" font_name = init_chinese_font() init_matplotlib_font(font_name) excel_files = get_all_excel_files() sub_pdf_paths, stock_info_list = [], [] for file in excel_files: df_raw, stock_name, stock_code = load_data(file) if df_raw is None: continue df_indicators = calculate_tech_indicators(df_raw) log_df, final_cash = backtest_strategy(df_indicators, stock_code) # 🔥 使用新函数 sub_pdf = generate_sub_pdf_plus(df_indicators, log_df, stock_name, stock_code, final_cash, font_name) sub_pdf_paths.append(sub_pdf) stock_info_list.append((stock_name, stock_code)) if sub_pdf_paths: merge_to_total_pdf(sub_pdf_paths, stock_info_list, font_name) print("🎉 全部完成!含红♥历史信号 / 蓝♥未来节点 / 最优价 / PDF 嵌入") # ================= 自动重训 & 胜率监控 ================= import tushare as ts from sklearn.linear_model import LogisticRegression from sklearn.metrics import roc_auc_score import joblib, datetime as dt, numpy as np ROLL_WINDOW = 240 # 滚动 240 日 RETRAIN_WEEKDAY = 4 # 周五 MIN_SIGNAL_COUNT = 20 WIN_THRESHOLD = 0.08 IC_DECAY_ALERT = 0.30 MONTE_PATHS = 5000 MONTE_CONF = 0.80 WEBHOOK_URL = "" # 飞书 webhook def get_trade_days(start, end): """本地交易日历(零积分)1990-2030""" holidays = [ '20241001','20241002','20241003','20241004','20241007', '20250203','20250204','20250205','20250206','20250207', '20250501','20250502','20250609','20250610', '20251001','20251002','20251003','20251006','20251007', '20260101','20260216','20260217','20260218','20260219','20260220' ] bdays = pd.bdate_range(start, end) hdays = pd.to_datetime(holidays) return bdays.drop(bdays.intersection(hdays)) def calc_win_rate(df_raw, signal_col='buy_signal', ret_col='fwd_ret'): """计算胜率百分比""" df = df_raw.copy() df[ret_col] = df['close'].shift(-5) / df['close'] - 1 sig = df[signal_col].fillna(False) if sig.sum() < MIN_SIGNAL_COUNT: return None, None, None win = (df[ret_col] > WIN_THRESHOLD) & sig win_rate = win.sum() / sig.sum() return win_rate, sig.sum(), df[ret_col][sig].mean() def monte_carlo_nodes(last_price, mu, sigma, last_day, end_day, mask_days): """蒙特卡洛外推 + 可信度""" days = len(mask_days) dt1 = 1/252 paths = np.exp((mu - 0.5*sigma**2)*dt1 + sigma*np.sqrt(dt1)*np.random.normal(0,1,(MONTE_PATHS,days))) prices = last_price * np.cumprod(paths,axis=1) slopes = (prices[:,10:] - prices[:,:-10]) / prices[:,:-10] / 10 * 100 hit = ((slopes >= 0.5) & (slopes <= 2)) | (slopes > 2) first_hit = np.full(MONTE_PATHS, np.nan) for i in range(MONTE_PATHS): hh = np.where(hit[i])[0] if len(hh) > 0: first_hit[i] = hh[0] if np.all(np.isnan(first_hit)): return pd.DataFrame() p50 = np.nanpercentile(first_hit, 50) p80 = np.nanpercentile(first_hit, (1-MONTE_CONF)*100) date50 = mask_days[int(p50)] date80 = mask_days[int(p80)] price50 = last_price * (1 + mu)**int(p50) price80 = last_price * (1 + mu)**int(p80) return pd.DataFrame({'date':[date50, date80], 'price':[price50, price80], 'credib':[0.50, MONTE_CONF]}) def auto_retrain(df): """周五滚动重训逻辑回归""" if pd.Timestamp.today().weekday() != RETRAIN_WEEKDAY: return None train = df.tail(ROLL_WINDOW).copy() train['ret5'] = train['close'].shift(-5) / train['close'] - 1 train = train.dropna() if len(train) < MIN_SIGNAL_COUNT or train['ret5'].gt(WIN_THRESHOLD).sum() < MIN_SIGNAL_COUNT//2: return None X = train[['slope10', 'macd', 'vol_ratio']].fillna(0) y = (train['ret5'] > WIN_THRESHOLD).astype(int) model = LogisticRegression().fit(X, y) auc = roc_auc_score(y, model.predict_proba(X)[:, 1]) joblib.dump(model, 'signal_model.pkl') print(f"[RETRAIN] {pd.Timestamp.today().date()} 完成 AUC={auc:.3f}") return auc def push_lark(msg): if not WEBHOOK_URL: return import requests requests.post(WEBHOOK_URL, json={"msg_type":"text","content":{"text":msg}}) # ------------- 胜率 & 蒙特卡洛可信度 ------------- win_rate, sig_cnt, avg_ret = calc_win_rate(df_indicators) model_auc = auto_retrain(df_indicators) mask_days = get_trade_days(df_indicators.index[-1].strftime('%Y%m%d'), '20251231') mc_df = monte_carlo_nodes( df_indicators['close'].iloc[-1], df_indicators['close'].pct_change().mean() * 252, df_indicators['close'].pct_change().std() * np.sqrt(252), df_indicators.index[-1], '20251231', mask_days ) elements.append(Spacer(1, 12)) elements.append(Paragraph("<b>5. 胜率与蒙特卡洛可信度</b>", styles['Heading2'])) if win_rate is None: elements.append(Paragraph("• 信号样本不足,暂无法计算胜率。", styles['Normal'])) else: elements.append(Paragraph( f"• 历史胜率:<b>{win_rate:.1%}</b> 信号次数:<b>{sig_cnt}</b> 平均收益:<b>{avg_ret:.2%}</b>", styles['Normal'])) if mc_df.empty: elements.append(Paragraph("• 蒙特卡洛外推:未探测到可信度 ≥80% 节点。", styles['Normal'])) else: mc_tbl_data = [["日期", "预估价(元)", "可信度"]] + \ [[d.strftime('%Y-%m-%d'), f"{p:.2f}", f"{c:.0%}"] for d, p, c in mc_df.values] mc_tbl = Table(mc_tbl_data, colWidths=[120, 100, 80], repeatRows=1) mc_tbl.setStyle(TableStyle([ ("BACKGROUND", (0, 0), (-1, 0), colors.lightgreen), ("TEXTCOLOR", (0, 0), (-1, -1), colors.black), ("ALIGN", (0, 0), (-1, -1), "CENTER"), ("FONTNAME", (0, 0), (-1, -1), font_name), ("GRID", (0, 0), (-1, -1), 1, colors.black), ])) elements.append(mc_tbl) push_lark(f"{stock_name}({stock_code}) 胜率={(win_rate or 0):.1%} 信号={sig_cnt or 0} 可信度节点={len(mc_df)}") import seaborn as sns import matplotlib.pyplot as plt plt.rcdefaults() # 先复位 sns.set_theme( style='whitegrid', # 清爽网格 palette='Set2', # 柔和 8 色 font='Microsoft YaHei', # 系统雅黑 rc={'figure.figsize': (16, 9), 'figure.dpi': 150, 'axes.titlesize': 16, 'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11, 'legend.fontsize': 11, 'figure.subplot.left': 0.08, 'figure.subplot.right': 0.95, 'figure.subplot.bottom': 0.10, 'figure.subplot.top': 0.92} ) # --------------------------------------------------------- # 5. 统一入口 # --------------------------------------------------------- if __name__ == "__main__": main_plus() 需要优化吗,对生成的PDF进行美观度调整,看上去更加VIP主题效果更好。对数据的整合要直观易懂
10-01
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值