import dai
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# ============ 1) DAI SQL 拉取因子数据 ============
def fetch_factor_df(target_date: str):
sql = """
SELECT
date,
instrument,
name,
open,
high,
low,
close,
pre_close,
volume,
amount,
turnover_rate AS turn,
total_market_cap,
list_days,
price_limit_status,
upper_limit,
-- 开盘跳空
open / NULLIF(pre_close, 0) - 1 AS open_gap,
-- 60日涨停次数
m_sum(CASE WHEN price_limit_status = 3 THEN 1 ELSE 0 END, 60) AS zt_count_60d,
-- 5日动量
close / m_lag(close, 5) - 1 AS ret_5d,
-- 开盘接近涨停程度
1 - (upper_limit - open) / COALESCE(NULLIF(upper_limit, 0), 1) AS open_proximity,
-- 量比
volume / NULLIF(m_avg(volume, 20), 1) AS volume_ratio,
-- 日内振幅
(high - low) / NULLIF(open, 1) AS daily_volatility,
-- 60日区间位置
(close - m_min(close, 60)) / NULLIF(m_max(close, 60) - m_min(close, 60), 1) AS position_ratio
FROM cn_stock_prefactors
WHERE
st_status = 0
AND suspended = 0
AND list_days >= 60
AND total_market_cap BETWEEN 50e8 AND 500e8
"""
res = dai.query(sql, filters={"date": [target_date]}, full_db_scan=False)
df = res.df()
df = df.dropna(subset=['open_gap', 'zt_count_60d', 'ret_5d'])
return df
# ============ 2) 本地策略分析类 ============
class EnhancedBreakoutStrategy:
def __init__(self, df: pd.DataFrame):
self.df = df.copy()
if 'rank_score' not in self.df.columns:
self.df['r_open_gap'] = self.df['open_gap'].rank(pct=True)
self.df['r_zt'] = self.df['zt_count_60d'].rank(pct=True)
self.df['r_ret5'] = self.df['ret_5d'].rank(pct=True)
self.df['rank_score'] = 0.5 * self.df['r_open_gap'] + 0.3 * self.df['r_zt'] + 0.2 * self.df['r_ret5']
required = ['open_gap','zt_count_60d','ret_5d','rank_score','volume_ratio','position_ratio','total_market_cap']
for c in required:
if c not in self.df.columns:
self.df[c] = np.nan
def data_quality_check(self):
print("=" * 50)
print("📊 数据质量诊断报告")
print("=" * 50)
checks = {
'数据总量': len(self.df),
'缺失值统计': self.df.isnull().sum().to_dict(),
'数值范围检查': {
'open_gap': (self.df['open_gap'].min(), self.df['open_gap'].max()),
'zt_count_60d': (self.df['zt_count_60d'].min(), self.df['zt_count_60d'].max()),
'ret_5d': (self.df['ret_5d'].min(), self.df['ret_5d'].max()),
'rank_score': (self.df['rank_score'].min(), self.df['rank_score'].max())
}
}
for k,v in checks.items():
print(f"{k}: {v}")
return checks
def strategy_diagnostic(self):
fig, axes = plt.subplots(2,3,figsize=(16,10))
axes[0,0].hist(self.df['ret_5d']*100, bins=20, color='skyblue', edgecolor='black')
axes[0,0].axvline(self.df['ret_5d'].mean()*100, color='red', linestyle='--')
axes[0,0].set_title('5日收益率分布')
axes[0,1].scatter(self.df['zt_count_60d'], self.df['open_gap']*100, c=self.df['rank_score'], cmap='viridis', s=40)
axes[0,1].set_title('涨停次数 vs 开盘跳空')
if 'total_market_cap' in self.df.columns:
market_cap_cut = pd.cut(self.df['total_market_cap']/1e8, bins=[0,50,100,200,500,1000,float('inf')])
market_cap_dist = market_cap_cut.value_counts()
axes[0,2].pie(market_cap_dist.values, labels=market_cap_dist.index, autopct='%1.1f%%')
axes[0,2].set_title('市值分布 (亿元)')
axes[1,0].hist(self.df['rank_score'], bins=15, color='lightgreen', edgecolor='black')
axes[1,0].set_title('策略评分分布')
zt_dist = self.df['zt_count_60d'].value_counts().sort_index()
axes[1,1].bar(zt_dist.index, zt_dist.values, color='orange')
axes[1,1].set_title('60日涨停次数分布')
numeric_cols = ['open_gap','zt_count_60d','ret_5d','rank_score']
if 'total_market_cap' in self.df.columns:
numeric_cols.append('total_market_cap')
sns.heatmap(self.df[numeric_cols].corr().abs().loc[lambda x: x > 0.3], annot=True, cmap='coolwarm', ax=axes[1,2])
axes[1,2].set_title('因子相关性 (相关性 > 0.3)')
plt.tight_layout()
plt.show()
def enhanced_scoring(self):
market_vol = self.df['ret_5d'].std()
market_trend = self.df['ret_5d'].mean()
if market_vol > 0.05:
weights = {'gap':0.3,'zt':0.4,'momentum':0.3}
elif market_trend > 0.02:
weights = {'gap':0.3,'zt':0.3,'momentum':0.4}
else:
weights = {'gap':0.4,'zt':0.35,'momentum':0.25}
self.df['enhanced_score'] = (
weights['gap'] * self.df['r_open_gap'] +
weights['zt'] * self.df['r_zt'] +
weights['momentum'] * self.df['r_ret5']
)
print("动态权重:", weights)
return self.df
def risk_analysis(self):
risks = []
avg_gap = self.df['open_gap'].mean()
if avg_gap > 0.06:
risks.append(f"平均跳空过大: {avg_gap:.2%}")
low_zt = (self.df['zt_count_60d'] <= 1).sum()
if low_zt / len(self.df) > 0.3:
risks.append(f"涨停基因较弱: {low_zt}只")
high_mom = (self.df['ret_5d'] > 0.15).sum()
if high_mom / len(self.df) > 0.4:
risks.append("动量过热")
if risks:
for r in risks:
print("风险:", r)
else:
print("风险检查通过")
return risks
def generate_signals(self, top_n=5):
if 'enhanced_score' not in self.df.columns:
self.enhanced_scoring()
candidates = self.df.sort_values('enhanced_score', ascending=False).head(top_n*3)
candidates = candidates[
(candidates['open_gap'] <= 0.08) &
(candidates['zt_count_60d'] >= 1) &
(candidates['ret_5d'] <= 0.25)
]
final = candidates.head(top_n)
display = final[['instrument','name','open_gap','zt_count_60d','ret_5d','enhanced_score']].copy()
display['open_gap'] = display['open_gap'].map(lambda x: f"{x:.2%}")
display['ret_5d'] = display['ret_5d'].map(lambda x: f"{x:.2%}")
print("最终候选:")
print(display.to_string(index=False))
return final
def performance_metrics(self, signals_df):
if signals_df is None or len(signals_df) == 0:
return {}
metrics = {
'选股数量': len(signals_df),
'平均开盘跳空': f"{signals_df['open_gap'].mean():.2%}",
'平均涨停次数': f"{signals_df['zt_count_60d'].mean():.1f}",
'平均5日收益': f"{signals_df['ret_5d'].mean():.2%}",
'平均enhanced_score': f"{signals_df['enhanced_score'].mean():.3f}"
}
for k,v in metrics.items():
print(f"{k}: {v}")
return metrics
# ============ 3) 一键执行流程 ============
def run_complete_analysis_with_dai(target_date: str):
df = fetch_factor_df(target_date)
print(f"拉取数据行数: {len(df)}")
if df.empty:
raise ValueError("未获取到数据,请检查日期 / 表名 / filters 是否正确")
strat = EnhancedBreakoutStrategy(df)
strat.data_quality_check()
strat.strategy_diagnostic()
strat.enhanced_scoring()
strat.risk_analysis()
signals = strat.generate_signals(top_n=5)
metrics = strat.performance_metrics(signals)
return {'df': df, 'strategy': strat, 'signals': signals, 'metrics': metrics}
# ============ 4) 实时监控类 ============
class RealTimeMonitor:
def __init__(self):
self.history = []
def daily_screening(self, date_str=None):
if date_str is None:
date_str = datetime.now().strftime("%Y-%m-%d")
print("Start daily screening:", date_str)
try:
results = run_complete_analysis_with_dai(date_str)
self.history.append({'date': date_str, 'signals': len(results['signals']), 'metrics': results['metrics']})
# 保存历史记录到 CSV
pd.DataFrame(self.history).to_csv('monitor_history.csv', index=False)
return results
except Exception as e:
print("选股失败:", e)
return None
# ============ 5) 运行示例 ============
if __name__ == "__main__":
date = "2025-10-31"
monitor = RealTimeMonitor()
results = monitor.daily_screening(date)