Below is an example of a reinforcement-learning, multi-factor stock-selection strategy for QuantConnect that uses technical indicators as state features. It follows a simplified DQN (Deep Q-Network) approach; platform constraints may require further simplification:
from AlgorithmImports import *
import numpy as np
import pandas as pd
import random
from collections import deque
# Keras is available on QuantConnect through TensorFlow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
class DQNAgent:
    """Simplified DQN agent"""

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)   # replay buffer
        self.gamma = 0.95                  # discount factor
        self.epsilon = 1.0                 # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build the Q-network"""
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay buffer"""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with an epsilon-greedy policy"""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Experience replay: fit the network on a random minibatch"""
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done and next_state is not None:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
class ReinforcementLearningAlgorithm(QCAlgorithm):

    def Initialize(self):
        self.SetStartDate(2020, 1, 1)
        self.SetEndDate(2023, 1, 1)
        self.SetCash(100000)

        # Stock universe. QuantConnect has no built-in lookup for CSI 300 (000300.SH)
        # constituents, so a fixed sample list of liquid tickers is used here; replace it
        # with your own universe selection or constituent data source.
        tickers = ["SPY", "AAPL", "MSFT", "AMZN", "GOOGL"]
        self.symbols = [self.AddEquity(ticker, Resolution.Daily).Symbol for ticker in tickers]

        # Parameters
        self.state_size = 5      # number of technical-indicator features
        self.action_size = 3     # 0: sell, 1: hold, 2: buy
        self.batch_size = 32
        self.episodes = 10       # not used by this simplified online version

        # DQN agent
        self.agent = DQNAgent(self.state_size, self.action_size)

        # Indicators, created via the QCAlgorithm helpers so they update automatically
        self.lookback = 30       # lookback window
        self.indicators = {}
        for symbol in self.symbols:
            history = self.History(symbol, self.lookback * 2, Resolution.Daily)
            if history.empty:
                continue
            self.indicators[symbol] = {
                'rsi': self.RSI(symbol, 14, MovingAverageType.Wilders, Resolution.Daily),
                'ema_fast': self.EMA(symbol, 12, Resolution.Daily),
                'ema_slow': self.EMA(symbol, 26, Resolution.Daily),
                'macd': self.MACD(symbol, 12, 26, 9, MovingAverageType.Exponential, Resolution.Daily),
                'bb': self.BB(symbol, 20, 2, MovingAverageType.Simple, Resolution.Daily)
            }

        self.previous_action = {symbol: 1 for symbol in self.symbols}  # start with "hold"

        self.Schedule.On(self.DateRules.EveryDay(self.symbols[0]),
                         self.TimeRules.AfterMarketOpen(self.symbols[0], 0),
                         self.Trade)
    def GetState(self, symbol):
        """Build the current state vector from technical indicators"""
        if symbol not in self.indicators:
            return None
        try:
            ind = self.indicators[symbol]
            rsi = float(ind['rsi'].Current.Value)
            ema_fast = float(ind['ema_fast'].Current.Value)
            ema_slow = float(ind['ema_slow'].Current.Value)
            macd = float(ind['macd'].Current.Value)
            bb_upper = float(ind['bb'].UpperBand.Current.Value)
            bb_lower = float(ind['bb'].LowerBand.Current.Value)
            price = float(self.Securities[symbol].Close)
            if price == 0:
                return None

            # Assemble the state vector
            state = [
                (price - bb_lower) / (bb_upper - bb_lower) if (bb_upper - bb_lower) != 0 else 0,  # position inside the Bollinger Bands
                rsi / 100,                                              # normalized RSI
                (ema_fast - ema_slow) / price,                          # EMA spread
                macd / price,                                           # MACD relative to price
                float(self.Portfolio[symbol].UnrealizedProfitPercent)   # open P&L
            ]
            return np.reshape(state, [1, self.state_size])
        except Exception:
            return None
    def GetReward(self, symbol):
        """Immediate reward: open P&L with a simple risk adjustment"""
        position = self.Portfolio[symbol]
        if not position.Invested:
            return 0
        # Reward the unrealized return, penalized by how much of the portfolio
        # the position ties up (a crude risk/concentration adjustment)
        exposure = float(position.HoldingsValue) / float(self.Portfolio.TotalPortfolioValue)
        return float(position.UnrealizedProfitPercent) - 0.5 * abs(exposure) * 0.01
    def Trade(self):
        """Daily trading logic"""
        for symbol in self.symbols:
            if not self.Securities[symbol].HasData:
                continue

            # Current state
            state = self.GetState(symbol)
            if state is None:
                continue

            # Choose an action
            action = self.agent.act(state)

            # Current holding
            position = self.Portfolio[symbol]

            # Execute the action
            if action == 2 and not position.Invested:    # buy
                self.SetHoldings(symbol, 0.1)             # 10% of the portfolio
            elif action == 0 and position.Invested:       # sell
                self.Liquidate(symbol)

            # Next state and reward (with daily bars the true next state only
            # arrives the following day; this is a simplification)
            next_state = self.GetState(symbol)
            reward = self.GetReward(symbol)
            done = False  # an episode-termination rule can be added here

            # Store the transition
            self.agent.remember(state, action, reward, next_state, done)

            # Track the last action taken
            self.previous_action[symbol] = action

        # Experience replay once enough transitions are stored
        if len(self.agent.memory) > self.batch_size:
            self.agent.replay(self.batch_size)
    def OnEndOfDay(self, symbol):
        """Record indicator values at the end of each day (first symbol only, to stay within chart limits)"""
        if self.symbols and symbol == self.symbols[0] and symbol in self.indicators:
            self.Plot("Indicators", "RSI", self.indicators[symbol]['rsi'].Current.Value)
            self.Plot("Indicators", "MACD", self.indicators[symbol]['macd'].Current.Value)
This strategy contains the following key components:
- Technical indicators:
  - RSI (Relative Strength Index)
  - Dual EMA (fast and slow Exponential Moving Averages)
  - MACD (Moving Average Convergence Divergence)
  - Bollinger Bands
- Reinforcement-learning components (a standalone usage sketch of the agent follows this list):
  - A simple neural network built with Keras
  - An experience-replay buffer
  - An epsilon-greedy exploration policy
  - A state space of 5 normalized technical features
- Trading logic:
  - Trades are executed daily from the model's predicted actions
  - Each position is sized at 10% of the portfolio
  - Basic risk management
- Reward mechanism:
  - A reward based on position returns with a risk adjustment
  - Takes unrealized P&L and position exposure into account
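As a quick illustration of how these pieces fit together outside of QuantConnect, the sketch below drives the DQNAgent directly with placeholder state and reward values (random numbers standing in for real features):

import numpy as np

agent = DQNAgent(state_size=5, action_size=3)
state = np.random.rand(1, 5)           # stand-in for a real 5-feature state vector
action = agent.act(state)              # 0: sell, 1: hold, 2: buy
reward = 0.01                          # stand-in for the reward observed after acting
next_state = np.random.rand(1, 5)
agent.remember(state, action, reward, next_state, done=False)
if len(agent.memory) > 32:
    agent.replay(32)                   # train on a random minibatch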
Suggested improvements:
- Add more technical indicators:
  # Add further indicators in Initialize, again via the QCAlgorithm helper methods
  self.indicators[symbol].update({
      'adx': self.ADX(symbol, 14, Resolution.Daily),
      'stoch': self.STO(symbol, 14, 3, 3, Resolution.Daily),
      'obv': self.OBV(symbol, Resolution.Daily)
  })
- Improve state normalization:
  # Rolling-window z-score normalization
  def normalize(data, window=30):
      series = pd.Series(data)
      return (series - series.rolling(window).mean()) / series.rolling(window).std()
- Refine the reward function. The Portfolio object does not expose daily returns or drawdown directly, so the sketch below assumes the algorithm maintains its own rolling window of daily portfolio returns (e.g. a deque named self.daily_returns updated at the end of each day):
  def GetReward(self, symbol):
      # Sharpe-style reward with a max-drawdown penalty
      returns = np.array(self.daily_returns)        # hypothetical rolling window kept by the algorithm
      if len(returns) < 2 or returns.std() == 0:
          return 0
      sharpe = returns.mean() / returns.std()
      equity = np.cumprod(1 + returns)
      max_drawdown = 1 - (equity / np.maximum.accumulate(equity)).min()
      return sharpe - 0.3 * max_drawdown
- Strengthen risk management:
  # Risk check before trading: cap each new position at 2% of portfolio value
  max_risk = 0.02
  quantity = int(self.Portfolio.TotalPortfolioValue * max_risk / self.Securities[symbol].Close)
  if quantity > 0:
      self.MarketOrder(symbol, quantity)
- Use more advanced reinforcement-learning algorithms:
  - Consider more stable algorithms such as PPO or A2C
  - Add an LSTM layer to capture temporal structure in the features (see the sketch below)
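A minimal sketch of such a recurrent Q-network, assuming the state is fed as a rolling window of the last `window` feature vectors (shape (window, state_size)) rather than a single vector; the helper name build_recurrent_model is illustrative:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

def build_recurrent_model(window, state_size, action_size, learning_rate=0.001):
    # LSTM encodes the last `window` observations before the Q-value head
    model = Sequential()
    model.add(LSTM(32, input_shape=(window, state_size)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(action_size, activation='linear'))
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

# Inputs must then be shaped (batch, window, state_size),
# e.g. np.reshape(recent_states, [1, window, state_size])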
Note that before deploying this for real you still need to:
- Test compatibility in the QuantConnect environment
- Tune the hyperparameters
- Add stricter risk controls
- Improve the data-preprocessing pipeline
- Validate the strategy with thorough backtests
It is best to develop and train the model in the Research environment first and deploy only the inference logic in the QuantConnect algorithm, since the platform limits the compute available for online training.
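As a sketch of that split (using only standard Keras weight saving; the file name dqn.weights.h5 is purely illustrative), the network can be trained offline, its weights persisted, and then reloaded in Initialize with exploration disabled so the algorithm only runs inference:

# Offline, in the research environment: train, then persist the weights
agent = DQNAgent(state_size=5, action_size=3)
# ... run the episodic training loop over historical data here ...
agent.model.save_weights("dqn.weights.h5")

# Inside the algorithm's Initialize: load the weights and switch off exploration
self.agent = DQNAgent(self.state_size, self.action_size)
self.agent.model.load_weights("dqn.weights.h5")   # file transfer (e.g. via the ObjectStore) is up to you
self.agent.epsilon = 0.0                          # act greedily; skip remember()/replay() in Trade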