Below is an example of a reinforcement-learning, multi-factor stock-selection strategy for QuantConnect that uses technical indicators as state features. It follows a simplified DQN (Deep Q-Network) approach; platform constraints may require further simplification:
from AlgorithmImports import *
import numpy as np
import pandas as pd
import random
from collections import deque
# Keras is available on QuantConnect through TensorFlow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
class DQNAgent:
    """Simplified DQN agent"""

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)   # replay buffer
        self.gamma = 0.95                  # discount factor
        self.epsilon = 1.0                 # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build the Q-network"""
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay buffer"""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with an epsilon-greedy policy"""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Experience replay: fit the network on a random minibatch"""
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done and next_state is not None:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
class ReinforcementLearningAlgorithm(QCAlgorithm):

    def Initialize(self):
        self.SetStartDate(2020, 1, 1)
        self.SetEndDate(2023, 1, 1)
        self.SetCash(100000)

        # Stock universe. QuantConnect has no built-in lookup for CSI 300 (000300.SH)
        # constituents, so a fixed sample list of liquid tickers is used here; replace it
        # with your own universe selection or constituent data source.
        tickers = ["SPY", "AAPL", "MSFT", "AMZN", "GOOGL"]
        self.symbols = [self.AddEquity(ticker, Resolution.Daily).Symbol for ticker in tickers]

        # Parameters
        self.state_size = 5      # number of technical-indicator features
        self.action_size = 3     # 0: sell, 1: hold, 2: buy
        self.batch_size = 32
        self.episodes = 10       # not used by this simplified online version

        # DQN agent
        self.agent = DQNAgent(self.state_size, self.action_size)

        # Indicators, created via the QCAlgorithm helpers so they update automatically
        self.lookback = 30       # lookback window
        self.indicators = {}
        for symbol in self.symbols:
            history = self.History(symbol, self.lookback * 2, Resolution.Daily)
            if history.empty:
                continue
            self.indicators[symbol] = {
                'rsi': self.RSI(symbol, 14, MovingAverageType.Wilders, Resolution.Daily),
                'ema_fast': self.EMA(symbol, 12, Resolution.Daily),
                'ema_slow': self.EMA(symbol, 26, Resolution.Daily),
                'macd': self.MACD(symbol, 12, 26, 9, MovingAverageType.Exponential, Resolution.Daily),
                'bb': self.BB(symbol, 20, 2, MovingAverageType.Simple, Resolution.Daily)
            }

        self.previous_action = {symbol: 1 for symbol in self.symbols}  # start with "hold"

        self.Schedule.On(self.DateRules.EveryDay(self.symbols[0]),
                         self.TimeRules.AfterMarketOpen(self.symbols[0], 0),
                         self.Trade)
    def GetState(self, symbol):
        """Build the current state vector from technical indicators"""
        if symbol not in self.indicators:
            return None
        try:
            ind = self.indicators[symbol]
            rsi = float(ind['rsi'].Current.Value)
            ema_fast = float(ind['ema_fast'].Current.Value)
            ema_slow = float(ind['ema_slow'].Current.Value)
            macd = float(ind['macd'].Current.Value)
            bb_upper = float(ind['bb'].UpperBand.Current.Value)
            bb_lower = float(ind['bb'].LowerBand.Current.Value)
            price = float(self.Securities[symbol].Close)
            if price == 0:
                return None

            # Assemble the state vector
            state = [
                (price - bb_lower) / (bb_upper - bb_lower) if (bb_upper - bb_lower) != 0 else 0,  # position inside the Bollinger Bands
                rsi / 100,                                              # normalized RSI
                (ema_fast - ema_slow) / price,                          # EMA spread
                macd / price,                                           # MACD relative to price
                float(self.Portfolio[symbol].UnrealizedProfitPercent)   # open P&L
            ]
            return np.reshape(state, [1, self.state_size])
        except Exception:
            return None
    def GetReward(self, symbol):
        """Immediate reward: open P&L with a simple risk adjustment"""
        position = self.Portfolio[symbol]
        if not position.Invested:
            return 0
        # Reward the unrealized return, penalized by how much of the portfolio
        # the position ties up (a crude risk/concentration adjustment)
        exposure = float(position.HoldingsValue) / float(self.Portfolio.TotalPortfolioValue)
        return float(position.UnrealizedProfitPercent) - 0.5 * abs(exposure) * 0.01
    def Trade(self):
        """Daily trading logic"""
        for symbol in self.symbols:
            if not self.Securities[symbol].HasData:
                continue

            # Current state
            state = self.GetState(symbol)
            if state is None:
                continue

            # Choose an action
            action = self.agent.act(state)

            # Current holding
            position = self.Portfolio[symbol]

            # Execute the action
            if action == 2 and not position.Invested:    # buy
                self.SetHoldings(symbol, 0.1)             # 10% of the portfolio
            elif action == 0 and position.Invested:       # sell
                self.Liquidate(symbol)

            # Next state and reward (with daily bars the true next state only
            # arrives the following day; this is a simplification)
            next_state = self.GetState(symbol)
            reward = self.GetReward(symbol)
            done = False  # an episode-termination rule can be added here

            # Store the transition
            self.agent.remember(state, action, reward, next_state, done)

            # Track the last action taken
            self.previous_action[symbol] = action

        # Experience replay once enough transitions are stored
        if len(self.agent.memory) > self.batch_size:
            self.agent.replay(self.batch_size)
    def OnEndOfDay(self, symbol):
        """Record indicator values at the end of each day (first symbol only, to stay within chart limits)"""
        if self.symbols and symbol == self.symbols[0] and symbol in self.indicators:
            self.Plot("Indicators", "RSI", self.indicators[symbol]['rsi'].Current.Value)
            self.Plot("Indicators", "MACD", self.indicators[symbol]['macd'].Current.Value)
This strategy contains the following key components:
- Technical indicators:
  - RSI (Relative Strength Index)
  - Dual EMA (fast and slow Exponential Moving Averages)
  - MACD (Moving Average Convergence Divergence)
  - Bollinger Bands
- Reinforcement-learning components (a standalone usage sketch of the agent follows this list):
  - A simple neural network built with Keras
  - An experience-replay buffer
  - An epsilon-greedy exploration policy
  - A state space of 5 normalized technical features
- Trading logic:
  - Trades are executed daily from the model's predicted actions
  - Each position is sized at 10% of the portfolio
  - Basic risk management
- Reward mechanism:
  - A reward based on position returns with a risk adjustment
  - Takes unrealized P&L and position exposure into account
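As a quick illustration of how these pieces fit together outside of QuantConnect, the sketch below drives the DQNAgent directly with placeholder state and reward values (random numbers standing in for real features):

import numpy as np

agent = DQNAgent(state_size=5, action_size=3)
state = np.random.rand(1, 5)           # stand-in for a real 5-feature state vector
action = agent.act(state)              # 0: sell, 1: hold, 2: buy
reward = 0.01                          # stand-in for the reward observed after acting
next_state = np.random.rand(1, 5)
agent.remember(state, action, reward, next_state, done=False)
if len(agent.memory) > 32:
    agent.replay(32)                   # train on a random minibatch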
Suggested improvements:
- Add more technical indicators:
  # Add further indicators in Initialize, again via the QCAlgorithm helper methods
  self.indicators[symbol].update({
      'adx': self.ADX(symbol, 14, Resolution.Daily),
      'stoch': self.STO(symbol, 14, 3, 3, Resolution.Daily),
      'obv': self.OBV(symbol, Resolution.Daily)
  })
- Improve state normalization:
  # Rolling-window z-score normalization
  def normalize(data, window=30):
      series = pd.Series(data)
      return (series - series.rolling(window).mean()) / series.rolling(window).std()
- Refine the reward function. The Portfolio object does not expose daily returns or drawdown directly, so the sketch below assumes the algorithm maintains its own rolling window of daily portfolio returns (e.g. a deque named self.daily_returns updated at the end of each day):
  def GetReward(self, symbol):
      # Sharpe-style reward with a max-drawdown penalty
      returns = np.array(self.daily_returns)        # hypothetical rolling window kept by the algorithm
      if len(returns) < 2 or returns.std() == 0:
          return 0
      sharpe = returns.mean() / returns.std()
      equity = np.cumprod(1 + returns)
      max_drawdown = 1 - (equity / np.maximum.accumulate(equity)).min()
      return sharpe - 0.3 * max_drawdown
- Strengthen risk management:
  # Risk check before trading: cap each new position at 2% of portfolio value
  max_risk = 0.02
  quantity = int(self.Portfolio.TotalPortfolioValue * max_risk / self.Securities[symbol].Close)
  if quantity > 0:
      self.MarketOrder(symbol, quantity)
- Use more advanced reinforcement-learning algorithms:
  - Consider more stable algorithms such as PPO or A2C
  - Add an LSTM layer to capture temporal structure in the features (see the sketch below)
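A minimal sketch of such a recurrent Q-network, assuming the state is fed as a rolling window of the last `window` feature vectors (shape (window, state_size)) rather than a single vector; the helper name build_recurrent_model is illustrative:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

def build_recurrent_model(window, state_size, action_size, learning_rate=0.001):
    # LSTM encodes the last `window` observations before the Q-value head
    model = Sequential()
    model.add(LSTM(32, input_shape=(window, state_size)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(action_size, activation='linear'))
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

# Inputs must then be shaped (batch, window, state_size),
# e.g. np.reshape(recent_states, [1, window, state_size])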
Note that before deploying this for real you still need to:
- Test compatibility in the QuantConnect environment
- Tune the hyperparameters
- Add stricter risk controls
- Improve the data-preprocessing pipeline
- Validate the strategy with thorough backtests
It is best to develop and train the model in the Research environment first and deploy only the inference logic in the QuantConnect algorithm, since the platform limits the compute available for online training.
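As a sketch of that split (using only standard Keras weight saving; the file name dqn.weights.h5 is purely illustrative), the network can be trained offline, its weights persisted, and then reloaded in Initialize with exploration disabled so the algorithm only runs inference:

# Offline, in the research environment: train, then persist the weights
agent = DQNAgent(state_size=5, action_size=3)
# ... run the episodic training loop over historical data here ...
agent.model.save_weights("dqn.weights.h5")

# Inside the algorithm's Initialize: load the weights and switch off exploration
self.agent = DQNAgent(self.state_size, self.action_size)
self.agent.model.load_weights("dqn.weights.h5")   # file transfer (e.g. via the ObjectStore) is up to you
self.agent.epsilon = 0.0                          # act greedily; skip remember()/replay() in Trade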