这是一个基于机器学习的股票量价预测与交易系统。代码通过baostock获取股票历史数据,使用随机森林算法构建预测模型。系统首先进行数据预处理和特征工程,生成技术指标如移动平均线、RSI、波动率等;然后训练两个随机森林模型分别预测次日收盘价和成交量;最后基于预测结果生成交易信号:预期涨幅超4%且放量时买入,预期跌幅超3%时卖出,否则持有。系统输出预测精度评估和交易建议,为投资决策提供数据支持。

**数据获取:**使用baostock库获取股票(sh.600172)从2025年1月1日至2025年10月25日的日K线数据,包括开盘价、最高价、最低价、收盘价、成交量、成交额,并设置了复权方式(adjustflag="3"表示不复权;"1"为后复权,"2"为前复权)。
**数据预处理:**将获取的数据转换为浮点数类型,并将日期转换为datetime格式,按日期排序。
**特征工程:**构建多个技术指标特征,包括:
- 价格变化(当日收盘价-开盘价)
- 高低价比率(最高价/最低价)
- 成交量变化率(当日成交量相对于前一日的变化百分比)
- 价格动量(3日收盘价变化率)
- 移动平均线(5日、10日、20日)
- 波动率(10日收盘价标准差)
- RSI相对强弱指标(14日)
- 成交量相关特征(5日成交量均线、成交量比率)
- 滞后特征(收盘价和成交量的1、2、3、5日滞后值)
**目标变量定义:**将第二日的收盘价和成交量作为目标变量(即下一日的量价)。
**模型训练:**使用随机森林回归模型,分别对收盘价和成交量进行预测。将数据分为训练集和测试集(测试集占20%),并对特征进行标准化处理。
**模型评估:**计算测试集上的平均绝对误差(MAE)和均方根误差(RMSE)来评估模型性能。
**交易策略:**基于预测结果,计算预期收益率(预测的次日收盘价相对于当日收盘价的变化率)。当预期收益率大于4%且预测成交量大于当日成交量时,发出买入信号;当预期收益率小于-3%时,发出卖出信号;否则为持有。
**结果输出:**将预测结果和交易信号保存到CSV文件中,并打印最近10个交易日的预测信号和信号分布。
import baostock as bs
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')
# Log in to the baostock service. Stop right away with the server's message
# if the login is rejected, instead of failing later on an empty DataFrame.
lg = bs.login()
if lg.error_code != '0':
    raise RuntimeError(f"baostock login failed: {lg.error_msg}")

try:
    # Fetch daily K-line data for the single stock sh.600172 (this is NOT the
    # CSI 300 index). Dates use the zero-padded YYYY-MM-DD form; adjustflag="3"
    # requests unadjusted prices.
    rs = bs.query_history_k_data_plus(
        "sh.600172",
        "date,open,high,low,close,volume,amount",
        start_date='2025-01-01', end_date='2025-10-25',
        frequency="d", adjustflag="3")
    if rs.error_code != '0':
        raise RuntimeError(f"baostock query failed: {rs.error_msg}")

    # Drain the result set row by row into a DataFrame. `and` short-circuits,
    # so next() is not called once an error code has been reported.
    data_list = []
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    df = pd.DataFrame(data_list, columns=rs.fields)
finally:
    # Always release the session, even when the query fails.
    bs.logout()
# Data preprocessing
def preprocess_data(df):
    """Return a cleaned, date-sorted copy of the raw K-line table.

    The price/volume columns (``open``, ``high``, ``low``, ``close``,
    ``volume``, ``amount``) are converted to float and ``date`` is parsed to
    datetime. Values that cannot be parsed as numbers (for example empty
    strings) become NaN instead of raising, so later ``dropna`` steps can
    discard them.

    Args:
        df: DataFrame holding at least the columns listed above plus ``date``.

    Returns:
        A new DataFrame sorted by ``date`` ascending with a fresh 0..n-1
        index. The input DataFrame is left unmodified.
    """
    numeric_columns = ['open', 'high', 'low', 'close', 'volume', 'amount']

    # Work on a copy so the caller's raw data keeps its original contents.
    cleaned = df.copy()
    for column in numeric_columns:
        # to_numeric may yield an integer dtype; force float for consistency.
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce').astype(float)

    cleaned['date'] = pd.to_datetime(cleaned['date'])
    return cleaned.sort_values('date').reset_index(drop=True)
# Feature engineering
def create_features(df):
    """Return a copy of ``df`` extended with technical-indicator columns.

    Added columns, in order:
        price_change     close - open
        high_low_ratio   high / low
        volume_change    1-row percentage change of volume
        price_momentum   3-row percentage change of close
        MA5, MA10, MA20  rolling means of close
        volatility       10-row rolling standard deviation of close
        RSI              simplified 14-row RSI built from plain rolling means
        volume_MA5       5-row rolling mean of volume
        volume_ratio     volume / volume_MA5
        close_lag_k, volume_lag_k for k in 1, 2, 3, 5

    Rows at the start of the table contain NaN wherever a window or lag is
    not yet filled. Infinite values in numeric columns (e.g. a percentage
    change from a zero volume) are replaced by NaN so that a later
    ``dropna`` removes them instead of letting them reach model training.

    Args:
        df: DataFrame with numeric ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns, ordered by date.

    Returns:
        A new DataFrame; the input is left unmodified.
    """
    feats = df.copy()
    close = feats['close']
    volume = feats['volume']

    # Basic price / volume indicators.
    feats['price_change'] = close - feats['open']
    feats['high_low_ratio'] = feats['high'] / feats['low']
    feats['volume_change'] = volume.pct_change()
    feats['price_momentum'] = close.pct_change(periods=3)

    # Moving averages of the close.
    for window in (5, 10, 20):
        feats[f'MA{window}'] = close.rolling(window=window).mean()

    # Volatility.
    feats['volatility'] = close.rolling(window=10).std()

    # Relative strength index (simplified). When the average loss is zero the
    # ratio is infinite and the formula below still evaluates to a finite 100.
    delta = close.diff()
    avg_gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    avg_loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    relative_strength = avg_gain / avg_loss
    feats['RSI'] = 100 - (100 / (1 + relative_strength))

    # Volume features.
    feats['volume_MA5'] = volume.rolling(window=5).mean()
    feats['volume_ratio'] = volume / feats['volume_MA5']

    # Lagged close and volume.
    for lag in (1, 2, 3, 5):
        feats[f'close_lag_{lag}'] = close.shift(lag)
        feats[f'volume_lag_{lag}'] = volume.shift(lag)

    # dropna() does not remove +/-inf, so turn them into NaN here.
    numeric_cols = feats.select_dtypes(include='number').columns
    feats[numeric_cols] = feats[numeric_cols].replace([np.inf, -np.inf], np.nan)
    return feats
# Prepare training data
def prepare_training_data(df):
    """Build the modelling table and the list of feature column names.

    Features come from ``create_features``. The targets are the following
    row's close and volume (``next_close`` / ``next_volume``), obtained with
    ``shift(-1)``. Every row containing a NaN is discarded, which removes the
    rows whose rolling windows or lags are not filled yet as well as the last
    row, which has no following day.

    Args:
        df: Preprocessed K-line DataFrame ordered by date.

    Returns:
        A ``(table, feature_columns)`` tuple, where ``feature_columns`` is
        every column of ``table`` except ``date`` and the two targets.
    """
    enriched = create_features(df)

    # Targets: next row's close and volume.
    enriched['next_close'] = enriched['close'].shift(-1)
    enriched['next_volume'] = enriched['volume'].shift(-1)

    complete_rows = enriched.dropna()

    excluded = ('date', 'next_close', 'next_volume')
    feature_columns = [name for name in complete_rows.columns if name not in excluded]
    return complete_rows, feature_columns
# Train the prediction models
def train_models(df, feature_columns):
    """Train random-forest regressors for next-day close and volume.

    The rows are split chronologically (no shuffling): the first 80% train
    the models and the last 20% are held out. The scaler is fitted on the
    training rows only and then applied to the held-out rows, so no
    statistics from the test period leak into preprocessing.

    Args:
        df: Table containing ``feature_columns`` plus the ``next_close`` and
            ``next_volume`` target columns.
        feature_columns: Names of the columns used as model inputs.

    Returns:
        A 9-tuple ``(close_model, volume_model, scaler, X_test, y_close_test,
        y_volume_test, close_pred, volume_pred, test_indices)``. ``X_test`` is
        the scaled held-out feature matrix, the ``*_pred`` arrays are the
        models' predictions for it, and ``test_indices`` is the index of the
        held-out rows in ``df``.
    """
    X = df[feature_columns]
    y_close = df['next_close']
    y_volume = df['next_volume']

    # Split first so the scaler never sees the held-out rows.
    (X_train_raw, X_test_raw,
     y_close_train, y_close_test,
     y_volume_train, y_volume_test) = train_test_split(
        X, y_close, y_volume, test_size=0.2, random_state=42, shuffle=False)

    # Standardise using training-set statistics only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    # Close-price model.
    close_model = RandomForestRegressor(n_estimators=100, random_state=42)
    close_model.fit(X_train, y_close_train)

    # Volume model.
    volume_model = RandomForestRegressor(n_estimators=100, random_state=42)
    volume_model.fit(X_train, y_volume_train)

    close_pred = close_model.predict(X_test)
    volume_pred = volume_model.predict(X_test)

    # Index labels of the held-out rows, for aligning predictions with df.
    test_indices = y_close_test.index
    return (close_model, volume_model, scaler,
            X_test, y_close_test, y_volume_test, close_pred, volume_pred, test_indices)
# Evaluate the models
def evaluate_models(y_close_test, y_volume_test, close_pred, volume_pred):
    """Print MAE and RMSE for the close-price and volume predictions.

    Args:
        y_close_test: Actual close values of the held-out rows.
        y_volume_test: Actual volume values of the held-out rows.
        close_pred: Predicted close values for the same rows.
        volume_pred: Predicted volume values for the same rows.

    Returns:
        None. The metrics are written to standard output with four decimals.
    """
    reports = (
        ("收盘价预测效果:", y_close_test, close_pred),
        ("\n成交量预测效果:", y_volume_test, volume_pred),
    )
    for heading, actual, predicted in reports:
        print(heading)
        mae = mean_absolute_error(actual, predicted)
        print(f"MAE: {mae:.4f}")
        # RMSE is the square root of the mean squared error.
        rmse = np.sqrt(mean_squared_error(actual, predicted))
        print(f"RMSE: {rmse:.4f}")
# Trading strategy
def trading_strategy(df, close_pred, volume_pred, test_indices,
                     buy_threshold=0.04, sell_threshold=-0.03):
    """Turn next-day predictions into BUY / SELL / HOLD signals.

    Each prediction is aligned with the row of ``df`` whose features produced
    it, and it forecasts the trading day after that row (the targets are built
    with ``shift(-1)``). The expected return is therefore measured against
    that same row's close, and the predicted volume is compared with that same
    row's volume. Rows are looked up by label, so gaps in the index are fine.

    Rules:
        BUY   expected return > ``buy_threshold`` and predicted volume is
              above the current volume
        SELL  expected return < ``sell_threshold``
        HOLD  otherwise

    Args:
        df: Table containing ``date``, ``close`` and ``volume`` columns and
            indexed so that every label in ``test_indices`` exists.
        close_pred: Predicted next-day closes, one per entry of ``test_indices``.
        volume_pred: Predicted next-day volumes, same length and order.
        test_indices: Index labels of the rows the predictions belong to.
        buy_threshold: Minimum expected return for a BUY (default 4%).
        sell_threshold: Expected return below which a SELL is issued
            (default -3%).

    Returns:
        DataFrame with columns ``date``, ``predicted_close``,
        ``predicted_volume``, ``signal`` and ``expected_return``.
    """
    predicted_close = np.asarray(close_pred, dtype=float)
    predicted_volume = np.asarray(volume_pred, dtype=float)

    # Reference values come from the prediction's own row (the "current" day),
    # not from the row before it.
    current_close = df.loc[test_indices, 'close'].to_numpy(dtype=float)
    current_volume = df.loc[test_indices, 'volume'].to_numpy(dtype=float)

    expected_return = (predicted_close - current_close) / current_close

    buy = (expected_return > buy_threshold) & (predicted_volume > current_volume)
    sell = expected_return < sell_threshold
    signal = np.where(buy, "BUY", np.where(sell, "SELL", "HOLD"))

    return pd.DataFrame({
        'date': df.loc[test_indices, 'date'].to_numpy(),
        'predicted_close': predicted_close,
        'predicted_volume': predicted_volume,
        'signal': signal,
        'expected_return': expected_return,
    })
# Main program
if __name__ == "__main__":
    # Show the raw download and keep a copy on disk before converting it.
    print("原始数据预览:")
    print(df.head())
    df.to_csv('stock_data.csv', index=False)

    # Clean the data, then build features and targets.
    df_processed = preprocess_data(df)
    df_final, feature_columns = prepare_training_data(df_processed)
    print(f"\n特征数量: {len(feature_columns)}")
    print(f"有效样本数: {len(df_final)}")
    print(f"特征列: {feature_columns}")

    # Train both models and collect the held-out predictions.
    (close_model, volume_model, scaler,
     X_test, y_close_test, y_volume_test,
     close_pred, volume_pred, test_indices) = train_models(df_final, feature_columns)

    # Report MAE / RMSE on the held-out rows.
    evaluate_models(y_close_test, y_volume_test, close_pred, volume_pred)

    # Derive trading signals from the predictions.
    signals_df = trading_strategy(df_final, close_pred, volume_pred, test_indices)
    print("\n最近10个交易日的预测信号:")
    print(signals_df.tail(10))

    # How many BUY / SELL / HOLD signals were produced.
    signal_counts = signals_df['signal'].value_counts()
    print("\n交易信号分布:")
    print(signal_counts)

    # Save predictions next to the actual targets. The "actual" columns hold
    # the next-day targets (next_close / next_volume) of each listed date.
    output_df = pd.DataFrame({
        'date': signals_df['date'],
        'actual_close': y_close_test.values,
        'predicted_close': close_pred,
        'actual_volume': y_volume_test.values,
        'predicted_volume': volume_pred,
        'trading_signal': signals_df['signal'],
    })
    output_df.to_csv('stock_prediction_results.csv', index=False)
    print("\n预测结果已保存到 stock_prediction_results.csv")
这个模型可以作为一个基础框架,您可以根据实际需求进一步优化特征工程、尝试不同的机器学习算法,或者调整交易策略参数。
1万+

被折叠的 条评论
为什么被折叠?



