import pandas as pd
import numpy as np
import os
import tushare as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# Configure the Tushare client. The API token is a credential, so it is read
# from the TUSHARE_TOKEN environment variable instead of being committed to
# source control. When the variable is unset, ts.pro_api() is expected to fall
# back to a token previously persisted by ts.set_token() -- TODO confirm
# against the installed tushare version.
tushare_token = os.environ.get('TUSHARE_TOKEN')
if tushare_token:
    ts.set_token(tushare_token)
pro = ts.pro_api()

# Download the daily bars once and cache them in a local CSV file.
file_path = 'pingan_stock_data.csv'
if not os.path.exists(file_path):
    data = pro.daily(ts_code='000001.SZ', start_date='20220101', end_date='20241130')
    # Refuse to cache an empty response; otherwise the empty file would be
    # reused on every later run and the download would never be retried.
    if data.empty:
        raise RuntimeError("Tushare returned no rows for 000001.SZ; nothing was cached.")
    data.to_csv(file_path, index=False)
    print(f"文件已下载并保存为 {file_path}。")

# Load the cached data and normalise the column names.
data = pd.read_csv(file_path)
data.rename(columns={'close': 'Close', 'open': 'Open', 'high': 'High', 'low': 'Low', 'vol': 'Volume'}, inplace=True)

# Rolling indicators and the chronological split below require oldest-first
# rows. Tushare's daily endpoint presumably returns newest-first rows (verify),
# so sort explicitly whenever the date column is available.
if 'trade_date' in data.columns:
    data = data.sort_values('trade_date').reset_index(drop=True)


def compute_rsi(close, window=14):
    """Return the Relative Strength Index of a closing-price Series.

    Average gains and losses are simple rolling means over ``window`` price
    changes. The first ``window`` entries are NaN because a full window of
    changes is not yet available. A window without any losses yields 100.
    """
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(window=window).mean()
    avg_loss = delta.clip(upper=0).abs().rolling(window=window).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - 100 / (1 + relative_strength)


# Technical indicators used as model features.
data['MA_5'] = data['Close'].rolling(window=5).mean()
data['MA_10'] = data['Close'].rolling(window=10).mean()
data['RSI'] = compute_rsi(data['Close'], window=14)
# Day-over-day return and rolling volatility.
data['Price_Change'] = data['Close'].pct_change()
data['Volatility'] = data['Close'].rolling(window=10).std()
# Drop the warm-up rows where the rolling windows are still incomplete.
data.dropna(inplace=True)

# Select features and target.
# NOTE(review): every feature is derived from the same day's Close that is
# used as the target; if the goal is forecasting, consider shifting the target
# forward by one row.
features = ['MA_5', 'MA_10', 'RSI', 'Price_Change', 'Volatility']
X = data[features].values
y = data['Close'].values

# Split chronologically (shuffle=False) so the model is never trained on rows
# that come after the rows it is evaluated on.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y.reshape(-1, 1), test_size=0.2, shuffle=False
)

# Fit the scalers on the training portion only, so the test period does not
# influence the min/max used for normalisation; then transform everything.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
scaler_X.fit(X_train_raw)
scaler_y.fit(y_train_raw)
X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y.reshape(-1, 1))
X_train = scaler_X.transform(X_train_raw)
X_test = scaler_X.transform(X_test_raw)
y_train = scaler_y.transform(y_train_raw)
y_test = scaler_y.transform(y_test_raw)
# Ground-truth closing prices of the test period, in original price units.
y_test_actual = y_test_raw.flatten()

# Train the XGBoost model.
xgb_model = XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)
xgb_model.fit(X_train, y_train.ravel())

# Predict and map the predictions back to price units.
xgb_predictions_scaled = xgb_model.predict(X_test)
xgb_predictions = scaler_y.inverse_transform(xgb_predictions_scaled.reshape(-1, 1)).flatten()

# Evaluate on the held-out period.
xgb_mse = mean_squared_error(y_test_actual, xgb_predictions)
xgb_r2 = r2_score(y_test_actual, xgb_predictions)
print(f"XGBoost MSE: {xgb_mse:.4f}, XGBoost R²: {xgb_r2:.4f}")
# Prepare the LSTM input data.
def create_dataset(X, y, time_steps=1):
    """Build sliding-window samples for a sequence model.

    Each sample is ``time_steps`` consecutive rows of ``X``; its label is the
    entry of ``y`` that immediately follows the window.

    Args:
        X: Indexable sequence of feature rows (list or array), oldest first.
        y: Indexable sequence of targets with the same length as ``X``.
        time_steps: Number of consecutive rows per window; must be >= 1.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays holding
        ``len(X) - time_steps`` windows and their labels. Both arrays are
        empty when ``X`` has no more than ``time_steps`` rows.

    Raises:
        ValueError: If ``time_steps`` is smaller than 1 or if ``X`` and ``y``
            differ in length.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be a positive integer")
    if len(X) != len(y):
        raise ValueError("X and y must have the same length")

    # A negative count (input shorter than one window) yields an empty range.
    window_count = len(X) - time_steps
    Xs = [X[start:start + time_steps] for start in range(window_count)]
    ys = [y[start + time_steps] for start in range(window_count)]
    return np.array(Xs), np.array(ys)
# Build sliding windows of `time_steps` rows for the LSTM.
time_steps = 10
X_lstm, y_lstm = create_dataset(X_scaled, y_scaled, time_steps)
# Split chronologically (shuffle=False): consecutive windows overlap in all
# but one row, so a shuffled split would place near-duplicates of the test
# windows into the training set.
X_train_lstm, X_test_lstm, y_train_lstm, y_test_lstm = train_test_split(
    X_lstm, y_lstm, test_size=0.2, shuffle=False
)

# Build the LSTM model: two stacked LSTM layers followed by a dense head.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model.
model.fit(X_train_lstm, y_train_lstm, batch_size=64, epochs=10)

# Predict and map predictions and targets back to price units.
lstm_predictions_scaled = model.predict(X_test_lstm)
lstm_predictions = scaler_y.inverse_transform(lstm_predictions_scaled).flatten()
# The LSTM test set has its own targets: windowing drops the first
# `time_steps` rows, so its length differs from the tabular test set.
y_test_lstm_actual = scaler_y.inverse_transform(np.asarray(y_test_lstm).reshape(-1, 1)).flatten()

# Evaluate against the LSTM's own ground truth.
lstm_mse = mean_squared_error(y_test_lstm_actual, lstm_predictions)
lstm_r2 = r2_score(y_test_lstm_actual, lstm_predictions)
print(f"LSTM MSE: {lstm_mse:.4f}, LSTM R²: {lstm_r2:.4f}")

# Plot each model against the ground truth of its own test set. The two test
# sets differ in length, so they are drawn on separate axes instead of being
# overlaid on a single, misaligned x-axis.
xgb_actual = scaler_y.inverse_transform(np.asarray(y_test).reshape(-1, 1)).flatten()
fig, (ax_xgb, ax_lstm) = plt.subplots(2, 1, figsize=(10, 8))
fig.suptitle('Model Performance Comparison')

ax_xgb.plot(xgb_actual, label='Actual Price')
ax_xgb.plot(xgb_predictions, label='XGBoost Predictions')
ax_xgb.set_xlabel('Time Step')
ax_xgb.set_ylabel('Closing Price')
ax_xgb.legend()

ax_lstm.plot(y_test_lstm_actual, label='Actual Price')
ax_lstm.plot(lstm_predictions, label='LSTM Predictions')
ax_lstm.set_xlabel('Time Step')
ax_lstm.set_ylabel('Closing Price')
ax_lstm.legend()

fig.tight_layout()
fig.savefig('model_performance.png')
plt.close(fig)
ImportError Traceback (most recent call last)
File C:\ProgramData\anaconda3\Lib\site-packages\numpy\_core\__init__.py:23
22 try:
---> 23 from . import multiarray
24 except ImportError as exc:
File C:\ProgramData\anaconda3\Lib\site-packages\numpy\_core\multiarray.py:10
9 import functools
---> 10 from . import overrides
11 from . import _multiarray_umath
File C:\ProgramData\anaconda3\Lib\site-packages\numpy\_core\overrides.py:8
7 from .._utils._inspect import getargspec
----> 8 from numpy._core._multiarray_umath import (
9 add_docstring, _get_implementing_args, _ArrayFunctionDispatcher)
12 ARRAY_FUNCTIONS = set()
ImportError: DLL load failed while importing _multiarray_umath: 找不到指定的模块。
During handling of the above exception, another exception occurred:
ImportError Traceback (most recent call last)
Cell In[4], line 5
3 import os
4 import tushare as ts
----> 5 from sklearn.model_selection import train_test_split
6 from sklearn.preprocessing import MinMaxScaler
7 from xgboost import XGBRegressor
File C:\ProgramData\anaconda3\Lib\site-packages\sklearn\__init__.py:83
69 # We are not importing the rest of scikit-learn during the build
70 # process, as it may not be compiled yet
71 else:
(...)
77 # later is linked to the OpenMP runtime to make it possible to introspect
78 # it and importing it first would fail if the OpenMP dll cannot be found.
79 from . import (
80 __check_build, # noqa: F401
81 _distributor_init, # noqa: F401
82 )
---> 83 from .base import clone
84 from .utils._show_versions import show_versions
86 __all__ = [
87 "calibration",
88 "cluster",
(...)
129 "show_versions",
130 ]
File C:\ProgramData\anaconda3\Lib\site-packages\sklearn\base.py:19
17 from ._config import config_context, get_config
18 from .exceptions import InconsistentVersionWarning
---> 19 from .utils import _IS_32BIT
20 from .utils._estimator_html_repr import estimator_html_repr
21 from .utils._metadata_requests import _MetadataRequester
File C:\ProgramData\anaconda3\Lib\site-packages\sklearn\utils\__init__.py:22
20 from ._bunch import Bunch
21 from ._estimator_html_repr import estimator_html_repr
---> 22 from ._param_validation import Interval, validate_params
23 from .class_weight import compute_class_weight, compute_sample_weight
24 from .deprecation import deprecated
File C:\ProgramData\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:15
12 from scipy.sparse import csr_matrix, issparse
14 from .._config import config_context, get_config
---> 15 from .validation import _is_arraylike_not_scalar
18 class InvalidParameterError(ValueError, TypeError):
19 """Custom exception to be raised when the parameter of a class/method/function
20 does not have a valid type or value.
21 """
File C:\ProgramData\anaconda3\Lib\site-packages\sklearn\utils\validation.py:28
26 from .. import get_config as _get_config
27 from ..exceptions import DataConversionWarning, NotFittedError, PositiveSpectrumWarning
---> 28 from ..utils._array_api import _asarray_with_order, _is_numpy_namespace, get_namespace
29 from ._isfinite import FiniteStatus, cy_isfinite
30 from .fixes import _object_dtype_isnan
File C:\ProgramData\anaconda3\Lib\site-packages\sklearn\utils\_array_api.py:9
6 import scipy.special as special
8 from .._config import get_config
----> 9 from .fixes import parse_version
12 def _check_array_api_dispatch(array_api_dispatch):
13 """Check that array_api_compat is installed and NumPy version is compatible.
14
15 array_api_compat follows NEP29, which has a higher minimum NumPy version than
16 scikit-learn.
17 """
File C:\ProgramData\anaconda3\Lib\site-packages\sklearn\utils\fixes.py:18
16 import numpy as np
17 import scipy
---> 18 import scipy.stats
19 import threadpoolctl
21 import sklearn
File C:\ProgramData\anaconda3\Lib\site-packages\scipy\stats\__init__.py:608
1 """
2 .. _statsrefmanual:
3
(...)
603
604 """
606 from ._warnings_errors import (ConstantInputWarning, NearConstantInputWarning,
607 DegenerateDataWarning, FitError)
--> 608 from ._stats_py import *
609 from ._variation import variation
610 from .distributions import *
File C:\ProgramData\anaconda3\Lib\site-packages\scipy\stats\_stats_py.py:37
35 from numpy import array, asarray, ma
36 from numpy.lib import NumpyVersion
---> 37 from numpy.testing import suppress_warnings
39 from scipy.spatial.distance import cdist
40 from scipy.ndimage import _measurements
File C:\ProgramData\anaconda3\Lib\site-packages\numpy\testing\__init__.py:11
8 from unittest import TestCase
10 from . import _private
---> 11 from ._private.utils import *
12 from ._private.utils import (_assert_valid_refcount, _gen_alignment_data)
13 from ._private import extbuild
File C:\ProgramData\anaconda3\Lib\site-packages\numpy\testing\_private\utils.py:23
20 import concurrent.futures
22 import numpy as np
---> 23 from numpy._core import (
24 intp, float32, empty, arange, array_repr, ndarray, isnat, array)
25 from numpy import isfinite, isnan, isinf
26 import numpy.linalg._umath_linalg
File C:\ProgramData\anaconda3\Lib\site-packages\numpy\_core\__init__.py:49
25 import sys
26 msg = """
27
28 IMPORTANT: PLEASE READ THIS FOR ADVICE ON HOW TO SOLVE THIS ISSUE!
(...)
47 """ % (sys.version_info[0], sys.version_info[1], sys.executable,
48 __version__, exc)
---> 49 raise ImportError(msg)
50 finally:
51 for envkey in env_added:
ImportError:
IMPORTANT: PLEASE READ THIS FOR ADVICE ON HOW TO SOLVE THIS ISSUE!
Importing the numpy C-extensions failed. This error can happen for
many reasons, often due to issues with your setup or how NumPy was
installed.
We have compiled some common reasons and troubleshooting tips at:
https://numpy.org/devdocs/user/troubleshooting-importerror.html
Please note and check the following:
* The Python version is: Python3.11 from "C:\ProgramData\anaconda3\python.exe"
* The NumPy version is: "1.24.3"
and make sure that they are the versions you expect.
Please carefully study the documentation linked above for further help.
Original error was: DLL load failed while importing _multiarray_umath: 找不到指定的模块。