- 量化策略建模:
1.建立训练集:
X:t时刻之前的特征因子(价格、价格衍生特征、文本特征等)
Y:t时刻之前对应的标签(价格、买卖交易)
2.建立测试集:
X:t时刻之后的特征因子(构造方式与训练集一致)
Y:t时刻之后对应的标签(用于评估预测结果)
3.常用的feature
Time Lags(滞后特征):将当前时刻之前若干期(滞后期,即时间窗口内)的样本指标作为当前样本的特征,例如用前5个交易日的收盘价作为当日样本的特征
- direction_pred_main.py
创建滞后序列
from __future__ import print_function
import datetime
import numpy as np
import pandas as pd
import tushare as ts
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
def create_lagged_series(symbol, start_date_str, end_date_str, lags=5):
"""
根据start_data, end_date创建symbol的收盘价的滞后序列
因为当期的数据会受前期数据的影响
默认滞后期为5天
"""
date_str_fmt = '%Y-%m-%d'
start_date = datetime.datetime.strptime(start_date_str, date_str_fmt)
one_yr_before_start = start_date - datetime.timedelta(days=365)
one_yr_before_start_str = one_yr_before_start.strftime(date_str_fmt)
hist_data = ts.get_k_data(symbol, one_yr_before_start_str, end_date_str)
hist_data['date'] = pd.to_datetime(hist_data['date'])
hist_data.set_index('date', inplace=True)
hist_lag = pd.DataFrame(index=hist_data.index)
hist_lag['today'] = hist_data['close']
hist_lag['volume'] = hist_data['volume']
for i in range(0, lags):
hist_lag['lag{}'