import os.path
import tushare as ts
from functools import wraps
import _pickle as cPickle
import pandas as pd
import time
# Authenticate against the tushare pro API. The token is taken from the
# TUSHARE_TOKEN environment variable when it is set; otherwise the original
# placeholder value 'token' is used.
ts.set_token(os.environ.get('TUSHARE_TOKEN', 'token'))
pro = ts.pro_api()

# Factor columns used as regressors in the linear regression further down.
# Wider candidate set:
# fac_list = ['Price1Y', 'total_profit_to_cost_ratio', 'VOL120']
fac_list = ['Price1Y', ]

# Date range (YYYYMMDD strings) passed to the calendar query below.
start = '20130101'
end = '20181231'

# Date list: calendar rows between ``start`` and ``end`` requested with
# is_open='1' (presumably "exchange open" days - confirm against the tushare
# trade_cal documentation). Later code iterates over the 'cal_date' column.
df = pro.trade_cal(exchange='', start_date=start, end_date=end,
                   fields='exchange,cal_date,is_open,pretrade_date', is_open='1')
def cache_df(fname, do=True):
    """Build a decorator that caches a function's result in a pickle file.

    Each time the decorated function is called:

    * if ``fname`` exists as a file, the pickled object is loaded from it and
      returned, and the wrapped function is NOT called;
    * otherwise the wrapped function is called and, when ``fname`` is non-empty
      and ``do`` is true, its result is pickled to ``fname``.

    The cache is keyed by ``fname`` only: call arguments are ignored, so every
    call shares the same cached object once the file exists.

    Args:
        fname: Path of the pickle cache file.
        do: Whether a freshly computed result is written to ``fname``. An
            already existing cache file is still read when this is false.

    Returns:
        A decorator that wraps a function with the caching behaviour above.
    """
    def __inner(func):
        @wraps(func)
        def wrap(*args, **kwargs):
            if os.path.isfile(fname):
                # Load data from file.
                # NOTE(review): unpickling executes arbitrary code; only point
                # ``fname`` at files this program wrote itself.
                print("from file")
                with open(fname, "rb") as cache_file:
                    return cPickle.load(cache_file)

            # Get data from func.
            print("from func")
            data = func(*args, **kwargs)
            # Re-check for the file: ``func`` may have created it in the
            # meantime, in which case it is left untouched.
            if fname and do and not os.path.isfile(fname):
                print("to file")
                with open(fname, 'wb') as cache_file:
                    cPickle.dump(data, cache_file)
            return data
        return wrap
    return __inner
# Factor definitions:
# Price1Y: current price divided by the mean price of the past year, minus 1,
#   i.e. close / mean(close over the past year (250 days)) - 1.
# total_profit_to_cost_ratio: total profit / (operating cost + financial
#   expenses + selling expenses + administrative expenses), all TTM values.
# VOL120: mean of the 120-day turnover rate, in percent.
def get_5d_change_pct(subdf):
    """Add the Price1Y factor and a forward return to one stock's rows.

    The rows are sorted by ``trade_date`` and three columns are appended:

    * ``Price1Y``: ``close`` divided by its 250-row rolling mean, minus 1.
    * ``5d_close``: the ``close`` found 4 rows later.
    * ``5d_change_pct``: for row t, ``(close[t+5] - open[t+1]) / open[t+1]``.

    Args:
        subdf: Table with ``trade_date``, ``open`` and ``close`` columns.

    Returns:
        The sorted, augmented table without its first 249 rows (rolling mean
        not yet defined) and its last 5 rows (forward return not defined).
    """
    ordered = subdf.sort_values("trade_date")
    close_px = ordered['close']
    open_px = ordered['open']

    ordered['Price1Y'] = close_px / close_px.rolling(250).mean() - 1
    # VOL120 (rolling(120).mean() of 'turn_over') is not computed here.
    ordered['5d_close'] = close_px.shift(-4)

    # Return measured from a row's own open to the close 4 rows later, then
    # moved up one row so that each row carries the return starting at the
    # NEXT row's open.
    open_to_close = (ordered['5d_close'] - open_px) / open_px
    ordered['5d_change_pct'] = open_to_close.shift(-1)

    return ordered.iloc[249:-5]
@cache_df('./20130101-20181231-close.pkl')
def get_his_close(df):
    """Fetch the daily quote table for every calendar date and stack them.

    One ``pro.daily`` request is made per value of ``df['cal_date']``, with a
    0.3 second pause after each request. The ``cache_df`` decorator stores the
    combined result in ``./20130101-20181231-close.pkl``.

    Args:
        df: Table with a ``cal_date`` column.

    Returns:
        All per-date tables concatenated and sorted by index.
    """
    frames = []
    for trade_day in df['cal_date'].values:
        print(trade_day)
        frames.append(pro.daily(trade_date=trade_day))
        # Pause between requests (presumably to stay under the API rate limit).
        time.sleep(0.3)
    combined = pd.concat(frames)
    return combined.sort_index()
# @cache_df('./20130101-20131231-close.pkl')
def get_his_close_all(df):
    """Collect one backup daily-quote table per calendar date, cached per date.

    For every value in ``df['cal_date']`` the table is read from
    ``./data/<date>.pkl`` when that file exists. Otherwise it is requested via
    ``pro.bak_daily``, pickled to that path (the ``./data`` directory is
    created if missing), and the loop sleeps 15 seconds before continuing.

    Args:
        df: Table with a ``cal_date`` column.

    Returns:
        All per-date tables concatenated and sorted by index.
    """
    all_daily_k = []
    for date in df['cal_date'].values:
        print(date)
        file_path = rf"./data/{date}.pkl"
        if os.path.isfile(file_path):
            # NOTE(review): unpickling executes arbitrary code; the cache
            # directory must only contain files written by this function.
            with open(file_path, "rb") as cache_file:
                daily_k = cPickle.load(cache_file)
        else:
            # Alternative data source: pro.daily(trade_date=date)
            daily_k = pro.bak_daily(trade_date=date,
                                    fields='ts_code,trade_date,name,pct_change,close,change,open,high,low,pre_close,vol_ratio,turn_over,swing,vol,amount,selling,buying,total_share,float_share,pe,industry,area,float_mv,total_mv,avg_price,strength,activity,avg_turnover,attack,interval_3,interval_6')
            # Make sure the cache directory exists before the first write.
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "wb") as cache_file:
                cPickle.dump(daily_k, cache_file)
            # Long pause after each remote request only (presumably an API
            # rate limit); cached dates skip it.
            time.sleep(15)
        all_daily_k.append(daily_k)
    return pd.concat(all_daily_k).sort_index()
# Load (or download and cache) the daily quotes for every date in the calendar.
# NOTE: ``df`` is rebound here from the calendar table to the quote table.
df = get_his_close(df)
print(df.columns)

# Compute the factor and the forward return separately for each stock code.
xx = df.groupby('ts_code').apply(get_5d_change_pct)
print(xx)
xx.reset_index(inplace=True, drop=True)
print(xx.shape)
xx.set_index('trade_date', inplace=True)
# Remaining missing values are replaced by 0 before fitting.
xx.fillna(0, inplace=True)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score  # , median_absolute_error

# 2.2.1 Solve
# The ``normalize`` keyword of LinearRegression was removed in scikit-learn 1.2
# and passing it raises TypeError there. For ordinary least squares it did not
# change the reported coefficients (they were always mapped back to the
# original feature scale), so the plain estimator gives the same output.
# If the factors must share a common scale so that their weights are directly
# comparable, standardize them explicitly before fitting
# (e.g. with sklearn.preprocessing.StandardScaler).
lr = LinearRegression()
## Train (fit)
# First argument: the factor columns listed in ``fac_list``;
# second argument: the '5d_change_pct' target column.
lr.fit(xx.loc[:, fac_list], xx.loc[:, '5d_change_pct'])
print("LinearRegression 线性回归的回归常数(w0):", lr.intercept_)
print("LinearRegression 线性回归的回归系数(wi)(即各因子的权重):", lr.coef_)
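# Web-page residue from the article this script was copied from (not code):
# tag "tushare"; footer "latest recommended article published 2024-09-12 13:24:01".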