# Shrink the data up front: keep only rows whose rownum_W is at most sst + 25.
df_t = df1.loc[df1['rownum_W'] <= (sst + 25)]
# Count the non-null ts_code values for each rownum_W.
t = df_t.groupby(['rownum_W'], as_index=False).ts_code.count()
# rownum_W values that have more than 12000 ts_code entries.
row_liu = t['rownum_W'].loc[t.ts_code > 12000]
# Select every qualifying row with a single boolean mask instead of growing a
# frame with pd.concat inside a loop (which re-copies the accumulated data on
# every iteration).  The stable sort reproduces the order the loop produced:
# ascending rownum_W, original row order within each rownum_W.  When no
# rownum_W qualifies, the result is an empty frame that still has its columns.
df1 = df_t[df_t['rownum_W'].isin(row_liu)].sort_values('rownum_W', kind='mergesort')
# Keep the sample from being concentrated on a single day with many rises:
# cap the number of rows taken per date from each subset, then shuffle the
# combined data.
# NOTE(review): df1_1 and df1_0 are not defined in this section; presumably
# they are the two label subsets of df1 -- confirm they are built from the
# filtered df1 rather than from an earlier, unfiltered version.
def _sample_per_date(frame, dates, cap):
    """Return the rows of *frame* for each date in *dates*, capped per date.

    For every date, all matching rows are kept when there are fewer than
    *cap* of them; otherwise exactly *cap* rows are drawn at random without
    replacement.  The per-date pieces are concatenated in the order of
    *dates*.  An empty DataFrame is returned when *dates* is empty.
    """
    pieces = []
    for dt in dates:
        day_rows = frame[frame.date == dt]
        if len(day_rows) >= cap:
            day_rows = day_rows.sample(n=cap)
        pieces.append(day_rows)
    # pd.concat raises on an empty list, so fall back to an empty frame.
    return pd.concat(pieces, axis=0) if pieces else pd.DataFrame()


sample_dates = df1.date.unique()
df_1 = _sample_per_date(df1_1, sample_dates, 1250)
df_0 = _sample_per_date(df1_0, sample_dates, 3000)
df1 = pd.concat([df_1, df_0], axis=0)
from sklearn.utils import shuffle
# Randomize the row order so the two subsets are interleaved.
df1 = shuffle(df1)