仅个人笔记使用,感谢点赞关注
模态分解
- 模态分解各种方法:16种模态分解方法以提高预测精度
时间序列特征
def create_time_fleatures(cls, df):
    """Add calendar-based feature columns derived from ``df['date']``.

    The frame is modified in place and is also returned, so both
    ``create_time_fleatures(None, df)`` and ``df = create_time_fleatures(...)``
    work.

    :param cls: unused by the body; kept so existing call sites keep working
        (presumably this was a classmethod of a class not shown here).
    :param df: DataFrame with a datetime-typed ``date`` column.
    :return: the same ``df`` object with the feature columns added.
    :raises AttributeError: if ``df['date']`` is not datetime-like (no ``.dt``).
    """
    dates = df['date'].dt

    # Common calendar features.
    df['year'] = dates.year
    df['month'] = dates.month
    df['weekday'] = dates.weekday  # Monday == 0
    df['day_of_month'] = dates.day
    df['day_of_year'] = dates.dayofyear

    # ``Series.dt.weekofyear`` was deprecated in pandas 1.1 and removed in
    # pandas 2.0; ``isocalendar().week`` is its replacement. That accessor
    # returns a nullable UInt32 column, so cast it back to what the old
    # attribute produced: int64, or float with NaN when dates are missing.
    if hasattr(dates, 'isocalendar'):
        iso_week = dates.isocalendar().week
        week_dtype = 'float64' if iso_week.isna().any() else 'int64'
        df['week_of_year'] = iso_week.astype(week_dtype)
    else:
        # pandas < 1.1 has no isocalendar(); the legacy attribute still exists.
        df['week_of_year'] = dates.weekofyear

    df['day_of_week'] = dates.dayofweek + 1  # 1-based variant of 'weekday'
    df['hour_of_day'] = dates.hour

    # Period-boundary features, stored as 0/1 integers.
    df['quarter'] = dates.quarter
    boundary_flags = (
        'is_year_start',
        'is_year_end',
        'is_month_start',
        'is_month_end',
        'is_quarter_start',
        'is_quarter_end',
    )
    for flag in boundary_flags:
        df[flag] = getattr(dates, flag).astype(int)
    return df
def create_log_features(cls, ff):
    """Append shifted copies of ``open`` and ``close`` to ``ff`` in place.

    Three lag columns are built from ``open`` (shifted down by 1, 2 and 3
    rows) and two lead columns from ``close`` (shifted up by 1 and 2 rows).
    Rows with no source value after the shift hold NaN.

    :param cls: unused by the body; kept for call-site compatibility.
    :param ff: DataFrame containing ``open`` and ``close`` columns.
    :return: the same ``ff`` object with the five shifted columns added.
    """
    # (new column, source column, shift periods). Positive periods look back
    # (lag); negative periods pull values from later rows (lead).
    # NOTE(review): 'openy-2' / 'closey-2' look like typos next to 'open-1' /
    # 'close-1', but the names are kept as-is because downstream code may
    # reference them -- confirm before renaming.
    shifted_columns = (
        ('open-1', 'open', 1),
        ('openy-2', 'open', 2),
        ('open-3', 'open', 3),
        ('close-1', 'close', -1),
        ('closey-2', 'close', -2),
    )
    for new_name, source, periods in shifted_columns:
        ff[new_name] = ff[source].shift(periods)
    return ff
def roll_mean_features(cls, df, windows):
    """Add triangular-weighted moving averages of the previous ``open`` values.

    For every ``window`` in ``windows`` a column ``mv_<window>`` is added. Each
    value is the weighted mean of the ``open`` values of the preceding rows
    (the series is shifted by one row first, so the current row's own value is
    never part of its average).

    :param cls: unused by the body; kept for call-site compatibility.
    :param df: source DataFrame with an ``open`` column; it is not modified.
    :param windows: iterable of window sizes, e.g. ``[3, 5, 7, 10, 20]``; one
        output column is produced per entry.
    :return: a copy of ``df`` with one ``mv_<window>`` column per window size.
    """
    result = df.copy()

    # The shift does not depend on the window size, so compute it once.
    # Calling rolling() directly replaces the former
    # ``Series.transform(lambda x: ...)`` wrapper, which only reached the same
    # result after pandas tried (and failed) to apply the lambda element-wise.
    previous_open = result['open'].shift(1)

    for window in windows:
        # min_periods=1: emit a value even while the window is still filling.
        # win_type="triang": triangular weights (pandas needs SciPy for this).
        result['mv_' + str(window)] = previous_open.rolling(
            window=window, min_periods=1, win_type="triang").mean()
    return result
# TODO: discretize continuous features (equal-width binning)
def get_bin(data, colmun_names, bins=10):
    """Discretize continuous columns into equal-width integer bins, in place.

    For every column ``c`` in ``colmun_names`` a new column ``c_bin`` is added
    holding the zero-based bin index that ``pd.cut`` assigns to each value.

    :param data: DataFrame to extend; it is modified in place.
    :param colmun_names: names of the numeric columns to discretize.
    :param bins: number of equal-width bins (or explicit bin edges) passed to
        ``pd.cut``; defaults to 10, the value that used to be hard-coded.
    :return: the same ``data`` object, so the call can be used in an
        assignment like the sibling feature helpers.
    """
    for col in colmun_names:
        # labels=False makes pd.cut return integer bin codes, not Intervals.
        data[col + '_bin'] = pd.cut(data[col], bins, labels=False)
    return data
def add_poly_features(data, column_names):
    """Replace the given columns with their degree-2 polynomial expansion.

    Features are built by multiplying the selected columns with themselves and
    with each other. For two features ``a`` and ``b`` the degree-2 terms are
    ``(1, a, b, a^2, a b, b^2)``; the constant ``1`` column is left out here
    because ``include_bias=False``.

    ``PolynomialFeatures`` parameters used:

    * ``degree``: the maximum polynomial degree.
    * ``interaction_only``: defaults to False; when True, pure powers such as
      ``a^2`` and ``b^2`` are omitted and only cross terms remain.
    * ``include_bias``: defaults to True; when True, a zero-degree column of
      all ones is included.

    Example::

        train = add_poly_features(train, ['fzd', 'fs', 'fx', 'wd', 'sd', 'yq'])
        test = add_poly_features(test, ['fzd', 'fs', 'fx', 'wd', 'sd', 'yq'])

    :param data: source DataFrame; it is not modified.
    :param column_names: list of column names to expand.
    :return: a new DataFrame holding the untouched columns of ``data`` plus the
        polynomial feature columns (inserted after the first remaining column).
    """
    features = data[column_names]
    rest_features = data.drop(column_names, axis=1)

    poly_transformer = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
    expanded = poly_transformer.fit_transform(features)

    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement and fall back only on releases that predate it.
    if hasattr(poly_transformer, 'get_feature_names_out'):
        feature_names = poly_transformer.get_feature_names_out(column_names)
    else:
        feature_names = poly_transformer.get_feature_names(column_names)

    # Reuse the source index: insert() aligns on index labels, so a fresh
    # RangeIndex would turn the new columns into NaN whenever ``data`` has a
    # non-default index (filtered rows, datetime index, ...).
    poly_features = pd.DataFrame(expanded, columns=list(feature_names), index=data.index)

    for col in poly_features.columns:
        if col in rest_features.columns:
            continue
        # Position 1 is kept from the original layout; clamp it so the insert
        # also works when no columns are left after the drop.
        insert_at = min(1, rest_features.shape[1])
        rest_features.insert(insert_at, col, poly_features[col])
    return rest_features
