【Python进阶】Python计算cosine相似性矩阵

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


def calculate_daily_positions_consine_similarity(df):
    """Compute each client's day-over-day position cosine similarity.

    The input is expanded to a dense (date, client, instrument) panel that
    covers every calendar day between the earliest and latest ``date``.
    Missing ``cum_qty`` values are forward-filled per (client, instrument)
    and any remaining gaps are treated as a flat (zero) position.  Each
    position is then scaled by the client's gross absolute quantity on that
    day, and the resulting weight vector of every day is compared with the
    weight vector of the previous day.

    Args:
        df: DataFrame with at least the columns ``date``, ``client_id``,
            ``instrument_id`` and ``cum_qty``.  ``date`` may hold datetimes
            or date strings parseable by ``pandas.to_datetime``.

    Returns:
        DataFrame produced by ``groupby('client_id').apply(...).reset_index()``
        holding, per client, one row for every date except the first, with
        the columns ``client_id``, ``date`` (``YYYY-MM-DD`` string) and
        ``pos_sim``, plus the positional index level exposed by
        ``reset_index``.
    """
    df = df.copy()

    # The panel below is keyed on "YYYY-MM-DD" strings, so bring the input
    # dates into the same representation; otherwise the left merge would
    # fail (datetime input) or silently match nothing (other string formats).
    parsed_dates = pd.to_datetime(df['date']).dt.normalize()
    df['date'] = parsed_dates.dt.strftime("%Y-%m-%d")

    all_dates = pd.date_range(
        start=parsed_dates.min(), end=parsed_dates.max(), name='date'
    ).strftime("%Y-%m-%d")
    full_panel = pd.MultiIndex.from_product(
        [all_dates, df['client_id'].unique(), df['instrument_id'].unique()],
        names=['date', 'client_id', 'instrument_id'],
    ).to_frame(index=False)

    merged = full_panel.merge(
        df, on=['date', 'client_id', 'instrument_id'], how='left'
    )

    # Fill missing dates: carry the last known position forward.  The panel
    # is date-major, so rows inside each (client, instrument) group are
    # already in chronological order.  Whatever is still missing (no earlier
    # observation) counts as no position.
    merged['cum_qty'] = (
        merged.groupby(['client_id', 'instrument_id'])['cum_qty'].ffill()
    )
    merged['cum_qty'] = merged['cum_qty'].fillna(0)

    # Gross quantity per client and day = long quantity + |short quantity|.
    merged['total_qty'] = (
        merged['cum_qty']
        .abs()
        .groupby([merged['client_id'], merged['date']])
        .transform('sum')
    )
    # 0 / 0 on days without any position yields NaN; treat it as zero weight.
    merged['position_weight'] = (
        merged['cum_qty'] / merged['total_qty']
    ).fillna(0)

    def calculate_cosine_similarity(client_group):
        """Return the similarity of each day's weights to the previous day's."""
        stock_matrix = (
            client_group.sort_values('date')
            .pivot(index='date', columns='instrument_id',
                   values='position_weight')
            .fillna(0)
        )
        similarity_matrix = cosine_similarity(stock_matrix.values)
        # The first sub-diagonal holds similarity_matrix[i, i - 1], i.e. the
        # similarity between day i and the day before it.
        return pd.DataFrame({
            'date': stock_matrix.index[1:],
            'pos_sim': similarity_matrix.diagonal(offset=-1),
        })

    # For every client and day, the similarity is computed over the weights
    # of all instruments.
    similarity_results = (
        merged.groupby(['client_id'])
        .apply(calculate_cosine_similarity)
        .reset_index()
    )
    return similarity_results


# Daily position cosine similarity for a single client.
daily_positions_cos = calculate_daily_positions_consine_similarity(
    multi_cpty_df[multi_cpty_df["client_id"] == "ABRH400"]
)
daily_positions_cos

# Poster's accompanying question (translated): "You misunderstood what I
# meant, but never mind - take another look at this code now; does it make
# sense to you?"
最新发布
07-30
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值