# 计算每日持仓统计和前5大持仓
def calculate_daily_positions(df):
# 连续日期范围
all_dates = pd.date_range(start=df['date'].min(), end=df['date'].max(), name='date').strftime("%Y-%m-%d")
all_clients = df['client_id'].unique()
# 完整的时间-客户
date_client_panel = pd.MultiIndex.from_product(
[all_dates, all_clients],
names=['date', 'client_id']
).to_frame(index=False)
# 合并原始数据(填充缺失日期)
merged = date_client_panel.merge(
df,
on=['date', 'client_id'],
how='left'
)
# 标记持仓方向
merged['position_type'] = np.where(
merged['cum_qty'] > 0, 'long',
np.where(merged['cum_qty'] < 0, 'short', 'neutral')
)
# 计算每日持仓统计
daily_stats = merged.groupby(['client_id', 'date']).agg(
long_count=('instrument_id', lambda x: (x.notnull() & (merged.loc[x.index, 'position_type'] == 'long')).nunique()),
short_count=('instrument_id', lambda x: (x.notnull() & (merged.loc[x.index, 'position_type'] == 'short')).nunique()),
long_qty=('cum_qty', lambda x: x[x > 0].sum()),
short_qty=('cum_qty', lambda x: abs(x[x < 0].sum()))
).reset_index()
# 计算前5大持仓
def top5_holdings(group):
# 分离多头和空头
long_pos = group[group['position_type'] == 'long']
short_pos = group[group['position_type'] == 'short']
# 多头前5
if not long_pos.empty:
top5_long = long_pos.nlargest(5, 'cum_qty')
long_top5_qty = top5_long['cum_qty'].sum()
long_top5_ratio = long_top5_qty / group['long_qty'].iloc[0]
else:
long_top5_qty = 0
long_top5_ratio = 0
# 空头前5(按绝对值排序)
if not short_pos.empty:
short_pos['abs_qty'] = short_pos['cum_qty'].abs()
top5_short = short_pos.nlargest(5, 'abs_qty')
short_top5_qty = top5_short['abs_qty'].sum()
short_top5_ratio = short_top5_qty / group['short_qty'].iloc[0]
else:
short_top5_qty = 0
short_top5_ratio = 0
return pd.Series({
'long_top5_qty': long_top5_qty,
'long_top5_ratio': long_top5_ratio,
'short_top5_qty': short_top5_qty,
'short_top5_ratio': short_top5_ratio
})
top5_stats = merged.groupby(['client_id', 'date']).apply(top5_holdings).reset_index()
# 合并结果
final_daily_stats = daily_stats.merge(top5_stats, on=['client_id', 'date'])
# 计算换手率(使用前一日持仓量)
final_daily_stats['prev_long_qty'] = final_daily_stats.groupby('client_id')['long_qty'].shift(1)
final_daily_stats['prev_short_qty'] = final_daily_stats.groupby('client_id')['short_qty'].shift(1)
# 获取当日交易量(从原始数据)
daily_trade = df.groupby(['client_id', 'date'])['td_qty'].sum().abs().reset_index(name='daily_trade')
final_daily_stats = final_daily_stats.merge(daily_trade, on=['client_id', 'date'], how='left')
# 计算换手率
final_daily_stats['turnover_rate'] = final_daily_stats['daily_trade'] / (
final_daily_stats['prev_long_qty'] + final_daily_stats['prev_short_qty']
).replace(0, np.nan)
return final_daily_stats
# 计算每日持仓统计
daily_positions_df = calculate_daily_positions(multi_cpty_df)
daily_positions_df
你这里的代码好像有点问题,报错:---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File c:\Users\matianht\.conda\envs\nomura\lib\site-packages\pandas\core\indexes\base.py:3805, in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas\\_libs\\hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas\\_libs\\hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'short_qty'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[57], line 87
84 return final_daily_stats
86 # 计算每日持仓统计
---> 87 daily_positions_df = calculate_daily_positions(multi_cpty_df)
89 daily_positions_df
Cell In[57], line 66, in calculate_daily_positions(df)
57 short_top5_ratio = 0
59 return pd.Series({
60 'long_top5_qty': long_top5_qty,
61 'long_top5_ratio': long_top5_ratio,
62 'short_top5_qty': short_top5_qty,
63 'short_top5_ratio': short_top5_ratio
64 })
---> 66 top5_stats = merged.groupby(['client_id', 'date']).apply(top5_holdings).reset_index()
68 # 合并结果
69 final_daily_stats = daily_stats.merge(top5_stats, on=['client_id', 'date'])
File c:\Users\matianht\.conda\envs\nomura\lib\site-packages\pandas\core\groupby\groupby.py:1824, in GroupBy.apply(self, func, include_groups, *args, **kwargs)
1822 with option_context("mode.chained_assignment", None):
1823 try:
-> 1824 result = self._python_apply_general(f, self._selected_obj)
1825 if (
1826 not isinstance(self.obj, Series)
1827 and self._selection is None
1828 and self._selected_obj.shape != self._obj_with_exclusions.shape
1829 ):
1830 warnings.warn(
1831 message=_apply_groupings_depr.format(
1832 type(self).__name__, "apply"
(...)
1835 stacklevel=find_stack_level(),
1836 )
File c:\Users\matianht\.conda\envs\nomura\lib\site-packages\pandas\core\groupby\groupby.py:1885, in GroupBy._python_apply_general(self, f, data, not_indexed_same, is_transform, is_agg)
1850 @final
1851 def _python_apply_general(
1852 self,
(...)
1857 is_agg: bool = False,
1858 ) -> NDFrameT:
1859 """
1860 Apply function f in python space
1861
(...)
1883 data after applying f
1884 """
-> 1885 values, mutated = self._grouper.apply_groupwise(f, data, self.axis)
1886 if not_indexed_same is None:
1887 not_indexed_same = mutated
File c:\Users\matianht\.conda\envs\nomura\lib\site-packages\pandas\core\groupby\ops.py:919, in BaseGrouper.apply_groupwise(self, f, data, axis)
917 # group might be modified
918 group_axes = group.axes
--> 919 res = f(group)
920 if not mutated and not _is_indexed_like(res, group_axes, axis):
921 mutated = True
Cell In[57], line 54, in calculate_daily_positions.<locals>.top5_holdings(group)
52 top5_short = short_pos.nlargest(5, 'abs_qty')
53 short_top5_qty = top5_short['abs_qty'].sum()
---> 54 short_top5_ratio = short_top5_qty / group['short_qty'].iloc[0]
55 else:
56 short_top5_qty = 0
File c:\Users\matianht\.conda\envs\nomura\lib\site-packages\pandas\core\frame.py:4102, in DataFrame.__getitem__(self, key)
4100 if self.columns.nlevels > 1:
4101 return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
4103 if is_integer(indexer):
4104 indexer = [indexer]
File c:\Users\matianht\.conda\envs\nomura\lib\site-packages\pandas\core\indexes\base.py:3812, in Index.get_loc(self, key)
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'short_qty'