一、Alphalens 简介
Alphalens是用于因子分析的Python工具包,是Quantopian公司旗下三大开源包之一,另外两个分别是 Zipline(回测)和Pyfolio(绩效和风险分析)。
- Github-Zipline:回测
- Github-Alphalens:因子分析
- Github-Pyfolio:绩效和风险分析
Alphalens的主要功能:
- Returns Analysis
- Information Coefficient Analysis
- Turnover Analysis
- Grouped Analysis
二、Alphalens 安装
本文使用的版本:alphalens 0.3.6
使用pip安装:
pip install alphalens
使用conda安装:
conda install -c conda-forge alphalens
三、Alphalens 使用
1、数据预处理
# Build the merged factor / forward-returns table that is passed to the
# tear-sheet functions shown later in this article.
factor_data = utils.get_clean_factor_and_forward_returns(
    factor_df,
    price_df,
    quantiles=5,
    bins=None,
    periods=(1, 5, 10, 20),
    max_loss=0.5,
)
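factor_df:因子值,索引为 (date, asset) 的 MultiIndex,每行对应某一日期某一资产的因子值。

price_df:宽表形式的价格数据,行索引为日期,列为各资产代码。

factor_data:索引为 (date, asset) 的 MultiIndex,列包括各周期的远期收益率(1D、5D、10D、20D)、因子值 factor 以及因子分组 factor_quantile。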


2、主要功能 alphalens.tears.create_full_tear_sheet
def create_full_tear_sheet(factor_data,
                           long_short=True,
                           group_neutral=False,
                           by_group=False):
    """Run every analysis step for a factor, one after another.

    The steps are, in order: the quantile statistics table, the returns
    tear sheet, the information (IC) tear sheet and the turnover tear sheet.

    Parameters
    ----------
    factor_data
        Table passed unchanged to every step.
    long_short
        Forwarded to the returns tear sheet only.
    group_neutral
        Forwarded to the returns and information tear sheets.
    by_group
        Forwarded to the returns and information tear sheets.

    Notes
    -----
    Each sub tear sheet is called with ``set_context=False``. That keyword
    is not part of the signatures shown in this file, so it is presumably
    consumed by a decorator on those functions -- TODO confirm.
    """
    # Per-quantile statistics of the factor values.
    plotting.plot_quantile_statistics_table(factor_data)

    # Factor returns analysis.
    create_returns_tear_sheet(
        factor_data,
        long_short=long_short,
        group_neutral=group_neutral,
        by_group=by_group,
        set_context=False,
    )

    # Factor information coefficient (IC) analysis.
    create_information_tear_sheet(
        factor_data,
        group_neutral=group_neutral,
        by_group=by_group,
        set_context=False,
    )

    # Factor turnover analysis.
    create_turnover_tear_sheet(factor_data, set_context=False)
1)因子分组统计结果
plotting.plot_quantile_statistics_table(factor_data)
生成Quantiles Statistics,因子值factor_data['factor']按不同分组factor_data['factor_quantile']进行统计分析。

2)因子收益率分析
create_returns_tear_sheet
def create_returns_tear_sheet(factor_data,
                              long_short=True,
                              group_neutral=False,
                              by_group=False):
    """
    Compute and plot the returns analysis for a factor.

    Steps performed by the code below, in order:

    1. Factor-weighted returns via ``perf.factor_returns``.
    2. Mean return and standard deviation per factor quantile, both
       aggregated and per date, via ``perf.mean_return_by_quantile``;
       the results are rescaled with ``utils.rate_of_return`` /
       ``utils.std_conversion`` using the first column as base period.
    3. Alpha and beta via ``perf.factor_alpha_beta``.
    4. Spread between the highest and the lowest ``factor_quantile`` via
       ``perf.compute_mean_returns_spread``.
    5. Output: a returns table, a bar chart, a violin plot, two cumulative
       return plots per forward-return period, and the spread time
       series, laid out on a ``GridFigure`` with ``2 + 3 * fr_cols`` rows.

    Parameters
    ----------
    factor_data
        Table indexed so that ``index.levels[0]`` carries the dates; it
        must contain a ``factor_quantile`` column.
    long_short
        Passed to ``perf.factor_returns`` / ``perf.factor_alpha_beta`` and
        used as ``demeaned`` for the quantile means; also adds
        ``'Long/Short '`` to the cumulative-return plot titles.
    group_neutral
        Passed to ``perf.factor_returns`` / ``perf.factor_alpha_beta`` and
        used as ``group_adjust`` for the quantile means; also adds
        ``'Group Neutral '`` to the cumulative-return plot titles.
    by_group
        Accepted but not referenced anywhere in this excerpt.

    Returns
    -------
    None. The figures are shown with ``plt.show()`` and the grid figure is
    closed afterwards.
    """
    # weights = factor_weights(factor_data, demeaned, group_adjust, equal_weight)
    # weights = (factor - factor.mean())/sum(abs((factor - factor.mean())))
    # For a given trading day, the factor values of all stocks are demeaned
    # (factor - factor.mean()) to obtain the weights.
    # That day's factor_returns is the weighted average of the returns in
    # factor_data using those weights.
    # factor_returns is therefore a factor-weighted return whose weights sum to 0.
    factor_returns = perf.factor_returns(factor_data,
                                         long_short,
                                         group_neutral)

    # Compute mean returns for factor quantiles across provided forward returns columns.
    # First, demean the returns within each cross-section (x - x.mean()).
    # Then, for each factor_quantile group (level), compute the mean
    # (mean_quant_ret) and standard deviation (std_quantile) of the
    # demeaned returns.
    mean_quant_ret, std_quantile = \
        perf.mean_return_by_quantile(factor_data,
                                     by_group=False,
                                     demeaned=long_short,
                                     group_adjust=group_neutral)

    # period_ret.add(1).pow(conversion_factor).sub(1)
    # Rescale the returns of every period to the common base_period.
    mean_quant_rateret = \
        mean_quant_ret.apply(utils.rate_of_return, axis=0,
                             base_period=mean_quant_ret.columns[0])

    # compute quantile bucket returns separately for each date.
    # First, demean the returns within each cross-section (x - x.mean()).
    # Then, for each group and each date separately, compute the mean
    # (mean_quant_ret_bydate) and standard deviation (std_quant_daily) of
    # the demeaned returns.
    mean_quant_ret_bydate, std_quant_daily = \
        perf.mean_return_by_quantile(factor_data,
                                     by_date=True,
                                     by_group=False,
                                     demeaned=long_short,
                                     group_adjust=group_neutral)

    mean_quant_rateret_bydate = mean_quant_ret_bydate.apply(
        utils.rate_of_return, axis=0,
        base_period=mean_quant_ret_bydate.columns[0]
    )

    compstd_quant_daily = \
        std_quant_daily.apply(utils.std_conversion, axis=0,
                              base_period=std_quant_daily.columns[0])

    # reg_fit = OLS(y, x).fit()
    # alpha, beta = reg_fit.params
    # x is the mean return of all stocks in the cross-section; it stands
    # for the market performance.
    # x = add_constant(x)
    # y is the factor-weighted return (factor_returns).
    alpha_beta = perf.factor_alpha_beta(factor_data,
                                        factor_returns,
                                        long_short,
                                        group_neutral)

    # mean_quant_rateret_bydate of the top group minus
    # mean_quant_rateret_bydate of the bottom group.
    mean_ret_spread_quant, std_spread_quant = \
        perf.compute_mean_returns_spread(mean_quant_rateret_bydate,
                                         factor_data['factor_quantile'].max(),
                                         factor_data['factor_quantile'].min(),
                                         std_err=compstd_quant_daily)

    # Grid layout: 2 fixed rows (bar chart, violin plot) plus 3 rows per
    # forward-return period (two cumulative plots and one spread plot).
    fr_cols = len(factor_returns.columns)
    vertical_sections = 2 + fr_cols * 3
    gf = GridFigure(rows=vertical_sections, cols=1)

    # "Returns Analysis" table (results for the different periods):
    # alpha, beta, mean_quant_rateret.iloc[-1], mean_quant_rateret.iloc[0],
    # mean_ret_spread_quant.mean()
    plotting.plot_returns_table(alpha_beta,
                                mean_quant_rateret,
                                mean_ret_spread_quant)

    # Bar chart "Mean Period Wise Return By Factor Quantile":
    # mean_quant_rateret, the mean excess return per group (returns demeaned).
    plotting.plot_quantile_returns_bar(mean_quant_rateret,
                                       by_group=False,
                                       ylim_percentiles=None,
                                       ax=gf.next_row())

    # Violin plot.
    plotting.plot_quantile_returns_violin(mean_quant_rateret_bydate,
                                          ylim_percentiles=(1, 99),
                                          ax=gf.next_row())

    # <CustomBusinessDay>
    # Fall back to plain business days, with a warning, when the date
    # level of the index carries no frequency.
    trading_calendar = factor_data.index.levels[0].freq
    if trading_calendar is None:
        trading_calendar = pd.tseries.offsets.BDay()
        warnings.warn(
            "'freq' not set in factor_data index: assuming business day",
            UserWarning
        )

    for p in factor_returns:
        # p iterates over the column names of factor_returns: 1D, 5D, 10D, 20D
        title = ('Factor Weighted '
                 + ('Group Neutral ' if group_neutral else '')
                 + ('Long/Short ' if long_short else '')
                 + "Portfolio Cumulative Return ({} Period)".format(p))

        # Cumulative return curve of the factor-weighted long/short portfolio.
        # Factor Weighted Long/Short Portfolio Cumulative Return (1D Period)
        # factor_returns = perf.cumulative_returns(factor_returns, period, freq)
        plotting.plot_cumulative_returns(
            factor_returns[p],
            period=p,
            freq=trading_calendar,
            title=title,
            ax=gf.next_row()
        )

        # Cumulative excess return curve per factor quantile.
        plotting.plot_cumulative_returns_by_quantile(
            mean_quant_ret_bydate[p],
            period=p,
            freq=trading_calendar,
            ax=gf.next_row()
        )

    # One axis per forward-return period for the top-minus-bottom spread.
    ax_mean_quantile_returns_spread_ts = [gf.next_row()
                                          for x in range(fr_cols)]
    plotting.plot_mean_quantile_returns_spread_time_series(
        mean_ret_spread_quant,
        std_err=std_spread_quant,
        bandwidth=0.5,
        ax=ax_mean_quantile_returns_spread_ts
    )

    plt.show()
    gf.close()
去中心化因子的加权收益率
demeaned = True
weights = factor_weights(factor_data, demeaned, group_adjust, equal_weight)
weights = (factor - factor.mean())/sum(abs((factor - factor.mean())))
同一横截面上所有股票的因子值,去中心化(factor - factor.mean())后得到权重weights
该交易日的因子加权收益率factor_returns=factor_data中的收益率按照weights加权平均
factor_returns = perf.factor_returns(factor_data, long_short, group_neutral)

mean_quant_rateret
- 首先,对同一横截面上的收益率去中心化处理(x - x.mean()),即对factor_data中各周期的收益率列按日期减去当日截面均值:
# factor_data['5D']=factor_data.groupby('date')['5D'].transform(lambda x: x - x.mean())
factor_data = utils.demean_forward_returns(factor_data)
- 然后,对不同分组factor_quantile(level)计算去中心化收益率的均值(mean_quant_ret)和标准差(std_quantile)
# Group by factor quantile and by date (taken from the 'date' index level).
grouper = ['factor_quantile', factor_data.index.get_level_values('date')]
# Select the forward-return columns with a list of labels. Indexing a groupby
# with a bare tuple of labels ('1D', '5D', ...) was deprecated in pandas 1.0
# and raises in pandas 2.0.
mean_quant_ret = factor_data.groupby(grouper)[['1D', '5D', '10D', '20D']].mean()
- 将收益率按照base_period统一计算维度
mean_quant_rateret = mean_quant_ret.add(1).pow(conversion_factor).sub(1)


表格Returns Analysis(不同周期下的收益率结果):
alpha、beta、 mean_quant_rateret.iloc[-1]、mean_quant_rateret.iloc[0]、mean_ret_spread_quant.mean()
plotting.plot_returns_table(alpha_beta,
mean_quant_rateret,
mean_ret_spread_quant)

分组平均超额收益Mean Period Wise Return By Factor Quantile:
mean_quant_rateret,分组平均超额收益(收益去中心化处理)
plotting.plot_quantile_returns_bar(mean_quant_rateret,
by_group=False,
ylim_percentiles=None,
ax=gf.next_row())

因子加权的多空累计收益净值曲线
# Factor Weighted Long/Short Portfolio Cumulative Return (1D Period)
# factor_returns = perf.cumulative_returns(factor_returns, period, freq)
plotting.plot_cumulative_returns(
factor_returns[p],
period=p,
freq=trading_calendar,
title=title,
ax=gf.next_row()
)

因子分组超额收益净值曲线
plotting.plot_cumulative_returns_by_quantile(
mean_quant_ret_bydate[p],
period=p,
freq=trading_calendar,
ax=gf.next_row()
)

mean return spread 分布
Top组的mean_quant_rateret_bydate - Bottom组的mean_quant_rateret_bydate

3)IC分析
create_information_tear_sheet
def create_information_tear_sheet(factor_data,
                                  group_neutral=False,
                                  by_group=False):
    """Compute the information coefficient (IC) of a factor and plot it.

    The IC is obtained from ``perf.factor_information_coefficient``. It is
    summarised in a printed table and then drawn as one time-series plot
    per IC column, followed by a histogram and a Q-Q plot per IC column on
    a two-column grid.

    Parameters
    ----------
    factor_data
        Passed to ``perf.factor_information_coefficient``.
    group_neutral
        Passed to ``perf.factor_information_coefficient``.
    by_group
        Accepted but not referenced anywhere in this excerpt.

    Returns
    -------
    None. The figure is shown with ``plt.show()`` and then closed.
    """
    # Computes the Spearman Rank Correlation between factor values and
    # N period forward returns for each period in the factor index,
    # i.e. one correlation per cross-section and per return period.
    ic = perf.factor_information_coefficient(factor_data, group_neutral)

    # Print the "Information Analysis" summary table.
    plotting.plot_information_table(ic)

    grid_cols = 2
    n_periods = len(ic.columns)
    # Rows needed when the per-period plots are packed grid_cols per row.
    paired_rows = (n_periods - 1) // grid_cols + 1
    total_rows = 3 * n_periods + 3 * paired_rows
    gf = GridFigure(rows=total_rows, cols=grid_cols)

    # One full-width row per period for the IC time series.
    ts_axes = []
    for _ in range(n_periods):
        ts_axes.append(gf.next_row())
    plotting.plot_ic_ts(ic, ax=ts_axes)

    # Cells are requested in alternating order: histogram, then Q-Q plot,
    # for each period in turn.
    hist_axes = []
    qq_axes = []
    for _ in range(n_periods):
        hist_axes.append(gf.next_cell())
        qq_axes.append(gf.next_cell())
    plotting.plot_ic_hist(ic, ax=hist_axes)
    plotting.plot_ic_qq(ic, ax=qq_axes)

    plt.show()
    gf.close()
计算IC
ic = perf.factor_information_coefficient(factor_data, group_neutral)
计算同一横截面上收益率与因子的spearman相关性
stats.spearmanr( factor_data['5D'], factor_data['factor'])

生成IC表 Information Analysis
def plot_information_table(ic_data):
    """Print a summary table of information-coefficient statistics.

    For every column of ``ic_data`` the table reports the mean, the
    standard deviation, their ratio ("Risk-Adjusted IC"), the t-statistic
    and p-value of a one-sample t-test against 0, the skewness and the
    kurtosis. Values are rounded to three decimals and the table is
    transposed before being handed to ``utils.print_table``.

    Parameters
    ----------
    ic_data
        Object exposing ``mean()`` and ``std()`` and accepted by the
        ``scipy.stats`` functions used here (presumably a DataFrame with
        one column per return period -- TODO confirm against the caller).

    Returns
    -------
    None. The result is printed, not returned.
    """
    mean_ic = ic_data.mean()
    std_ic = ic_data.std()
    t_values, p_values = stats.ttest_1samp(ic_data, 0)

    summary = pd.DataFrame()
    summary["IC Mean"] = mean_ic
    summary["IC Std."] = std_ic
    summary["Risk-Adjusted IC"] = mean_ic / std_ic
    summary["t-stat(IC)"] = t_values
    summary["p-value(IC)"] = p_values
    summary["IC Skew"] = stats.skew(ic_data)
    summary["IC Kurtosis"] = stats.kurtosis(ic_data)

    print("Information Analysis")
    rounded = summary.apply(lambda column: column.round(3))
    utils.print_table(rounded.T)

本文介绍了用于因子分析的Python工具包Alphalens,包括工具包简介、安装方法及详细使用步骤。内容涵盖数据预处理、因子分组统计、收益率分析和IC分析,帮助读者深入理解因子与收益的相关性、收益率分布和信息系数等关键指标。
4273





