“美化”数据分析
积累一些数据分析时展示结果的小技巧,让数据图表更易读。这是一篇不断增加内容的笔记📒,会持续整理总结。
我只是知识的搬运工,都是平时学习的东西。
对数据列最小值加粗显示font-weight: bold
对DataFrame数据的各列的最小值加粗显示。
def highlight_min(x):
    """Return per-element CSS that bolds the minimum value(s) of ``x``.

    Written to be passed to ``Styler.apply``, which expects a list of CSS
    strings with the same length as the data it hands in.

    Args:
        x: A pandas Series (any object exposing ``.min()`` and iterating
            over its values works).

    Returns:
        A list with one entry per element of ``x``: ``"font-weight: bold"``
        where the element equals the minimum, ``""`` elsewhere. Ties are all
        marked bold.
    """
    x_min = x.min()
    return ["font-weight: bold" if v == x_min else "" for v in x]
# ``results`` is a pandas DataFrame. ``Styler.apply`` calls ``highlight_min``
# once per column by default (axis=0), so each column's minimum is shown bold.
results.style.apply(highlight_min)
绘图时添加垂直线ax.vlines/plt.axvline/plt.vlines
添加一些垂直线,标注情况,更易读。
ax.vlines
# Plot every column of ``results`` against its index and keep the Axes so the
# annotations below can be added to the same figure.
ax = results.plot()

# Dashed vertical line at the alpha selected by AIC, spanning the range of the
# AIC column. NOTE(review): "tab:blue" / "tab:orange" are the first two colours
# of matplotlib's default cycle, so they presumably match the two curves drawn
# by ``results.plot()`` -- confirm the column order of ``results``.
ax.vlines(
    alpha_aic,
    results["AIC criterion"].min(),
    results["AIC criterion"].max(),
    label="alpha: AIC estimate",
    linestyles="--",
    color="tab:blue",
)
# Same annotation for the alpha selected by BIC. Uses ``linestyles`` (the
# documented ``vlines`` parameter) so both calls are written the same way.
ax.vlines(
    alpha_bic,
    results["BIC criterion"].min(),
    results["BIC criterion"].max(),
    label="alpha: BIC estimate",
    linestyles="--",
    color="tab:orange",
)
ax.set_xlabel(r"$\alpha$ value")
ax.set_ylabel("criterion")
# Logarithmic x-axis for the alpha values.
ax.set_xscale("log")
ax.legend()
# Assign to ``_`` so a notebook does not echo the returned Text object.
_ = ax.set_title(
    f"Information-criterion for model selection (training time {fit_time:.2f}s)"
)
其结果为:
plt.axvline
import matplotlib.pyplot as plt
# Fixed vertical window for the plot.
ymin, ymax = 2300, 3800
# Last element of ``model``.
# NOTE(review): presumably a fitted LassoCV at the end of a pipeline -- confirm.
lasso = model[-1]
cv_alphas = lasso.alphas_
fold_mse = lasso.mse_path_

# Dotted curves of ``mse_path_`` against alpha on a logarithmic x-axis.
plt.semilogx(cv_alphas, fold_mse, linestyle=":")
# Thick black curve: mean of ``mse_path_`` over its last axis.
plt.plot(
    cv_alphas,
    fold_mse.mean(axis=-1),
    linewidth=2,
    color="black",
    label="Average across the folds",
)
# Dashed vertical line at the estimator's ``alpha_`` attribute.
plt.axvline(
    lasso.alpha_,
    color="black",
    linestyle="--",
    label="alpha: CV estimate",
)

plt.ylim(ymin, ymax)
plt.xlabel(r"$\alpha$")
plt.ylabel("Mean square error")
plt.legend()
cv_plot_title = (
    f"Mean square error on each fold: coordinate descent (train time: {fit_time:.2f}s)"
)
# Assign to ``_`` so a notebook does not echo the returned Text object.
_ = plt.title(cv_plot_title)
其结果为:
plt.vlines
import matplotlib.pyplot as plt
# Draw both information criteria with round markers, one colour each.
for criterion_values, line_color, legend_label in (
    (aic_criterion, "tab:blue", "AIC criterion"),
    (bic_criterion, "tab:orange", "BIC criterion"),
):
    plt.plot(criterion_values, color=line_color, marker="o", label=legend_label)

# Dashed black vertical line at ``index_alpha_path_bic``; its height spans the
# range of the AIC values.
# NOTE(review): presumably the index of the model selected by BIC -- confirm.
plt.vlines(
    index_alpha_path_bic,
    ymin=aic_criterion.min(),
    ymax=aic_criterion.max(),
    linestyle="--",
    color="black",
    label="Selected alpha",
)
plt.xlabel("Lasso model sequence")
plt.ylabel("Information criterion")
plt.legend()
# Assign to ``_`` so a notebook does not echo the returned Text object.
_ = plt.title("Lasso model selection via AIC and BIC")
预测结果区间显示ax.fill_between
显示预测结果 $\pm 1\sigma$(正负一倍标准差)的区域。
# Scatter plot of the true (observed) values.
ax = sns.scatterplot(
    x="input_feature",
    y="target",
    data=full_data,
    alpha=0.75,
    color="black",
)

# Line plots: the ground truth first, then the two model predictions.
for curve_values, curve_color, curve_label in (
    (y_plot, "black", "Ground Truth"),
    (y_brr, "red", "BayesianRidge with polynomial features"),
    (y_ard, "navy", "ARD with polynomial features"),
):
    ax.plot(X_plot, curve_values, color=curve_color, label=curve_label)

# Shaded band of prediction +/- one standard deviation around each model.
# ``y_ard`` / ``y_brr`` are the predictions of the ARD / BRR models.
x_flat = X_plot.ravel()
for band_center, band_halfwidth, band_color in (
    (y_ard, y_ard_std, "navy"),
    (y_brr, y_brr_std, "red"),
):
    ax.fill_between(
        x_flat,
        band_center - band_halfwidth,
        band_center + band_halfwidth,
        alpha=0.3,
        color=band_color,
    )

ax.legend()
# Assign to ``_`` so a notebook does not echo the returned Text object.
_ = ax.set_title("Polynomial fit of a non-linear feature")
其结果为:
坐标轴显示数学公式 r"$...$"
坐标轴包含数学公式时怎么显示:
# The raw string (r"...") keeps the backslash in ``\alpha`` literal; matplotlib
# renders the text between the ``$`` signs as a math expression.
ax.set_xlabel(r"$\alpha$ value")
颜色
两种颜色tab:blue/tab:orange
# "tab:blue" and "tab:orange" are two of matplotlib's named Tableau-palette
# colours; one is used per curve so the two criteria are easy to tell apart.
plt.plot(aic_criterion, color="tab:blue", marker="o", label="AIC criterion")
plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
To be continued