def is_outlier(points, threshold=3.5):
    """Return a boolean mask flagging outliers by modified z-score.

    Each observation's distance from the per-column median is scaled by the
    median absolute deviation (MAD) of those distances, following Iglewicz
    and Hoaglin:
    http://www.itl.nist.gov/div898/handbook/eda/section4/eda43.htm#Iglewicz

    Parameters
    ----------
    points : array_like
        Observations. A 1-D input is treated as ``n`` scalar observations;
        for higher-dimensional input the first axis indexes observations
        and the last axis indexes features.
    threshold : float, optional
        Observations with a modified z-score strictly greater than this
        value are classified as outliers. Defaults to 3.5.

    Returns
    -------
    numpy.ndarray
        Boolean array, True where the observation is an outlier and False
        otherwise.
    """
    # Accept lists/tuples as well as arrays.
    points = np.asarray(points)

    # Transform a flat vector into a single-feature column.
    if points.ndim == 1:
        points = points[:, None]

    # Nothing to score: avoid np.median's empty-slice warning.
    if points.size == 0:
        return np.zeros(points.shape[:-1], dtype=bool)

    # Per-feature median (axis=0 reduces down the columns; axis=1 would
    # reduce across the rows).
    median = np.median(points, axis=0)

    # Euclidean distance of every observation from the median.
    diff = np.sqrt(np.sum((points - median) ** 2, axis=-1))

    # Median absolute deviation of those distances.
    med_abs_deviation = np.median(diff)

    if med_abs_deviation == 0:
        # Zero spread: dividing would yield inf/nan plus RuntimeWarnings.
        # Any observation that deviates at all is infinitely far in MAD
        # units; observations sitting on the median score zero.
        modified_z_score = np.where(diff > 0, np.inf, 0.0)
    else:
        # 0.6745 rescales the MAD so the score is comparable to a standard
        # z-score for normally distributed data.
        modified_z_score = 0.6745 * diff / med_abs_deviation

    # Mask of observations whose score exceeds the threshold.
    return modified_z_score > threshold


# 2017.04.10: Python data visualization 01
最新推荐文章于 2023-12-30 17:14:08 发布
本文介绍了一种基于修正Z得分(modified z-score)的异常值检测方法。该方法首先计算数据集的中位数,接着计算每个数据点与中位数之间的距离,再求这些距离的中位数,即绝对中位差(MAD);将每个距离乘以 0.6745 并除以 MAD,即得到修正Z得分。当得分超过设定阈值(默认 3.5)时,则认为该数据点为异常值。

1161

被折叠的 条评论
为什么被折叠?



