参加携程的一个数据竞赛,需要按 PR 曲线下面积进行评估,于是简单写了一个评估模型的函数。其中 label 参数是按预测得分从高到低排序后的目标值(0 或 1)列表。
def pr_area(label, min_recall=0.05, max_recall=0.5):
    """Return the area under the precision-recall curve in a recall window.

    The labels are walked in order; after each item a (precision, recall)
    point is computed as if every item seen so far were predicted positive.
    Consecutive points whose recall lies inside ``[min_recall, max_recall]``
    are joined with trapezoids and their areas are summed. The first point
    inside the window only serves as the left anchor; no interpolation is
    done at the window edges.

    Args:
        label: Sequence of ground-truth values (each 0 or 1), already ordered
            by predicted score so that the items most likely to be positive
            come first.
        min_recall: Lower recall bound of the window (inclusive).
        max_recall: Upper recall bound of the window (inclusive).

    Returns:
        The accumulated area as a float. ``0.0`` when the input is empty,
        contains no positive label, or fewer than two curve points fall
        inside the window.

    Raises:
        ValueError: If any element of ``label`` is neither 0 nor 1.
    """
    labels = list(label)

    # Validate up front so bad input is reported even when there are no
    # positives and the main loop is skipped.
    for value in labels:
        if value != 0 and value != 1:
            raise ValueError("label values must be 0 or 1, got %r" % (value,))

    # Recall's denominator (tp + fn) is the total number of positives, i.e.
    # the number of 1s -- not the number of 0s.
    positives = labels.count(1)
    if positives == 0:
        # Recall is undefined without positives; there is no curve to measure.
        return 0.0

    tp = 0
    fp = 0
    area = 0.0
    prev_point = None  # (precision, recall) of the last point inside the window
    for value in labels:
        if value == 1:
            tp += 1
        else:
            fp += 1

        # float() keeps this true division even under Python 2.
        recall = float(tp) / positives
        precision = float(tp) / (tp + fp)

        if min_recall <= recall <= max_recall:
            if prev_point is not None:
                prev_precision, prev_recall = prev_point
                # Trapezoid between the previous and the current curve point.
                area += (prev_precision + precision) * (recall - prev_recall) / 2
            prev_point = (precision, recall)
    return area