文章目录
1. 计算rank、mAP和mINP
"""eval_metrics.py"""
from __future__ import print_function, absolute_import
import numpy as np
def eval_sysu(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=20):
"""Evaluation with sysu metric
Key: for each query identity, its gallery images from the same camera view are discarded. "Following the original setting in its dataset"
"""
# distmat传入的是每个特征向量的乘积的负数,query(3804*2048)和gallery转置(2048*301)的矩阵乘积(3804*301)的负数
# q_pids表示query的标签,g_pids表示gallery的标签
# q_camids, g_camids分别表示query和gallery样本的相机标签
# max_rank,统计gallery可能性最大的前多少个
num_q, num_g = distmat.shape # num_q表示query的个数,num_g表示gallery的个数
if num_g < max_rank:
max_rank = num_g
print("Note: number of gallery samples is quite small, got {}".format(num_g))
# 沿着行从小到大排序,返回该数值的原来的索引号(因为传入的是原值的负数,所以矩阵乘积原值中最大的那个数值,序号最小为0)
# 返回原值中,每行数值从大到小的索引值
indices = np.argsort(distmat, axis=1)
# 得到每行(每个query)可能性从大到小的预测标签
pred_label = g_pids[indices]
# 将每行的预测标签和真实标签比较
matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) # 扩大真实标签的维度,最终将True/False转换数据类型1/0
# print('matches:', matches, matches.shape) # matches是3804*301维度,每个元素是1或者0.
# compute cmc curve for each query
new_all_cmc = [] # 一种新的方法,存储所有query的[000111..]数组
all_cmc = [] # 原始的方法,存储所有query的[000111...]数组
all_AP = [] # 存储所有query的AP
all_INP = [] # 存储所有query的INP
num_valid_q = 0. # number of valid query(有效的query个数)
# 遍历所有的query样本
for q_idx in range(num_q):
# 1. get query pid and camid
q_pid = q_pids[q_idx]
q_camid = q_camids[q_idx]
# 2. 要在不同位置的摄像机之间进行匹配
# 相机2和相机3在相同的位置,所以相机3的probe图像要跳过相机2的gallery图像
order = indices[q_idx] # 找到这个query对应的gallery可能性排序索引
remove = (q_camid == 3) & (g_camids[order] == 2) # 同时成立则为True,输出301个True或者False
# 取反,False->True。得到的keep中,True是可以使用的gallery。
keep = np.invert(remove)
# 3. compute cmc curve
# the cmc calculation is different from standard protocol
# we follow the protocol of the author's released code
# 去除重复的预测标签
new_cmc = pred_label[q_idx][keep] # 取出这个query行,所有True的预测标签
new_index = np.unique(new_cmc, return_index=True)[1] # 将重复的预测标签只保留一个,返回重复标签的第一个索引下标
new_cmc = [new_cmc[index] for index in sorted(new_index)]
# new_match只在去重后的预测标签等于query标签的位置为1,其余为0;对其cumsum得到的new_cmc才是从找到正确标签开始全是1,之前全是0
new_match = (new_cmc == q_pid).astype(np.int32) # 输出1或者0,1表示与query同ID的预测标签
new_cmc = new_match.cumsum() # 依次输出前k个元素累加和(k=1,2...) 0 0 0 1 1 1 ...
new_all_cmc.append(new_cmc[:max_rank]) # 将该样本的序列添加到所有样本的数组中
# 原始cmc
orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches
if not np.any(orig_cmc):
# this condition is true when query identity does not appear in gallery
continue
cmc = orig_cmc.cumsum() # 0 0 0 0 1 1 1 1 2 2 2 2
# 4. compute mINP
# reference: Deep Learning for Person Re-identification: A Survey and Outlook
pos_idx = np.where(orig_cmc == 1) # 找到正确标签对应的索引
pos_max_idx = np.max(pos_idx) # 找到最大的索引
inp = cmc[pos_max_idx] / (pos_max_idx + 1.0) # 计算INP
all_INP.append(inp)
# 将序列0 0 1 1 2 2 转换为 0 0 1 1 1 1
cmc[cmc > 1] = 1
all_cmc.append(cmc[:max_rank])
num_valid_q += 1.
# 5. compute average precision(AP)
# reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
num_rel = orig_cmc.sum() # 把该行中所有的1求和,得到正确样本的个数
tmp_cmc = orig_cmc.cumsum() # 累加,得到0 0 1 1 2 2...
# 正确标签,在正样本的位置 / 在所有样本中的位置
tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)] # i是索引,x是数值,成对取出
tmp_cmc = np.asarray(tmp_cmc) * orig_cmc # 只保留正确标签的计算结果
AP = tmp_cmc.sum() / num_rel # 计算AP
all_AP.append(AP)
assert num_valid_q > 0,