First, let's look at the S_test function:
@torch.no_grad()
def S_test(net, args, test_loader, logger, step, test_info, subset='test'):
net.eval()
snippet_result = {}
snippet_result['version'] = 'VERSION 1.3'
snippet_result['results'] = {}
snippet_result['external_data'] = {'used': True, 'details': 'Features from I3D Network'}
if subset == 'train':
snippet_result['bkg_score'] = {}
num_correct = 0.
num_total = 0.
for sample in tqdm(test_loader):
_data, _vid_label, _vid_name, _vid_len, _vid_duration = sample['data'], sample['vid_label'], sample['vid_name'], sample['vid_len'], sample['vid_duration']
outputs = net(_data.to(args.device))
_vid_score, _cas_fuse = outputs['vid_score'], outputs['cas_fuse']
for b in range(_data.shape[0]):
vid_name = _vid_name[b]
vid_len = _vid_len[b].item()
vid_duration = _vid_duration[b].item()
            # >> calculate video-level prediction
label_np = _vid_label[b].unsqueeze(0).numpy()
score_np = _vid_score[b].cpu().numpy()
pred_np = np.zeros_like(score_np)
pred_np[np.where(score_np < args.class_thresh)] = 0
pred_np[np.where(score_np >= args.class_thresh)] = 1
if pred_np.sum() == 0:
pred_np[np.argmax(score_np)] = 1
correct_pred = np.sum(label_np == pred_np, axis=1)
num_correct += np.sum((correct_pred == args.num_class).astype(np.float32))
num_total += correct_pred.shape[0]
# >> post-process
cas_fuse = _cas_fuse[b]
num_segments = _data[b].shape[0]
# class-specific score
cas_S = cas_fuse[:, :-1]
pred = np.where(score_np >= args.class_thresh)[0]
if len(pred) == 0:
pred = np.array([np.argmax(score_np)])
cas_pred = cas_S.cpu().numpy()[:, pred]
cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
cas_pred = utils.upgrade_resolution(cas_pred, args.scale)
# class-agnostic score
agnostic_score = 1 - cas_fuse[:, -1].unsqueeze(1)
agnostic_score = agnostic_score.expand((-1, args.num_class))
agnostic_score = agnostic_score.cpu().numpy()[:, pred]
agnostic_score = np.reshape(agnostic_score, (num_segments, -1, 1))
agnostic_score = utils.upgrade_resolution(agnostic_score, args.scale)
# >> save output
if subset == 'train':
snippet_result['bkg_score'][vid_name] = cas_fuse[:, -1].cpu().numpy()
# >> generate proposals
proposal_dict = {}
for i in range(len(args.act_thresh_cas)):
cas_temp = cas_pred.copy()
zero_location = np.where(cas_temp[:, :, 0] < args.act_thresh_cas[i])
cas_temp[zero_location] = 0
seg_list = []
for c in range(len(pred)):
pos = np.where(cas_temp[:, c, 0] > 0)
seg_list.append(pos)
proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
for i in range(len(proposals)):
class_id = proposals[i][0][2]
if class_id not in proposal_dict.keys():
proposal_dict[class_id] = []
proposal_dict[class_id] += proposals[i]
for i in range(len(args.act_thresh_agnostic)):
cas_temp = cas_pred.copy()
agnostic_score_temp = agnostic_score.copy()
zero_location = np.where(agnostic_score_temp[:, :, 0] < args.act_thresh_agnostic[i])
agnostic_score_temp[zero_location] = 0
seg_list = []
for c in range(len(pred)):
pos = np.where(agnostic_score_temp[:, c, 0] > 0)
seg_list.append(pos)
proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
for i in range(len(proposals)):
class_id = proposals[i][0][2]
if class_id not in proposal_dict.keys():
proposal_dict[class_id] = []
proposal_dict[class_id] += proposals[i]
if args.mode == 'train' or args.mode == 'infer':
final_proposals = utils.post_process(args, vid_name, proposal_dict, test_loader)
else:
final_proposals = []
for class_id in proposal_dict.keys():
temp_proposal = proposal_dict[class_id]
final_proposals += temp_proposal
final_proposals = utils.result2json(args, final_proposals)
snippet_result['results'][vid_name] = final_proposals
    json_path = os.path.join(args.output_path_s1, 'snippet_result_{}.json'.format(subset, args.seed))  # NOTE: the format string has only one placeholder, so args.seed is silently ignored
with open(json_path, 'w') as f:
json.dump(snippet_result, f, cls=NumpyArrayEncoder)
if args.mode == 'train' or args.mode == 'infer':
test_acc = num_correct / num_total
print("TEST ACC:{:.4f}".format(test_acc))
test_map = log_evaluate(args, step, test_acc, logger, json_path, test_info, subset)
return test_map
snippet_result = {} is initialized to hold the results.
_data, _vid_label, _vid_name, _vid_len, _vid_duration = sample['data'], sample['vid_label'], sample['vid_name'], sample['vid_len'], sample['vid_duration']
outputs = net(_data.to(args.device))
_vid_score, _cas_fuse = outputs['vid_score'], outputs['cas_fuse']
Inference produces the outputs. The difference from training is in how vid_score is computed:
if vid_labels is None:
vid_score = torch.mean(topk_scores, dim=1)
else:
vid_score = (torch.mean(topk_scores, dim=1) * vid_labels) + \
(torch.mean(cas_S, dim=1) * (1 - vid_labels))
Could this be why the weight of vid_loss is set to 0? To be confirmed.
# >> calculate video-level prediction
label_np = _vid_label[b].unsqueeze(0).numpy()
score_np = _vid_score[b].cpu().numpy()
pred_np = np.zeros_like(score_np)
pred_np[np.where(score_np < args.class_thresh)] = 0
pred_np[np.where(score_np >= args.class_thresh)] = 1
if pred_np.sum() == 0:
pred_np[np.argmax(score_np)] = 1
correct_pred = np.sum(label_np == pred_np, axis=1)
num_correct += np.sum((correct_pred == args.num_class).astype(np.float32))
num_total += correct_pred.shape[0]
The video-level classification is based on vid_score: a class whose score exceeds the threshold is predicted as present in the video, otherwise absent; if no class passes, the top-scoring class is forced to 1.
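To make the thresholding concrete, here is a tiny sketch with invented numbers (class_thresh = 0.25 is an assumed value, not necessarily the repo's default):

import numpy as np

score_np = np.array([0.10, 0.60, 0.30])      # per-class video scores (made up)
pred_np = (score_np >= 0.25).astype(float)   # -> [0., 1., 1.]
if pred_np.sum() == 0:                       # nothing passed the threshold
    pred_np[np.argmax(score_np)] = 1         # fall back to the argmax class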
# >> post-process
cas_fuse = _cas_fuse[b]
num_segments = _data[b].shape[0]
# class-specific score
cas_S = cas_fuse[:, :-1]
pred = np.where(score_np >= args.class_thresh)[0]
if len(pred) == 0:
pred = np.array([np.argmax(score_np)])
cas_pred = cas_S.cpu().numpy()[:, pred]
cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
cas_pred = utils.upgrade_resolution(cas_pred, args.scale)
cas_pred holds, for every snippet, the scores of the predicted action classes, i.e., the class-specific scores.
def upgrade_resolution(arr, scale):
    x = np.arange(0, arr.shape[0])
    # guard: linear interpolation needs at least two points
    if len(x) < 2 or len(arr) < 2:
        print("Error: 'x' or 'arr' must have length at least 2.")
        # duplicate the array along the first axis (e.g., shape becomes (2, 8, 1))
        arr = np.concatenate([arr, arr], axis=0)
        x = np.arange(0, arr.shape[0])
    f = interp1d(x, arr, kind='linear', axis=0, fill_value='extrapolate')
    scale_x = np.arange(0, arr.shape[0], 1 / scale)
    up_scale = f(scale_x)
    return up_scale
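A quick sanity check of the interpolation (shapes and scale here are assumptions): an input of shape (T, C, 1) comes back with T * scale rows:

import numpy as np
cas = np.random.rand(10, 2, 1)          # 10 snippets, 2 predicted classes
up = upgrade_resolution(cas, scale=4)   # interpolate along the snippet axis
print(up.shape)                         # (40, 2, 1): 10 * 4 positions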
Next, the foreground score, which does not distinguish between classes, i.e., the class-agnostic score:
# class-agnostic score
agnostic_score = 1 - cas_fuse[:, -1].unsqueeze(1)
agnostic_score = agnostic_score.expand((-1, args.num_class))
agnostic_score = agnostic_score.cpu().numpy()[:, pred]
agnostic_score = np.reshape(agnostic_score, (num_segments, -1, 1))
agnostic_score = utils.upgrade_resolution(agnostic_score, args.scale)
1 - cas_fuse[:, -1].unsqueeze(1) can be read as 1 minus the background score, i.e., the foreground score.
pred holds the indices of the predicted classes.
This is really a single score per snippet: it is merely copied num_class times, and the predicted classes are then selected from it, as the sketch below shows.
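A minimal sketch of that broadcast (the sizes are assumptions):

import torch

cas_fuse = torch.rand(5, 21)            # 5 snippets, 20 classes + 1 background slot
fg = 1 - cas_fuse[:, -1].unsqueeze(1)   # (5, 1): one foreground score per snippet
fg = fg.expand(-1, 20)                  # (5, 20): the same value repeated per class
# indexing fg with the predicted classes then just picks out that single score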
Next, the proposals are generated.
proposal_dict = {} defines an empty dict; the explanation is given in the comments below.
# iterate over the class-specific thresholds
for i in range(len(args.act_thresh_cas)):
    # copy class-specific score
    cas_temp = cas_pred.copy()
    # find indices whose score falls below the current threshold in args.act_thresh_cas
    zero_location = np.where(cas_temp[:, :, 0] < args.act_thresh_cas[i])
    cas_temp[zero_location] = 0
    seg_list = []
    # for each predicted class, collect the indices whose score is still > 0
    for c in range(len(pred)):
        pos = np.where(cas_temp[:, c, 0] > 0)
        seg_list.append(pos)
    # at this point, snippets below the threshold have been removed
    proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
    for i in range(len(proposals)):
        class_id = proposals[i][0][2]
        if class_id not in proposal_dict.keys():
            proposal_dict[class_id] = []
        proposal_dict[class_id] += proposals[i]
Let's look at the proposal-generation function:
def get_proposal_oic(args, tList, wtcam, vid_score, c_pred, v_len, num_segments, v_duration):
    t_factor = float(16 * v_len) / (args.scale * num_segments * args.frames_per_sec)
    temp = []
    # iterate over the snippet-index lists of the predicted classes
    for i in range(len(tList)):
        c_temp = []
        temp_list = np.array(tList[i])[0]
        if temp_list.any():
            # split the indices into runs of consecutive snippets
            grouped_temp_list = grouping(temp_list)
            # iterate over the runs
            for j in range(len(grouped_temp_list)):
                # mean score inside this run (the inner score)
                inner_score = np.mean(wtcam[grouped_temp_list[j], i, 0])
                len_proposal = len(grouped_temp_list[j])
                # left boundary of the surrounding (outer) region
                outer_s = max(0, int(grouped_temp_list[j][0] - args._lambda * len_proposal))
                # right boundary of the surrounding (outer) region
                outer_e = min(int(wtcam.shape[0] - 1), int(grouped_temp_list[j][-1] + args._lambda * len_proposal))
                outer_temp_list = list(range(outer_s, int(grouped_temp_list[j][0]))) + \
                                  list(range(int(grouped_temp_list[j][-1] + 1), outer_e + 1))
                if len(outer_temp_list) == 0:
                    outer_score = 0
                else:
                    # mean score over the outer region
                    outer_score = np.mean(wtcam[outer_temp_list, i, 0])
                c_score = inner_score - outer_score + args.gamma * vid_score[c_pred[i]]
                # index-to-seconds conversion: after upscaling there are scale * num_segments positions,
                # and each original snippet covers 16 frames, so multiplying an index by
                # t_factor = 16 * v_len / (scale * num_segments * frames_per_sec) yields its timestamp in seconds
                t_start = grouped_temp_list[j][0] * t_factor
                t_end = (grouped_temp_list[j][-1] + 1) * t_factor
                c_temp.append([t_start, t_end, c_pred[i], c_score])
            temp.append(c_temp)
    return temp
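get_proposal_oic depends on a grouping helper that is not shown in this post. It splits a sorted index array into runs of consecutive snippets; a typical implementation (my sketch, not necessarily the repo's exact code) is:

import numpy as np

def grouping(arr):
    # split a sorted 1-D index array wherever the gap between neighbors exceeds 1
    return np.split(arr, np.where(np.diff(arr) != 1)[0] + 1)

# grouping(np.array([2, 3, 4, 8, 9])) -> [array([2, 3, 4]), array([8, 9])]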
Then iterate over the proposals and add them to proposal_dict:
for i in range(len(proposals)):
    class_id = proposals[i][0][2]
    if class_id not in proposal_dict.keys():
        proposal_dict[class_id] = []
    # each class_id is a key; the value is the list of proposals for that class
    proposal_dict[class_id] += proposals[i]
The code below is analogous, except that the proposals are generated with the class-agnostic (foreground) thresholds:
for i in range(len(args.act_thresh_agnostic)):
cas_temp = cas_pred.copy()
agnostic_score_temp = agnostic_score.copy()
zero_location = np.where(agnostic_score_temp[:, :, 0] < args.act_thresh_agnostic[i])
agnostic_score_temp[zero_location] = 0
seg_list = []
for c in range(len(pred)):
pos = np.where(agnostic_score_temp[:, c, 0] > 0)
seg_list.append(pos)
proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
for i in range(len(proposals)):
class_id = proposals[i][0][2]
if class_id not in proposal_dict.keys():
proposal_dict[class_id] = []
proposal_dict[class_id] += proposals[i]
Different modes take different paths:
if args.mode == 'train' or args.mode == 'infer':
final_proposals = utils.post_process(args, vid_name, proposal_dict, test_loader)
else:
final_proposals = []
for class_id in proposal_dict.keys():
temp_proposal = proposal_dict[class_id]
final_proposals += temp_proposal
final_proposals = utils.result2json(args, final_proposals)
snippet_result['results'][vid_name] = final_proposals
When mode is 'train' or 'infer', utils.post_process is executed; 'test' mode skips soft_nms and simply concatenates the per-class proposals.
def post_process(args, vid_name, proposal_dict, test_loader):
final_proposals = []
for class_id in proposal_dict.keys():
temp_proposal = soft_nms(proposal_dict[class_id], sigma=0.3)
final_proposals += temp_proposal
if args.dataset == "THUMOS14":
ambilist = test_loader.dataset.ambilist
final_proposals = np.array(final_proposals)
final_proposals = filter_segments(final_proposals, vid_name, ambilist)
final_proposals = result2json(args, final_proposals)
return final_proposals
Let's look at soft_nms:
def soft_nms(dets, iou_thr=0.7, method='gaussian', sigma=0.3):
    """
    Apply Soft NMS to a set of detection results.
    """
    # expand dets with areas; the second dimension is
    # t_start, t_end, label, score, area
    dets = np.array(dets)
    # duration + 1
    areas = dets[:, 1] - dets[:, 0] + 1
    dets = np.concatenate((dets, areas[:, None]), axis=1)
    retained_box = []
    while dets.size > 0:
        # index of the highest-scoring proposal
        max_idx = np.argmax(dets[:, 3], axis=0)
        # swap the highest-scoring proposal to the front
        dets[[0, max_idx], :] = dets[[max_idx, 0], :]
        # keep it
        retained_box.append(dets[0, :-1].tolist())
        # overlap left edges: max of the top proposal's start (dets[0, 0]) and every other start (dets[1:, 0])
        xx1 = np.maximum(dets[0, 0], dets[1:, 0])
        # overlap right edges
        xx2 = np.minimum(dets[0, 1], dets[1:, 1])
        # intersection length
        inter = np.maximum(xx2 - xx1 + 1, 0.0)
        # intersection over union
        iou = inter / (dets[0, -1] + dets[1:, -1] - inter)
        if method == 'linear':
            weight = np.ones_like(iou)
            weight[iou > iou_thr] -= iou[iou > iou_thr]
        elif method == 'gaussian':
            weight = np.exp(-(iou * iou) / sigma)
        else:  # traditional nms
            weight = np.ones_like(iou)
            weight[iou > iou_thr] = 0
        # decay the remaining scores by weight: a high IoU gives a small weight (heavily
        # overlapping proposals are redundant), while a low IoU leaves the score nearly unchanged
        dets[1:, 3] *= weight
        # drop the kept proposal and continue with the rest
        dets = dets[1:, :]
    return retained_box
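A toy run (numbers invented) shows the soft decay: heavily overlapping proposals are kept but with suppressed scores, instead of being discarded outright:

dets = [[0, 10, 0, 0.9], [1, 11, 0, 0.8], [30, 40, 0, 0.7]]  # [t_start, t_end, class, score]
kept = soft_nms(dets, sigma=0.3)
# all three survive; the second one's IoU with the first is ~0.83,
# so its score is rescaled by exp(-0.83**2 / 0.3) ~ 0.10, i.e. 0.8 -> ~0.08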
When the dataset is THUMOS14, the filter_segments function is applied:
def filter_segments(segment_predict, vn, ambilist):
"""
Filter out segments overlapping with ambiguous_test segments.
"""
num_segment = len(segment_predict)
ind = np.zeros(num_segment)
for i in range(num_segment):
for a in ambilist:
if a[0] == vn:
                gt = range(int(round(float(a[2]))), int(round(float(a[3]))))
pd = range(int(segment_predict[i][0]), int(segment_predict[i][1]))
IoU = float(len(set(gt).intersection(set(pd)))) / float(len(set(gt).union(set(pd))))
if IoU > 0:
ind[i] = 1
s = [segment_predict[i, :] for i in range(num_segment) if ind[i] == 0]
return np.array(s)
This function computes the IoU between each predicted segment and the known ambiguous intervals, filters out any segment that overlaps one, and returns only the segments with no overlap.
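Since the IoU here is computed over integer-second grids with Python sets, a quick worked example (made-up numbers) helps:

gt = range(5, 10)   # ambiguous interval, seconds 5..9
pd = range(8, 12)   # predicted segment, seconds 8..11
iou = len(set(gt) & set(pd)) / len(set(gt) | set(pd))   # 2 / 7 ≈ 0.29 > 0, so the segment is dropped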
The results are saved to a JSON file:
json_path = os.path.join(args.output_path_s1, 'snippet_result_{}.json'.format(subset, args.seed))  # NOTE: the format string has only one placeholder, so args.seed is silently ignored
with open(json_path, 'w') as f:
json.dump(snippet_result, f, cls=NumpyArrayEncoder)
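NumpyArrayEncoder is not shown in this post; it is presumably the usual json.JSONEncoder subclass that converts NumPy types into JSON-serializable ones, along these lines (a sketch, not the repo's exact code):

import json
import numpy as np

class NumpyArrayEncoder(json.JSONEncoder):
    def default(self, obj):
        # convert NumPy scalars and arrays into plain Python types
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)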
When mode is 'train' or 'infer', the classification accuracy is reported and mAP is computed:
if args.mode == 'train' or args.mode == 'infer':
test_acc = num_correct / num_total
print("TEST ACC:{:.4f}".format(test_acc))
test_map = log_evaluate(args, step, test_acc, logger, json_path, test_info, subset)
return test_map
Let's look at the evaluation function:
def log_evaluate(args, step, test_acc, logger, json_path, test_info, subset='test'):
# >> evaluate mAP
    mapping_subset = {'THUMOS14': {'train': 'Validation', 'test': 'Test'},
                      'GTEA': {'train': 'training', 'test': 'validation'},
                      'BEOID': {'train': 'training', 'test': 'validation'},
                      'ActivityNet1.3': {'train': 'train', 'test': 'val'}}
subset_name = mapping_subset[args.dataset][subset]
gt_path = os.path.join(args.data_path, "gt_full.json")
anet_detection = ANETdetection(gt_path, json_path, subset=subset_name, tiou_thresholds=args.tIoU_thresh,
verbose=False, check_status=False, blocked_videos=args.blocked_videos)
mAP, _ = anet_detection.evaluate()
# >> log mAP
if args.cfg == 'thumos' or args.cfg == 'gtea' or args.cfg == 'beoid':
log_folder = 'acc'
test_info['step'].append(step)
test_info['test_acc'].append(test_acc)
if logger is not None:
logger.log_value('{}/Test accuracy'.format(log_folder), test_acc, step)
test_info["average_mAP[0.1:0.7]"].append(mAP[:7].mean())
test_info["average_mAP[0.1:0.5]"].append(mAP[:5].mean())
test_info["average_mAP[0.3:0.7]"].append(mAP[2:7].mean())
for i in range(len(args.tIoU_thresh)):
test_info["mAP@{:.1f}".format(args.tIoU_thresh[i])].append(mAP[i])
if logger is not None:
logger.log_value('{}/average mAP[0.1:0.7]'.format(log_folder), mAP[:7].mean(), step)
logger.log_value('{}/average mAP[0.1:0.5]'.format(log_folder), mAP[:5].mean(), step)
logger.log_value('{}/average mAP[0.3:0.7]'.format(log_folder), mAP[2:7].mean(), step)
for i in range(len(args.tIoU_thresh)):
logger.log_value('{}/mAP@{:.1f}'.format(log_folder, args.tIoU_thresh[i]), mAP[i], step)
return test_info["average_mAP[0.1:0.7]"][-1]
It instantiates ANETdetection:
class ANETdetection(object):
GROUND_TRUTH_FIELDS = ['database']
# GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version']
PREDICTION_FIELDS = ['results', 'version', 'external_data']
def __init__(self, ground_truth_filename=None, prediction_filename=None,
ground_truth_fields=GROUND_TRUTH_FIELDS,
prediction_fields=PREDICTION_FIELDS,
tiou_thresholds=np.linspace(0.5, 0.95, 10),
subset='validation', verbose=False,
check_status=False,
blocked_videos=[]):
if not ground_truth_filename:
raise IOError('Please input a valid ground truth file.')
if not prediction_filename:
raise IOError('Please input a valid prediction file.')
self.subset = subset
self.tiou_thresholds = tiou_thresholds
self.verbose = verbose
self.gt_fields = ground_truth_fields
self.pred_fields = prediction_fields
self.ap = None
self.check_status = check_status
# Retrieve blocked videos from server.
# if self.check_status:
# self.blocked_videos = get_blocked_videos()
# else:
# self.blocked_videos = list()
self.blocked_videos=blocked_videos
# Import ground truth and predictions.
self.ground_truth, self.activity_index = self._import_ground_truth(
ground_truth_filename)
self.prediction = self._import_prediction(prediction_filename)
if self.verbose:
print ('[INIT] Loaded annotations from {} subset.'.format(subset))
nr_gt = len(self.ground_truth)
print ('\tNumber of ground truth instances: {}'.format(nr_gt))
nr_pred = len(self.prediction)
print ('\tNumber of predictions: {}'.format(nr_pred))
print ('\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds))
def _import_ground_truth(self, ground_truth_filename):
"""Reads ground truth file, checks if it is well formatted, and returns
the ground truth instances and the activity classes.
Parameters
----------
ground_truth_filename : str
Full path to the ground truth json file.
Outputs
-------
ground_truth : df
Data frame containing the ground truth instances.
activity_index : dict
Dictionary containing class index.
"""
with open(ground_truth_filename, 'r') as fobj:
data = json.load(fobj)
# Checking format
if not all([field in data.keys() for field in self.gt_fields]):
raise IOError('Please input a valid ground truth file.')
# Read ground truth data.
activity_index, cidx = {}, 0
video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], []
for videoid, v in data['database'].items():
# print(v)
if self.subset != v['subset']:
continue
if videoid in self.blocked_videos:
continue
for ann in v['annotations']:
if ann['label'] not in activity_index:
activity_index[ann['label']] = cidx
cidx += 1
video_lst.append(videoid)
t_start_lst.append(float(ann['segment'][0]))
t_end_lst.append(float(ann['segment'][1]))
label_lst.append(activity_index[ann['label']])
ground_truth = pd.DataFrame({'video-id': video_lst,
't-start': t_start_lst,
't-end': t_end_lst,
'label': label_lst})
if self.verbose:
print(activity_index)
return ground_truth, activity_index
def _import_prediction(self, prediction_filename):
"""Reads prediction file, checks if it is well formatted, and returns
the prediction instances.
Parameters
----------
prediction_filename : str
Full path to the prediction json file.
Outputs
-------
prediction : df
Data frame containing the prediction instances.
"""
with open(prediction_filename, 'r') as fobj:
data = json.load(fobj)
# Checking format...
if not all([field in data.keys() for field in self.pred_fields]):
raise IOError('Please input a valid prediction file.')
# Read predictions.
video_lst, t_start_lst, t_end_lst = [], [], []
label_lst, score_lst = [], []
for videoid, v in data['results'].items():
if videoid in self.blocked_videos:
continue
for result in v:
label = self.activity_index[result['label']]
video_lst.append(videoid)
t_start_lst.append(float(result['segment'][0]))
t_end_lst.append(float(result['segment'][1]))
label_lst.append(label)
score_lst.append(result['score'])
prediction = pd.DataFrame({'video-id': video_lst,
't-start': t_start_lst,
't-end': t_end_lst,
'label': label_lst,
'score': score_lst})
return prediction
    def _get_predictions_with_label(self, prediction_by_label, label_name, cidx):
        """Get all predictions of the given label. Return an empty DataFrame if there
        are no predictions with the given label.
        """
        try:
            return prediction_by_label.get_group(cidx).reset_index(drop=True)
        except:
            if self.verbose:
                print('Warning: No predictions of label \'%s\' were provided.' % label_name)
            return pd.DataFrame()
def wrapper_compute_average_precision(self):
"""Computes average precision for each class in the subset.
"""
ap = np.zeros((len(self.tiou_thresholds), len(self.activity_index)))
# Adaptation to query faster
ground_truth_by_label = self.ground_truth.groupby('label')
prediction_by_label = self.prediction.groupby('label')
results = Parallel(n_jobs=len(self.activity_index))(
delayed(compute_average_precision_detection)(
ground_truth=ground_truth_by_label.get_group(cidx).reset_index(drop=True),
prediction=self._get_predictions_with_label(prediction_by_label, label_name, cidx),
tiou_thresholds=self.tiou_thresholds,
) for label_name, cidx in self.activity_index.items())
for i, cidx in enumerate(self.activity_index.values()):
ap[:,cidx] = results[i]
return ap
def evaluate(self):
"""Evaluates a prediction file. For the detection task we measure the
interpolated mean average precision to measure the performance of a
method.
"""
self.ap = self.wrapper_compute_average_precision()
self.mAP = self.ap.mean(axis=1)
self.average_mAP = self.mAP.mean()
if self.verbose:
print ('[RESULTS] Performance on ActivityNet detection task.')
print ('Average-mAP: {}'.format(self.average_mAP))
if len(self.mAP)==7:
print("-------------------------------------------------------------------------------")
print('|t-IoU |{}|'.format("||".join(["{:.3f}".format(item) for item in self.tiou_thresholds])))
print("-------------------------------------------------------------------------------")
print('|mAP |{}|'.format("||".join(["{:.3f}".format(item) for item in self.mAP])))
print("-------------------------------------------------------------------------------")
print('|Average-mAP: {:.4f} Average mAP[0.1:0.5]:{:.4f} Average mAP[0.3:0.7]:{:.4f}'.
format(self.average_mAP, self.mAP[:5].mean(), self.mAP[2:7].mean()))
print("-------------------------------------------------------------------------------")
if len(self.mAP) == 10:
print("-------------------------------------------------------------------------------")
print('|t-IoU |{}|'.format("||".join(["{:.3f}".format(item) for item in self.tiou_thresholds])))
print("-------------------------------------------------------------------------------")
print('|mAP |{}|'.format("||".join(["{:.3f}".format(item) for item in self.mAP])))
print("-------------------------------------------------------------------------------")
print('|Average-mAP[0.5:0.95]: {:.4f}'.
format(self.average_mAP))
print("-------------------------------------------------------------------------------")
return self.mAP, self.average_mAP
The main job here is importing the predictions and the ground truth.
AP and mAP are then computed; I'll write up the detailed computation later when I have time. The evaluation code looks like it was borrowed from elsewhere, so as long as the predicted proposals and the ground truth are formatted correctly, it works as-is.
Now look at compute_average_precision_detection, which computes the AP:
def compute_average_precision_detection(ground_truth, prediction, tiou_thresholds=np.linspace(0.5, 0.95, 10)):
"""Compute average precision (detection task) between ground truth and
    predictions data frames. If multiple predictions occur for the same
    predicted segment, only the one with the highest score is matched as a
    true positive. This code is greatly inspired by the Pascal VOC devkit.
Parameters
----------
ground_truth : df
Data frame containing the ground truth instances.
Required fields: ['video-id', 't-start', 't-end']
prediction : df
Data frame containing the prediction instances.
Required fields: ['video-id, 't-start', 't-end', 'score']
tiou_thresholds : 1darray, optional
Temporal intersection over union threshold.
Outputs
-------
ap : float
Average precision score.
"""
ap = np.zeros(len(tiou_thresholds))
if prediction.empty:
return ap
npos = float(len(ground_truth))
lock_gt = np.ones((len(tiou_thresholds),len(ground_truth))) * -1
# Sort predictions by decreasing score order.
sort_idx = prediction['score'].values.argsort()[::-1]
prediction = prediction.loc[sort_idx].reset_index(drop=True)
# Initialize true positive and false positive vectors.
tp = np.zeros((len(tiou_thresholds), len(prediction)))
fp = np.zeros((len(tiou_thresholds), len(prediction)))
# Adaptation to query faster
ground_truth_gbvn = ground_truth.groupby('video-id')
    # Assign true positives to ground truth instances.
for idx, this_pred in prediction.iterrows():
try:
# Check if there is at least one ground truth in the video associated.
ground_truth_videoid = ground_truth_gbvn.get_group(this_pred['video-id'])
except Exception as e:
fp[:, idx] = 1
continue
this_gt = ground_truth_videoid.reset_index()
tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values,
this_gt[['t-start', 't-end']].values)
# We would like to retrieve the predictions with highest tiou score.
tiou_sorted_idx = tiou_arr.argsort()[::-1]
for tidx, tiou_thr in enumerate(tiou_thresholds):
for jdx in tiou_sorted_idx:
if tiou_arr[jdx] < tiou_thr:
fp[tidx, idx] = 1
break
if lock_gt[tidx, this_gt.loc[jdx]['index']] >= 0:
continue
# Assign as true positive after the filters above.
tp[tidx, idx] = 1
lock_gt[tidx, this_gt.loc[jdx]['index']] = idx
break
if fp[tidx, idx] == 0 and tp[tidx, idx] == 0:
fp[tidx, idx] = 1
    # Build the AP curve for this class: tp and fp are ordered by confidence, so each
    # confidence cutoff yields one (recall, precision) point via the cumulative sums,
    # tracing out an AP curve. Seven IoU thresholds give seven AP curves.
    tp_cumsum = np.cumsum(tp, axis=1).astype(np.float64)  # np.float was removed in NumPy 1.24+
    fp_cumsum = np.cumsum(fp, axis=1).astype(np.float64)
recall_cumsum = tp_cumsum / npos
precision_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum)
for tidx in range(len(tiou_thresholds)):
ap[tidx] = interpolated_prec_rec(precision_cumsum[tidx,:], recall_cumsum[tidx,:])
return ap
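compute_average_precision_detection relies on two helpers not shown above, segment_iou and interpolated_prec_rec. The versions below follow the official ActivityNet evaluation code, which this repo almost certainly reuses (still, treat them as a reference sketch):

def segment_iou(target_segment, candidate_segments):
    # temporal IoU between one [t-start, t-end] segment and an array of candidates
    tt1 = np.maximum(target_segment[0], candidate_segments[:, 0])
    tt2 = np.minimum(target_segment[1], candidate_segments[:, 1])
    segments_intersection = (tt2 - tt1).clip(0)   # non-negative overlap
    segments_union = (candidate_segments[:, 1] - candidate_segments[:, 0]) \
        + (target_segment[1] - target_segment[0]) - segments_intersection
    return segments_intersection.astype(float) / segments_union

def interpolated_prec_rec(prec, rec):
    # Pascal VOC-style interpolated AP: make precision monotonically
    # non-increasing from the right, then integrate it over recall
    mprec = np.hstack([[0], prec, [0]])
    mrec = np.hstack([[0], rec, [1]])
    for i in range(len(mprec) - 1)[::-1]:
        mprec[i] = max(mprec[i], mprec[i + 1])
    idx = np.where(mrec[1:] != mrec[:-1])[0] + 1
    return np.sum((mrec[idx] - mrec[idx - 1]) * mprec[idx])

As a worked example with invented numbers: npos = 2, tp = [1, 0, 1], fp = [0, 1, 0] give recall = [0.5, 0.5, 1.0] and precision = [1.0, 0.5, 0.67], and the interpolated AP is 0.5 * 1.0 + 0.5 * 0.67 ≈ 0.83.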