First, let's look at the S_test function:
@torch.no_grad()
def S_test(net, args, test_loader, logger, step, test_info, subset='test'):
net.eval()
snippet_result = {}
snippet_result['version'] = 'VERSION 1.3'
snippet_result['results'] = {}
snippet_result['external_data'] = {'used': True, 'details': 'Features from I3D Network'}
if subset == 'train':
snippet_result['bkg_score'] = {}
num_correct = 0.
num_total = 0.
for sample in tqdm(test_loader):
_data, _vid_label, _vid_name, _vid_len, _vid_duration = sample['data'], sample['vid_label'], sample['vid_name'], sample['vid_len'], sample['vid_duration']
outputs = net(_data.to(args.device))
_vid_score, _cas_fuse = outputs['vid_score'], outputs['cas_fuse']
for b in range(_data.shape[0]):
vid_name = _vid_name[b]
vid_len = _vid_len[b].item()
vid_duration = _vid_duration[b].item()
            # >> calculate video-level prediction
label_np = _vid_label[b].unsqueeze(0).numpy()
score_np = _vid_score[b].cpu().numpy()
pred_np = np.zeros_like(score_np)
pred_np[np.where(score_np < args.class_thresh)] = 0
pred_np[np.where(score_np >= args.class_thresh)] = 1
if pred_np.sum() == 0:
pred_np[np.argmax(score_np)] = 1
correct_pred = np.sum(label_np == pred_np, axis=1)
num_correct += np.sum((correct_pred == args.num_class).astype(np.float32))
num_total += correct_pred.shape[0]
# >> post-process
cas_fuse = _cas_fuse[b]
num_segments = _data[b].shape[0]
# class-specific score
cas_S = cas_fuse[:, :-1]
pred = np.where(score_np >= args.class_thresh)[0]
if len(pred) == 0:
pred = np.array([np.argmax(score_np)])
cas_pred = cas_S.cpu().numpy()[:, pred]
cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
cas_pred = utils.upgrade_resolution(cas_pred, args.scale)
# class-agnostic score
agnostic_score = 1 - cas_fuse[:, -1].unsqueeze(1)
agnostic_score = agnostic_score.expand((-1, args.num_class))
agnostic_score = agnostic_score.cpu().numpy()[:, pred]
agnostic_score = np.reshape(agnostic_score, (num_segments, -1, 1))
agnostic_score = utils.upgrade_resolution(agnostic_score, args.scale)
# >> save output
if subset == 'train':
snippet_result['bkg_score'][vid_name] = cas_fuse[:, -1].cpu().numpy()
# >> generate proposals
proposal_dict = {}
for i in range(len(args.act_thresh_cas)):
cas_temp = cas_pred.copy()
zero_location = np.where(cas_temp[:, :, 0] < args.act_thresh_cas[i])
cas_temp[zero_location] = 0
seg_list = []
for c in range(len(pred)):
pos = np.where(cas_temp[:, c, 0] > 0)
seg_list.append(pos)
proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
for i in range(len(proposals)):
class_id = proposals[i][0][2]
if class_id not in proposal_dict.keys():
proposal_dict[class_id] = []
proposal_dict[class_id] += proposals[i]
for i in range(len(args.act_thresh_agnostic)):
cas_temp = cas_pred.copy()
agnostic_score_temp = agnostic_score.copy()
zero_location = np.where(agnostic_score_temp[:, :, 0] < args.act_thresh_agnostic[i])
agnostic_score_temp[zero_location] = 0
seg_list = []
for c in range(len(pred)):
pos = np.where(agnostic_score_temp[:, c, 0] > 0)
seg_list.append(pos)
proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
for i in range(len(proposals)):
class_id = proposals[i][0][2]
if class_id not in proposal_dict.keys():
proposal_dict[class_id] = []
proposal_dict[class_id] += proposals[i]
if args.mode == 'train' or args.mode == 'infer':
final_proposals = utils.post_process(args, vid_name, proposal_dict, test_loader)
else:
final_proposals = []
for class_id in proposal_dict.keys():
temp_proposal = proposal_dict[class_id]
final_proposals += temp_proposal
final_proposals = utils.result2json(args, final_proposals)
snippet_result['results'][vid_name] = final_proposals
    json_path = os.path.join(args.output_path_s1, 'snippet_result_{}.json'.format(subset, args.seed))  # NOTE: the format string has only one placeholder, so args.seed is silently ignored
with open(json_path, 'w') as f:
json.dump(snippet_result, f, cls=NumpyArrayEncoder)
if args.mode == 'train' or args.mode == 'infer':
test_acc = num_correct / num_total
print("TEST ACC:{:.4f}".format(test_acc))
test_map = log_evaluate(args, step, test_acc, logger, json_path, test_info, subset)
return test_map
snippet_result = {} is initialized to hold the results.
_data, _vid_label, _vid_name, _vid_len, _vid_duration = sample['data'], sample['vid_label'], sample['vid_name'], sample['vid_len'], sample['vid_duration']
outputs = net(_data.to(args.device))
_vid_score, _cas_fuse = outputs['vid_score'], outputs['cas_fuse']
Inference produces the outputs. The difference from training is in how vid_score is computed:
if vid_labels is None:
vid_score = torch.mean(topk_scores, dim=1)
else:
vid_score = (torch.mean(topk_scores, dim=1) * vid_labels) + \
(torch.mean(cas_S, dim=1) * (1 - vid_labels))
Could this be why the weight of vid_loss is set to 0? To be confirmed.
# >> calculate video-level prediction
label_np = _vid_label[b].unsqueeze(0).numpy()
score_np = _vid_score[b].cpu().numpy()
pred_np = np.zeros_like(score_np)
pred_np[np.where(score_np < args.class_thresh)] = 0
pred_np[np.where(score_np >= args.class_thresh)] = 1
if pred_np.sum() == 0:
pred_np[np.argmax(score_np)] = 1
correct_pred = np.sum(label_np == pred_np, axis=1)
num_correct += np.sum((correct_pred == args.num_class).astype(np.float32))
num_total += correct_pred.shape[0]
The video-level classification is based on vid_score: a class whose score exceeds the threshold is predicted as present in the video, otherwise absent; if no class passes, the top-scoring class is forced to 1.
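To make the thresholding concrete, here is a tiny sketch with invented numbers (class_thresh = 0.25 is an assumed value, not necessarily the repo's default):

import numpy as np

score_np = np.array([0.10, 0.60, 0.30])      # per-class video scores (made up)
pred_np = (score_np >= 0.25).astype(float)   # -> [0., 1., 1.]
if pred_np.sum() == 0:                       # nothing passed the threshold
    pred_np[np.argmax(score_np)] = 1         # fall back to the argmax class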
# >> post-process
cas_fuse = _cas_fuse[b]
num_segments = _data[b].shape[0]
# class-specific score
cas_S = cas_fuse[:, :-1]
pred = np.where(score_np >= args.class_thresh)[0]
if len(pred) == 0:
pred = np.array([np.argmax(score_np)])
cas_pred = cas_S.cpu().numpy()[:, pred]
cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
cas_pred = utils.upgrade_resolution(cas_pred, args.scale)
cas_pred holds, for every snippet, the scores of the predicted action classes, i.e., the class-specific scores.
def upgrade_resolution(arr, scale):
    x = np.arange(0, arr.shape[0])
    # guard: linear interpolation needs at least two points
    if len(x) < 2 or len(arr) < 2:
        print("Error: 'x' or 'arr' must have length at least 2.")
        # duplicate the array along the first axis (e.g., shape becomes (2, 8, 1))
        arr = np.concatenate([arr, arr], axis=0)
        x = np.arange(0, arr.shape[0])
    f = interp1d(x, arr, kind='linear', axis=0, fill_value='extrapolate')
    scale_x = np.arange(0, arr.shape[0], 1 / scale)
    up_scale = f(scale_x)
    return up_scale
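A quick sanity check of the interpolation (shapes and scale here are assumptions): an input of shape (T, C, 1) comes back with T * scale rows:

import numpy as np
cas = np.random.rand(10, 2, 1)          # 10 snippets, 2 predicted classes
up = upgrade_resolution(cas, scale=4)   # interpolate along the snippet axis
print(up.shape)                         # (40, 2, 1): 10 * 4 positions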
Next, the foreground score, which does not distinguish between classes, i.e., the class-agnostic score:
# class-agnostic score
agnostic_score = 1 - cas_fuse[:, -1].unsqueeze(1)
agnostic_score = agnostic_score.expand((-1, args.num_class))
agnostic_score = agnostic_score.cpu().numpy()[:, pred]
agnostic_score = np.reshape(agnostic_score, (num_segments, -1, 1))
agnostic_score = utils.upgrade_resolution(agnostic_score, args.scale)
1 - cas_fuse[:, -1].unsqueeze(1) can be read as 1 minus the background score, i.e., the foreground score.
pred holds the indices of the predicted classes.
This is really a single score per snippet: it is merely copied num_class times, and the predicted classes are then selected from it, as the sketch below shows.
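A minimal sketch of that broadcast (the sizes are assumptions):

import torch

cas_fuse = torch.rand(5, 21)            # 5 snippets, 20 classes + 1 background slot
fg = 1 - cas_fuse[:, -1].unsqueeze(1)   # (5, 1): one foreground score per snippet
fg = fg.expand(-1, 20)                  # (5, 20): the same value repeated per class
# indexing fg with the predicted classes then just picks out that single score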
Next, the proposals are generated.
proposal_dict = {} defines an empty dict; the explanation is given in the comments below.
# iterate over the class-specific thresholds
for i in range(len(args.act_thresh_cas)):
    # copy class-specific score
    cas_temp = cas_pred.copy()
    # find indices whose score falls below the current threshold in args.act_thresh_cas
    zero_location = np.where(cas_temp[:, :, 0] < args.act_thresh_cas[i])
    cas_temp[zero_location] = 0
    seg_list = []
    # for each predicted class, collect the indices whose score is still > 0
    for c in range(len(pred)):
        pos = np.where(cas_temp[:, c, 0] > 0)
        seg_list.append(pos)
    # at this point, snippets below the threshold have been removed
    proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
    for i in range(len(proposals)):
        class_id = proposals[i][0][2]
        if class_id not in proposal_dict.keys():
            proposal_dict[class_id] = []
        proposal_dict[class_id] += proposals[i]
Let's look at the proposal-generation function:
def get_proposal_oic(args, tList, wtcam, vid_score, c_pred, v_len, num_segments, v_duration):
    t_factor = float(16 * v_len) / (args.scale * num_segments * args.frames_per_sec)
    temp = []
    # iterate over the snippet-index lists of the predicted classes
    for i in range(len(tList)):
        c_temp = []
        temp_list = np.array(tList[i])[0]
        if temp_list.any():
            # split the indices into runs of consecutive snippets
            grouped_temp_list = grouping(temp_list)
            # iterate over the runs
            for j in range(len(grouped_temp_list)):
                # mean score inside this run (the inner score)
                inner_score = np.mean(wtcam[grouped_temp_list[j], i, 0])
                len_proposal = len(grouped_temp_list[j])
                # left boundary of the surrounding (outer) region
                outer_s = max(0, int(grouped_temp_list[j][0] - args._lambda * len_proposal))
                # right boundary of the surrounding (outer) region
                outer_e = min(int(wtcam.shape[0] - 1), int(grouped_temp_list[j][-1] + args._lambda * len_proposal))
                outer_temp_list = list(range(outer_s, int(grouped_temp_list[j][0]))) + \
                                  list(range(int(grouped_temp_list[j][-1] + 1), outer_e + 1))
                if len(outer_temp_list) == 0:
                    outer_score = 0
                else:
                    # mean score over the outer region
                    outer_score = np.mean(wtcam[outer_temp_list, i, 0])
                c_score = inner_score - outer_score + args.gamma * vid_score[c_pred[i]]
                # index-to-seconds conversion: after upscaling there are scale * num_segments positions,
                # and each original snippet covers 16 frames, so multiplying an index by
                # t_factor = 16 * v_len / (scale * num_segments * frames_per_sec) yields its timestamp in seconds
                t_start = grouped_temp_list[j][0] * t_factor
                t_end = (grouped_temp_list[j][-1] + 1) * t_factor
                c_temp.append([t_start, t_end, c_pred[i], c_score])
            temp.append(c_temp)
    return temp
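get_proposal_oic depends on a grouping helper that is not shown in this post. It splits a sorted index array into runs of consecutive snippets; a typical implementation (my sketch, not necessarily the repo's exact code) is:

import numpy as np

def grouping(arr):
    # split a sorted 1-D index array wherever the gap between neighbors exceeds 1
    return np.split(arr, np.where(np.diff(arr) != 1)[0] + 1)

# grouping(np.array([2, 3, 4, 8, 9])) -> [array([2, 3, 4]), array([8, 9])]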
Then iterate over the proposals and add them to proposal_dict:
for i in range(len(proposals)):
    class_id = proposals[i][0][2]
    if class_id not in proposal_dict.keys():
        proposal_dict[class_id] = []
    # each class_id is a key; the value is the list of proposals for that class
    proposal_dict[class_id] += proposals[i]
The code below is analogous, except that the proposals are generated with the class-agnostic (foreground) thresholds:
for i in range(len(args.act_thresh_agnostic)):
cas_temp = cas_pred.copy()
agnostic_score_temp = agnostic_score.copy()
zero_location = np.where(agnostic_score_temp[:, :, 0] < args.act_thresh_agnostic[i])
agnostic_score_temp[zero_location] = 0
seg_list = []
for c in range(len(pred)):
pos = np.where(agnostic_score_temp[:, c, 0] > 0)
seg_list.append(pos)
proposals = utils.get_proposal_oic(args, seg_list, cas_temp, score_np, pred, vid_len, num_segments, vid_duration)
for i in range(len(proposals)):
class_id = proposals[i][0][2]
if class_id not in proposal_dict.keys():
proposal_dict[class_id] = []
proposal_dict[class_id] += proposals[i]
Different modes take different paths:
if args.mode == 'train' or args.mode == 'infer':
final_proposals = utils.post_process(args, vid_name, proposal_dict, test_loader)
else:
final_proposals = []
for class_id in proposal_dict.keys():
temp_proposal = proposal_dict[class_id]
final_proposals += temp_proposal
final_proposals = utils.result2json(args, final_proposals)
snippet_result['results'][vid_name] = final_proposals
When mode is 'train' or 'infer', utils.post_process is executed; 'test' mode skips soft_nms and simply concatenates the per-class proposals.
def post_process(args, vid_name, proposal_dict, test_loader):
final_proposals = []
for class_id in proposal_dict.keys():
temp_proposal = soft_nms(proposal_dict[class_id], sigma=0.3)
final_proposals += temp_proposal
if args.dataset == "THUMOS14":
ambilist = test_loader.dataset.ambilist
final_proposals = np.array(final_proposals)
final_proposals = filter_segments(final_proposals, vid_name, ambilist)
final_proposals = result2json(args, final_proposals)
return final_proposals
Let's look at soft_nms:
def soft_nms(dets, iou_thr=0.7, method='gaussian', sigma=0.3):
    """
    Apply Soft NMS to a set of detection results.
    """
    # expand dets with areas; the second dimension is
    # t_start, t_end, label, score, area
    dets = np.array(dets)
    # duration + 1
    areas = dets[:, 1] - dets[:, 0] + 1
    dets = np.concatenate((dets, areas[:, None]), axis=1)
    retained_box = []
    while dets.size > 0:
        # index of the highest-scoring proposal
        max_idx = np.argmax(dets[:, 3], axis=0)
        # swap the highest-scoring proposal to the front
        dets[[0, max_idx], :] = dets[[max_idx, 0], :]
        # keep it
        retained_box.append(dets[0, :-1].tolist())
        # overlap left edges: max of the top proposal's start (dets[0, 0]) and every other start (dets[1:, 0])
        xx1 = np.maximum(dets[0, 0], dets[1:, 0])
        # overlap right edges
        xx2 = np.minimum(dets[0, 1], dets[1:, 1])
        # intersection length
        inter = np.maximum(xx2 - xx1 + 1, 0.0)
        # intersection over union
        iou = inter / (dets[0, -1] + dets[1:, -1] - inter)
        if method == 'linear':
            weight = np.ones_like(iou)
            weight[iou > iou_thr] -= iou[iou > iou_thr]
        elif method == 'gaussian':
            weight = np.exp(-(iou * iou) / sigma)
        else:  # traditional nms
            weight = np.ones_like(iou)
            weight[iou > iou_thr] = 0
        # decay the remaining scores by weight: a high IoU gives a small weight (heavily
        # overlapping proposals are redundant), while a low IoU leaves the score nearly unchanged
        dets[1:, 3] *= weight
        # drop the kept proposal and continue with the rest
        dets = dets[1:, :]
    return retained_box
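A toy run (numbers invented) shows the soft decay: heavily overlapping proposals are kept but with suppressed scores, instead of being discarded outright:

dets = [[0, 10, 0, 0.9], [1, 11, 0, 0.8], [30, 40, 0, 0.7]]  # [t_start, t_end, class, score]
kept = soft_nms(dets, sigma=0.3)
# all three survive; the second one's IoU with the first is ~0.83,
# so its score is rescaled by exp(-0.83**2 / 0.3) ~ 0.10, i.e. 0.8 -> ~0.08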
When the dataset is THUMOS14, the filter_segments function is applied:
def filter_segments(segment_predict, vn, ambilist):
"""
Filter out segments overlapping with ambiguous_test segments.
"""
num_segment = len(segment_predict)
ind = np.zeros(num_segment)
for i in range(num_segment):
for a in ambilist:
if a[0] == vn:
                gt = range(int(round(float(a[2]))), int(round(float(a[3]))))
pd = range(int(segment_predict[i][0]), int(segment_predict[i][1]))
IoU = float(len(set(gt).intersection(set(pd)))) / float(len(set(gt).union(set(pd))))
if IoU > 0:
ind[i] = 1
s = [segment_predict[i, :] for i in range(num_segment) if ind[i] == 0]
return np.array(s)
This function computes the IoU between each predicted segment and the known ambiguous intervals, filters out any segment that overlaps one, and returns only the segments with no overlap.
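Since the IoU here is computed over integer-second grids with Python sets, a quick worked example (made-up numbers) helps:

gt = range(5, 10)   # ambiguous interval, seconds 5..9
pd = range(8, 12)   # predicted segment, seconds 8..11
iou = len(set(gt) & set(pd)) / len(set(gt) | set(pd))   # 2 / 7 ≈ 0.29 > 0, so the segment is dropped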
The results are saved to a JSON file:
json_path = os.path.join(args.output_path_s1, 'snippet_result_{}.json'.format(subset, args.seed))  # NOTE: the format string has only one placeholder, so args.seed is silently ignored
with open(json_path, 'w') as f:
json.dump(snippet_result, f, cls=NumpyArrayEncoder)
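NumpyArrayEncoder is not shown in this post; it is presumably the usual json.JSONEncoder subclass that converts NumPy types into JSON-serializable ones, along these lines (a sketch, not the repo's exact code):

import json
import numpy as np

class NumpyArrayEncoder(json.JSONEncoder):
    def default(self, obj):
        # convert NumPy scalars and arrays into plain Python types
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)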
When mode is 'train' or 'infer', the classification accuracy is reported and mAP is computed:
if args.mode == 'train' or args.mode == 'infer':
test_acc = num_correct / num_total
print("TEST ACC:{:.4f}".format(test_acc))
test_map = log_evaluate(args, step, test_acc, logger, json_path, test_info, subset)
return test_map
Let's look at the evaluation function:
def log_evaluate(args, step, test_acc, logger, json_path, test_info, subset='test'):
# >> evaluate mAP
    mapping_subset = {'THUMOS14': {'train': 'Validation', 'test': 'Test'},
                      'GTEA': {'train': 'training', 'test': 'validation'},
                      'BEOID': {'train': 'training', 'test': 'validation'},
                      'ActivityNet1.3': {'train': 'train', 'test': 'val'}}
subset_name = mapping_subset[args.dataset][subset]
gt_path = os.path.join(args.data_path, "gt_full.json")
anet_detection = ANETdetection(gt_path, json_path, subset=subset_name, tiou_thresholds=args.tIoU_thresh,
verbose=False, check_status=False, blocked_videos=args.blocked_videos)
mAP, _ = anet_detection.evaluate()
# >> log mAP
if args.cfg == 'thumos' or args.cfg == 'gtea' or args.cfg == 'beoid':
log_folder = 'acc'
test_info['step'].append(step)
test_info['test_acc'].append(test_acc)
if logger is not None:
logger.log_value('{}/Test accuracy'.format(log_folder), test_acc, step)
test_info["average_mAP[0.1:0.7]"].append(mAP[:7].mean())
test_info["average_mAP[0.1:0.5]"].append(mAP[:5].mean())
test_info["average_mAP[0.3:0.7]"].append(mAP[2:7].mean())
for i in range(len(args.tIoU_thresh)):
test_info["mAP@{:.1f}".format(args.tIoU_thresh[i])].append(mAP[i])
if logger is not None:
logger.log_value('{}/average mAP[0.1:0.7]'.format(log_folder), mAP[:7].mean(), step)
logger.log_value('{}/average mAP[0.1:0.5]'.format(log_folder), mAP[:5].mean(), step)
logger.log_value('{}/average mAP[0.3:0.7]'.format(log_folder), mAP[2:7].mean(), step)
for i in range(len(args.tIoU_thresh)):
logger.log_value('{}/mAP@{:.1f}'.format(log_folder, args.tIoU_thresh[i]), mAP[i], step)
return test_info["average_mAP[0.1:0.7]"][-1]
It instantiates ANETdetection:
class ANETdetection(object):
GROUND_TRUTH_FIELDS = ['database']
# GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version']
PREDICTION_FIELDS = ['results', 'version', 'external_data']
def __init__(self, ground_truth_filename=None, prediction_filename=None,
ground_truth_fields=GROUND_TRUTH_FIELDS,
prediction_fields=PREDICTION_FIELDS,
tiou_thresholds=np.linspace(0.5, 0.95, 10),
subset='validation', verbose=False,
check_status=False,
blocked_videos=[]):
if not ground_truth_filename:
raise IOError('Please input a valid ground truth file.')
if not prediction_filename:
raise IOError('Please input a valid prediction file.')
self.subset = subset
self.tiou_thresholds = tiou_thresholds
self.verbose = verbose
self.gt_fields = ground_truth_fields
self.pred_fields = prediction_fields
self.ap = None
self.check_status = check_status
# Retrieve blocked videos from server.
# if self.check_status:
# self.blocked_videos = get_blocked_videos()
# else:
# self.blocked_videos = list()
self.blocked_videos=blocked_videos
# Import ground truth and predictions.
self.ground_truth, self.activity_index = self._import_ground_truth(
ground_truth_filename)
self.prediction = self._import_prediction(prediction_filename)
if self.verbose:
print ('[INIT] Loaded annotations from {} subset.'.format(subset))
nr_gt = len(self.ground_truth)
print ('\tNumber of ground truth instances: {}'.format(nr_gt))
nr_pred = len(self.prediction)
print ('\tNumber of predictions: {}'.format(nr_pred))
print ('\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds))
def _import_ground_truth(self, ground_truth_filename):
"""Reads ground truth file, checks if it is well formatted, and returns
the ground truth instances and the activity classes.
Parameters
----------
ground_truth_filename : str
Full path to the ground truth json file.
Outputs
-------
ground_truth : df
Data frame containing the ground truth instances.
activity_index : dict
Dictionary containing class index.
"""
with open(ground_truth_filename, 'r') as fobj:
data = json.load(fobj)
# Checking format
if not all([field in data.keys() for field in self.gt_fields]):
raise IOError('Please input a valid ground truth file.')
# Read ground truth data.
activity_index, cidx = {}, 0
video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], []
for videoid, v in data['database'].items():
# print(v)
if self.subset != v['subset']:
continue
if videoid in self.blocked_videos:
continue
for ann in v['annotations']:
if ann['label'] not in activity_index:
activity_index[ann['label']] = cidx
cidx += 1
video_lst.append(videoid)
t_start_lst.append(float(ann['segment'][0]))
t_end_lst.append(float(ann['segment'][1]))
label_lst.append(activity_index[ann['label']])
ground_truth = pd.DataFrame({'video-id': video_lst,
't-start': t_start_lst,
't-end': t_end_lst,
'label': label_lst})
if self.verbose:
print(activity_index)
return ground_truth, activity_index
def _import_prediction(self, prediction_filename):
"""Reads prediction file, checks if it is well formatted, and returns
the prediction instances.
Parameters
----------
prediction_filename : str
Full path to the prediction json file.
Outputs
-------
prediction : df
Data frame containing the prediction instances.
"""
with open(prediction_filename, 'r') as fobj:
data = json.load(fobj)
# Checking format...
if not all([field in data.keys() for field in self.pred_fields]):
raise IOError('Please input a valid prediction file.')
# Read predictions.
video_lst, t_start_lst, t_end_lst = [], [], []
label_lst, score_lst = [], []
for videoid, v in data['results'].items():
if videoid in self.blocked_videos:
continue
for result in v:
label = self.activity_index[result['label']]
video_lst.append(videoid)
t_start_lst.append(float(result['segment'][0]))
t_end_lst.append(float(result['segment'][1]))
label_lst.append(label)
score_lst.append(result['score'])
prediction = pd.DataFrame({'video-id': video_lst,
't-start': t_start_lst,
't-end': t_end_lst,
'label': label_lst,
'score': score_lst})
return prediction
    def _get_predictions_with_label(self, prediction_by_label, label_name, cidx):
        """Get all predictions of the given label. Return an empty DataFrame if there
        are no predictions with the given label.
        """
        try:
            return prediction_by_label.get_group(cidx).reset_index(drop=True)
        except:
            if self.verbose:
                print('Warning: No predictions of label \'%s\' were provided.' % label_name)
            return pd.DataFrame()
def wrapper_compute_average_precision(self):
"""Computes average precision for each class in the subset.
"""
ap = np.zeros((len(self.tiou_thresholds), len(self.activity_index)))
# Adaptation to query faster
ground_truth_by_label = self.ground_truth.groupby('label')
prediction_by_label = self.prediction.groupby('label')
results = Parallel(n_jobs=len(self.activity_index))(
delayed(compute_average_precision_detection)(
ground_truth=ground_truth_by_label.get_group(cidx).reset_index(drop=True),
prediction=self._get_predictions_with_label(prediction_by_label, label_name, cidx),
tiou_thresholds=self.tiou_thresholds,
) for label_name, cidx in self.activity_index.items())
for i, cidx in enumerate(self.activity_index.values()):
ap[:,cidx] = results[i]
return ap
def evaluate(self):
"""Evaluates a prediction file. For the detection task we measure the
interpolated mean average precision to measure the performance of a
method.
"""
self.ap = self.wrapper_compute_average_precision()
self.mAP = self.ap.mean(axis=1)
self.average_mAP = self.mAP.mean()
if self.verbose:
print ('[RESULTS] Performance on ActivityNet detection task.')
print ('Average-mAP: {}'.format(self.average_mAP))
if len(self.mAP)==7:
print("-------------------------------------------------------------------------------")
print('|t-IoU |{}|'.format("||".join(["{:.3f}".format(item) for item in self.tiou_thresholds])))
print("-------------------------------------------------------------------------------")
print('|mAP |{}|'.format("||".join(["{:.3f}".format(item) for item in self.mAP])))
print("-------------------------------------------------------------------------------")
print('|Average-mAP: {:.4f} Average mAP[0.1:0.5]:{:.4f} Average mAP[0.3:0.7]:{:.4f}'.
format(self.average_mAP, self.mAP[:5].mean(), self.mAP[2:7].mean()))
print("-------------------------------------------------------------------------------")
if len(self.mAP) == 10:
print("-------------------------------------------------------------------------------")
print('|t-IoU |{}|'.format("||".join(["{:.3f}".format(item) for item in self.tiou_thresholds])))
print("-------------------------------------------------------------------------------")
print('|mAP |{}|'.format("||".join(["{:.3f}".format(item) for item in self.mAP])))
print("-------------------------------------------------------------------------------")
print('|Average-mAP[0.5:0.95]: {:.4f}'.
format(self.average_mAP))
print("-------------------------------------------------------------------------------")
return self.mAP, self.average_mAP
The main job here is importing the predictions and the ground truth.
AP and mAP are then computed; I'll write up the detailed computation later when I have time. The evaluation code looks like it was borrowed from elsewhere, so as long as the predicted proposals and the ground truth are formatted correctly, it works as-is.
Now look at compute_average_precision_detection, which computes the AP:
def compute_average_precision_detection(ground_truth, prediction, tiou_thresholds=np.linspace(0.5, 0.95, 10)):
"""Compute average precision (detection task) between ground truth and
    predictions data frames. If multiple predictions occur for the same
    predicted segment, only the one with the highest score is matched as a
    true positive. This code is greatly inspired by the Pascal VOC devkit.
Parameters
----------
ground_truth : df
Data frame containing the ground truth instances.
Required fields: ['video-id', 't-start', 't-end']
prediction : df
Data frame containing the prediction instances.
Required fields: ['video-id, 't-start', 't-end', 'score']
tiou_thresholds : 1darray, optional
Temporal intersection over union threshold.
Outputs
-------
ap : float
Average precision score.
"""
ap = np.zeros(len(tiou_thresholds))
if prediction.empty:
return ap
npos = float(len(ground_truth))
lock_gt = np.ones((len(tiou_thresholds),len(ground_truth))) * -1
# Sort predictions by decreasing score order.
sort_idx = prediction['score'].values.argsort()[::-1]
prediction = prediction.loc[sort_idx].reset_index(drop=True)
# Initialize true positive and false positive vectors.
tp = np.zeros((len(tiou_thresholds), len(prediction)))
fp = np.zeros((len(tiou_thresholds), len(prediction)))
# Adaptation to query faster
ground_truth_gbvn = ground_truth.groupby('video-id')
    # Assign true positives to ground truth instances.
for idx, this_pred in prediction.iterrows():
try:
# Check if there is at least one ground truth in the video associated.
ground_truth_videoid = ground_truth_gbvn.get_group(this_pred['video-id'])
except Exception as e:
fp[:, idx] = 1
continue
this_gt = ground_truth_videoid.reset_index()
tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values,
this_gt[['t-start', 't-end']].values)
# We would like to retrieve the predictions with highest tiou score.
tiou_sorted_idx = tiou_arr.argsort()[::-1]
for tidx, tiou_thr in enumerate(tiou_thresholds):
for jdx in tiou_sorted_idx:
if tiou_arr[jdx] < tiou_thr:
fp[tidx, idx] = 1
break
if lock_gt[tidx, this_gt.loc[jdx]['index']] >= 0:
continue
# Assign as true positive after the filters above.
tp[tidx, idx] = 1
lock_gt[tidx, this_gt.loc[jdx]['index']] = idx
break
if fp[tidx, idx] == 0 and tp[tidx, idx] == 0:
fp[tidx, idx] = 1
    # Build the AP curve for this class: tp and fp are ordered by confidence, so each
    # confidence cutoff yields one (recall, precision) point via the cumulative sums,
    # tracing out an AP curve. Seven IoU thresholds give seven AP curves.
    tp_cumsum = np.cumsum(tp, axis=1).astype(np.float64)  # np.float was removed in NumPy 1.24+
    fp_cumsum = np.cumsum(fp, axis=1).astype(np.float64)
recall_cumsum = tp_cumsum / npos
precision_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum)
for tidx in range(len(tiou_thresholds)):
ap[tidx] = interpolated_prec_rec(precision_cumsum[tidx,:], recall_cumsum[tidx,:])
return ap
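compute_average_precision_detection relies on two helpers not shown above, segment_iou and interpolated_prec_rec. The versions below follow the official ActivityNet evaluation code, which this repo almost certainly reuses (still, treat them as a reference sketch):

def segment_iou(target_segment, candidate_segments):
    # temporal IoU between one [t-start, t-end] segment and an array of candidates
    tt1 = np.maximum(target_segment[0], candidate_segments[:, 0])
    tt2 = np.minimum(target_segment[1], candidate_segments[:, 1])
    segments_intersection = (tt2 - tt1).clip(0)   # non-negative overlap
    segments_union = (candidate_segments[:, 1] - candidate_segments[:, 0]) \
        + (target_segment[1] - target_segment[0]) - segments_intersection
    return segments_intersection.astype(float) / segments_union

def interpolated_prec_rec(prec, rec):
    # Pascal VOC-style interpolated AP: make precision monotonically
    # non-increasing from the right, then integrate it over recall
    mprec = np.hstack([[0], prec, [0]])
    mrec = np.hstack([[0], rec, [1]])
    for i in range(len(mprec) - 1)[::-1]:
        mprec[i] = max(mprec[i], mprec[i + 1])
    idx = np.where(mrec[1:] != mrec[:-1])[0] + 1
    return np.sum((mrec[idx] - mrec[idx - 1]) * mprec[idx])

As a worked example with invented numbers: npos = 2, tp = [1, 0, 1], fp = [0, 1, 0] give recall = [0.5, 0.5, 1.0] and precision = [1.0, 0.5, 0.67], and the interpolated AP is 0.5 * 1.0 + 0.5 * 0.67 ≈ 0.83.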