# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bowen Cheng (bcheng9@illinois.edu) and Bin Xiao (leoxiaobin@gmail.com)
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import defaultdict
from collections import OrderedDict
import logging
import os
import cv2
import json_tricks as json
import numpy as np
from torch.utils.data import Dataset
from crowdposetools.cocoeval import COCOeval
from dataset.JointsDataset import JointsDataset
from nms.nms import oks_nms
from nms.nms import soft_oks_nms
# -------------------------------------------
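# Per-joint OKS falloff constants for the 14 CrowdPose-style keypoints,
# used when computing Object Keypoint Similarity during NMS and evaluation.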
crowdpose_sigmas = np.array([.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .25, .25]) / 10.0
# -------------------------------------------
logger = logging.getLogger(__name__)
class EXLPOSEDataset(JointsDataset):
"""`CrowdPose`_ Dataset.
Args:
root (string): Root directory where dataset is located to.
dataset (string): Dataset name(train2017, val2017, test2017).
data_format(string): Data format for reading('jpg', 'zip')
transform (callable, optional): A function/transform that takes in an opencv image
and returns a transformed version. E.g, ``transforms.ToTensor``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
"""
def __init__(self, cfg, root, image_set, is_train, transform=None):
        super().__init__(cfg, root, image_set, is_train, transform)
from crowdposetools.coco import COCO
self.nms_thre = cfg.TEST.NMS_THRE
self.image_thre = cfg.TEST.IMAGE_THRE
self.soft_nms = cfg.TEST.SOFT_NMS
self.oks_thre = cfg.TEST.OKS_THRE
self.in_vis_thre = cfg.TEST.IN_VIS_THRE
self.bbox_file = cfg.TEST.COCO_BBOX_FILE
self.use_gt_bbox = cfg.TEST.USE_GT_BBOX
self.image_width = cfg.MODEL.IMAGE_SIZE[0]
self.image_height = cfg.MODEL.IMAGE_SIZE[1]
self.aspect_ratio = self.image_width * 1.0 / self.image_height
self.pixel_std = 200
self.root = cfg.DATASET.ROOT
        # (paired well-lit annotations used to be loaded here via self.coco_WL)
        self.coco = COCO(self._get_anno_file_keypoint())
        self.ids = list(self.coco.imgs.keys())
self.transform = transform
cats = [cat['name']
for cat in self.coco.loadCats(self.coco.getCatIds())]
self.classes = ['__background__'] + cats
logger.info('=> classes: {}'.format(self.classes))
self.num_classes = len(self.classes)
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
self._coco_ind_to_class_ind = dict(
[
(self._class_to_coco_ind[cls], self._class_to_ind[cls])
for cls in self.classes[1:]
]
)
# load image file names
self.image_set_index = self._load_image_set_index()
self.num_images = len(self.image_set_index)
logger.info('=> num_images: {}'.format(self.num_images))
self.num_joints = 14
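        # CrowdPose joint order (consistent with flip_pairs and the
        # upper/lower body id sets below):
        #  0: left shoulder   1: right shoulder   2: left elbow    3: right elbow
        #  4: left wrist      5: right wrist      6: left hip      7: right hip
        #  8: left knee       9: right knee      10: left ankle   11: right ankle
        # 12: top of head    13: neck
        # joints_weight upweights elbows/knees (1.2) and wrists/ankles (1.5).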
self.flip_pairs = [[0, 1], [2, 3], [4, 5], [6, 7],
[8, 9], [10, 11]]
self.parent_ids = None
self.upper_body_ids = (0, 1, 2, 3, 4, 5, 12, 13)
self.lower_body_ids = (6, 7, 8, 9, 10, 11)
self.joints_weight = np.array(
[
1., 1., 1.2, 1.2,
1.5, 1.5, 1., 1.,
1.2, 1.2, 1.5, 1.5,
1., 1.
],
dtype=np.float32
).reshape((self.num_joints, 1))
self.db = self._get_db()
if is_train and cfg.DATASET.SELECT_DATA:
self.db = self.select_data(self.db)
logger.info('=> load {} samples'.format(len(self.db)))
def _load_image_set_index(self):
""" image id: int """
image_ids = self.coco.getImgIds()
return image_ids
def _get_anno_file_keypoint(self):
""" self.root / annotations / person_keypoints_train2017.json """
prefix = 'ExLPose' \
if 'test' not in self.image_set else 'ExLPose'
return os.path.join(
self.root,
'Annotations',
prefix + '_' + self.image_set + '.json'
)
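    # e.g. with image_set = 'train' (a split name shown for illustration),
    # this resolves to <root>/Annotations/ExLPose_train.json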
def _get_db(self):
if self.is_train or self.use_gt_bbox:
# use ground truth bbox
gt_db = self._load_coco_keypoint_annotations()
else:
# use bbox from detection
gt_db = self._load_coco_person_detection_results()
return gt_db
def _load_coco_keypoint_annotations(self):
""" ground truth bbox and keypoints """
gt_db = []
for index in self.image_set_index:
gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
return gt_db
    def image_path_from_index(self, index):
        """Return the path of a low-light image.

        Note: for unpaired training, ``index`` is ignored and a random
        low-light image is sampled instead.
        """
        # (a paired well-lit image used to be loaded here via self.coco_WL)
        # load an unpaired low-light image
        coco_LL = self.coco
        index_LL = np.random.randint(len(self.ids))
        img_id_LL = self.ids[index_LL]
        image_info_LL = coco_LL.loadImgs(img_id_LL)[0]
        file_name_LL = image_info_LL['file_name']
        return os.path.join(self.root, file_name_LL)
def _load_coco_keypoint_annotation_kernal(self, index):
"""
coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id']
iscrowd:
crowd instances are handled by marking their overlaps with all categories to -1
and later excluded in training
bbox:
[x1, y1, w, h]
:param index: coco image id
:return: db entry
"""
        im_ann_ll = self.coco.loadImgs(index)[0]
        # example image entry:
        # {'file_name': 'dark/imgs_0209_vid000011_exp1200_dark_000052__gain_5.60_exposure_333.png',
        #  'id': 2527, 'height': 1199, 'width': 1919, 'crowdIndex': 0}
        width = im_ann_ll['width']
        height = im_ann_ll['height']
        annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(annIds)
        # each annotation carries: 'num_keypoints', 'iscrowd',
        # 'keypoints' (14 x [x, y, v]), 'image_id', 'bbox' ([x, y, w, h]),
        # 'category_id' and 'id'
# sanitize bboxes
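        # clamp each box to the image bounds and drop boxes that collapse
        # to zero width/height after clamping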
valid_objs = []
for obj in objs:
x, y, w, h = obj['bbox']
x1 = np.max((0, x))
y1 = np.max((0, y))
x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
if x2 >= x1 and y2 >= y1:
obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
valid_objs.append(obj)
objs = valid_objs
rec = []
for obj in objs:
            image_id = obj['image_id']
cls = self._coco_ind_to_class_ind[obj['category_id']]
if cls != 1:
continue
# ignore objs without keypoints annotation
if max(obj['keypoints']) == 0:
continue
            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float64)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float64)
for ipt in range(self.num_joints):
joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
joints_3d[ipt, 2] = 0
t_vis = obj['keypoints'][ipt * 3 + 2]
if t_vis > 1:
t_vis = 1
joints_3d_vis[ipt, 0] = t_vis
joints_3d_vis[ipt, 1] = t_vis
joints_3d_vis[ipt, 2] = 0
center, scale = self._box2cs(obj['clean_bbox'][:4])
rec.append({
'image': self.image_path_from_index(index),
'image_id': image_id,
'center': center,
'scale': scale,
'joints_3d': joints_3d,
'joints_3d_vis': joints_3d_vis,
'filename': '',
'imgnum': 0,
# 'annotation_id': obj['id']
})
return rec
def _box2cs(self, box):
x, y, w, h = box[:4]
return self._xywh2cs(x, y, w, h)
def _xywh2cs(self, x, y, w, h):
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
if w > self.aspect_ratio * h:
h = w * 1.0 / self.aspect_ratio
elif w < self.aspect_ratio * h:
w = h * self.aspect_ratio
scale = np.array(
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
dtype=np.float32)
if center[0] != -1:
scale = scale * 1.25
return center, scale
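    # Worked example (assuming a 192x256 input, i.e. aspect_ratio = 0.75):
    # the box [x=0, y=0, w=100, h=300] yields center = (50, 150); the box is
    # widened to w = 225 to match the aspect ratio, giving
    # scale = [225/200, 300/200] * 1.25 = [1.40625, 1.875].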
def _load_coco_person_detection_results(self):
all_boxes = None
with open(self.bbox_file, 'r') as f:
all_boxes = json.load(f)
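        # expected format: standard COCO detection results, i.e. a list of
        # dicts such as {'image_id': 675, 'category_id': 1,
        #                'bbox': [x, y, w, h], 'score': 0.98}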
if not all_boxes:
logger.error('=> Load %s fail!' % self.bbox_file)
return None
logger.info('=> Total boxes: {}'.format(len(all_boxes)))
kpt_db = []
num_boxes = 0
for n_img in range(0, len(all_boxes)):
det_res = all_boxes[n_img]
if det_res['category_id'] != 1:
continue
img_name = self.image_path_from_index(det_res['image_id'])
image_id = det_res['image_id']
box = det_res['bbox']
score = det_res['score']
if score < self.image_thre:
continue
num_boxes = num_boxes + 1
center, scale = self._box2cs(box)
            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float64)
            joints_3d_vis = np.ones(
                (self.num_joints, 3), dtype=np.float64)
kpt_db.append({
'image': img_name,
'image_id': image_id,
'center': center,
'scale': scale,
'score': score,
'joints_3d': joints_3d,
'joints_3d_vis': joints_3d_vis,
})
        logger.info('=> Total boxes after filtering low scores @ {}: {}'.format(
            self.image_thre, num_boxes))
return kpt_db
def __repr__(self):
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
fmt_str += ' Root Location: {}\n'.format(self.root)
return fmt_str
    def processKeypoints(self, keypoints):
        # cast each (x, y, score) triple to plain floats
        tmp = keypoints.copy()
        if keypoints[:, 2].max() > 0:
            num_keypoints = keypoints.shape[0]
for i in range(num_keypoints):
tmp[i][0:3] = [
float(keypoints[i][0]),
float(keypoints[i][1]),
float(keypoints[i][2])
]
return tmp
def evaluate(self, cfg, preds, output_dir, all_boxes, image_id,
*args, **kwargs):
rank = cfg.RANK
res_folder = os.path.join(output_dir, 'results')
if not os.path.exists(res_folder):
try:
os.makedirs(res_folder)
except Exception:
logger.error('Fail to make {}'.format(res_folder))
        res_file = os.path.join(
            res_folder, 'keypoints_{}_results_{}.json'.format(
                self.image_set, rank)
        )
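        # ------------------------------------------------------------------
        # NOTE: the original file is truncated after the res_file assignment.
        # What follows is a reconstruction sketch of the standard HRNet-style
        # CrowdPose evaluation flow implied by the imports above (defaultdict,
        # OrderedDict, oks_nms, soft_oks_nms, COCOeval): group predictions per
        # image, rescore each person by the mean confidence of its visible
        # keypoints, apply OKS-based NMS, then write and score the results
        # with crowdposetools. It assumes image_id holds one id per
        # prediction; the exact original details are not verified.
        # ------------------------------------------------------------------
        kpts = defaultdict(list)
        for idx, kpt in enumerate(preds):
            kpts[int(image_id[idx])].append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(image_id[idx])
            })

        # rescoring and OKS NMS
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, self.num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > self.in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescore by detection confidence times mean keypoint confidence
                n_p['score'] = kpt_score * box_score

            # the upstream oks_nms/soft_oks_nms also accept a sigmas argument;
            # crowdpose_sigmas (defined above) can be threaded through here if
            # this repo's implementation supports it
            nms = soft_oks_nms if self.soft_nms else oks_nms
            keep = nms(img_kpts, self.oks_thre)
            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        # dump results in COCO keypoint-result format and score them
        # (a minimal inline version of the upstream helper methods)
        results = []
        for img_kpts in oks_nmsed_kpts:
            for k in img_kpts:
                results.append({
                    'image_id': k['image'],
                    'category_id': 1,
                    'keypoints': [float(v) for v in
                                  np.asarray(k['keypoints'])[:, 0:3].flatten()],
                    'score': float(k['score'])
                })
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        stats_names = ['AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75',
                       'AP (easy)', 'AP (medium)', 'AP (hard)']
        info_str = list(zip(stats_names, coco_eval.stats))
        name_value = OrderedDict(info_str)
        return name_value, name_value['AP']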