NYUDv2数据集预处理，转化为HHA

cww cww

已于 2024-11-20 18:10:33 修改

阅读量1k

点赞数 23

CC 4.0 BY-SA版权

分类专栏：图像文章标签：计算机视觉人工智能

于 2024-11-19 21:11:00 首次发布

本文链接：https://blog.youkuaiyun.com/weixin_51473106/article/details/143891798

图像专栏收录该内容

1 篇文章

订阅专栏

参考NYUDv2数据集预处理——获得语义分割的图像和标签并着色

NYUDv2数据集

NYUDv2数据集包含RGB、RGB-D图像，共有894类。
本文只使用NYUDv2数据集中的RGB图像，且只包含40类。
只需下载Labeled dataset (~2.8 GB)

该数据集包括三个组件：

Labeled：视频数据的子集，并附有密集的多类标签。此数据也已经过预处理，以填充缺失的深度标签。
Raw：Kinect 提供的原始 rgb、深度和加速计数据。
Toolbox：用于操作数据和标签的有用函数。

ps：我只用到 Labeled 数据集，因此这里只对其进行预处理。

Labeled 数据集

其包含以下变量：（标红部分为脚本提取的数据）

accelData – 拍摄每帧时加速度计读数的 Nx4 矩阵。各列依次为设备的滚动、偏航、俯仰和倾斜角度。
depths – 经过修补（in-painted，即已填充缺失值）的深度图的 HxWxN 矩阵，其中 H 和 W 分别是高度和宽度，N 是图像数。深度元素的值以米为单位。
images – RGB 图像的 HxWx3xN 矩阵，其中 H 和 W 分别是高度和宽度，N 是图像的数量。
instances （实例） – 实例映射的 HxWxN 矩阵。使用 Toolbox 中的 get_instance_masks.m 恢复场景中每个对象实例的蒙版。
labels – 对象标签掩码的 HxWxN 矩阵，其中 H 和 W 分别是高度和宽度，N 是图像数。标签取值范围为 1..C，其中 C 是类的总数。如果像素的标签值为 0，则该像素为“未标记”。
names– 每个类的英文名称的 Cx1 元胞数组。
namesToIds– 从英文标签名称映射到类 ID（使用 C 键值对）
rawDepths – 原始深度图的 HxWxN 矩阵，其中 H 和 W 分别是高度和宽度，N 是图像数量。这些深度图是在深度图像投影到 RGB 图像平面之后、填充缺失深度值之前得到的。此外，已消除 Kinect 设备的深度非线性，每个深度图像的值均以米为单位。
rawDepthFilenames – Nx1 元胞数组，记录 Labeled 数据集中每个深度图像所对应的文件名（位于 Raw 数据集中）。
rawRgbFilenames – Nx1 元胞数组，记录 Labeled 数据集中每个 RGB 图像所对应的文件名（位于 Raw 数据集中）。
scenes – Nx1 元胞数组，表示从中拍摄每张图像的场景的名称。
sceneTypes – 从中拍摄每张图像的场景类型的 Nx1 元胞数组。

脚本内容

preprocess.py

输出原始图像、分割图像、深度图像和原始深度图像，并且能够输出划分训练集和验证集的txt文件

import os
import h5py
import numpy as np
from PIL import Image
from tqdm import tqdm
from scipy.io import loadmat
# 输出原始图像和分割图像，并且能够输出划分训练集和验证集的txt文件
# 定义一个函数，用于将图像和标签列表写入txt文件
def write_txt(f, list_ids):
    """Write ``list_ids`` to ``f`` separated by newlines, then close ``f``.

    :param f: writable text file object. It is always closed before this
        function returns, even when writing fails.
    :param list_ids: iterable of strings; written one per line with no
        trailing newline after the last entry.
    :return: None
    """
    try:
        f.write('\n'.join(list_ids))
    finally:
        # This function owns closing the handle, so release it even when
        # join()/write() raises (e.g. a non-string id).
        f.close()

# 定义一个函数，用于从HDF5文件中提取图像数据，并保存为JPG格式
def extract_data(root):
    """Extract the RGB images from nyu_depth_v2_labeled.mat as JPG files.

    Reads the ``images`` dataset from ``<root>/nyu_depth_v2_labeled.mat`` and
    saves entry ``i`` as ``<root>/images/<i>.jpg`` (0-based index).

    :param root: directory containing nyu_depth_v2_labeled.mat; the
        ``images`` output directory is created inside it when missing.
    :return: None
    """
    print('Extracting images and labels from nyu_depth_v2_labeled.mat...')
    mat_path = os.path.join(root, 'nyu_depth_v2_labeled.mat')
    # Open read-only and release the HDF5 handle as soon as the data has been
    # copied into memory.
    with h5py.File(mat_path, 'r') as data:
        images = np.array(data['images'])
    print(f'images shape: {images.shape}')
    num_img = images.shape[0]
    print(f'image number: {num_img}')

    images_dir = os.path.join(root, 'images')
    os.makedirs(images_dir, exist_ok=True)

    for i in tqdm(range(num_img)):
        img = images[i]
        # The first axis of each entry is indexed as the colour channel:
        # plane 0 -> R, plane 1 -> G, plane 2 -> B.
        r = Image.fromarray(img[0]).convert('L')
        g = Image.fromarray(img[1]).convert('L')
        b = Image.fromarray(img[2]).convert('L')
        img = Image.merge('RGB', (r, g, b))
        # NOTE(review): if the planes are stored with the spatial axes swapped
        # (MATLAB column-major layout), a 90-degree rotation presumably leaves
        # the picture mirrored left-right and Image.TRANSPOSE would restore it
        # exactly. Kept unchanged so the images stay aligned with the label
        # and depth exports - confirm before changing.
        img = img.transpose(Image.ROTATE_270)
        img.save(os.path.join(images_dir, str(i) + '.jpg'), optimize=True)

# 定义一个函数，用于从splits.mat文件中生成训练集和验证集的划分，并保存到txt文件
def split(root):
    """Write the train/validation id lists defined by splits.mat.

    Reads ``trainNdxs`` and ``testNdxs`` from ``<root>/splits.mat``, shifts
    the indices by -1 and writes them, one per line, to ``<root>/train.txt``
    and ``<root>/val.txt``. Existing list files are overwritten.

    :param root: directory containing splits.mat; the txt files are written
        into the same directory.
    :return: None
    """
    print('Generating training and validation split from splits.mat...')
    split_file = loadmat(os.path.join(root, 'splits.mat'))
    # loadmat returns 2-D arrays, so iterating them directly yields 1-element
    # rows; flatten first so int() always receives a scalar.
    train_images = tuple(int(x) for x in np.ravel(split_file["trainNdxs"]))
    test_images = tuple(int(x) for x in np.ravel(split_file["testNdxs"]))
    print("%d training images" % len(train_images))
    print("%d test images" % len(test_images))

    # Shift by one so the ids are 0-based, like the exported file names.
    train_ids = [str(i - 1) for i in train_images]
    test_ids = [str(i - 1) for i in test_images]

    # Open with 'w' rather than 'a': the lists have no trailing newline, so
    # appending on a re-run would glue the last old id to the first new one
    # and duplicate every entry.
    with open(os.path.join(root, 'train.txt'), 'w') as train_list_file:
        write_txt(train_list_file, train_ids)

    with open(os.path.join(root, 'val.txt'), 'w') as test_list_file:
        write_txt(test_list_file, test_ids)

# 定义一个函数，用于从labels40.mat文件中提取40类标签，并保存为PNG格式
def labels_40(root):
    """Export the 40-class label masks from labels40.mat as PNG files.

    Loads the ``labels40`` array from ``<root>/labels40.mat`` and saves the
    slice at index ``i`` of its third axis as ``<root>/labels40/<i>.png``.

    :param root: directory containing labels40.mat; the ``labels40`` output
        directory is created inside it when missing.
    :return: None
    """
    print('Extracting labels with 40 classes from labels40.mat...')
    mat = loadmat(os.path.join(root, 'labels40.mat'))
    labels = np.array(mat['labels40'])
    print(f'labels shape: {labels.shape}')

    out_dir = os.path.join(root, 'labels40')
    os.makedirs(out_dir, exist_ok=True)

    for idx in tqdm(range(labels.shape[2])):
        # Swap the two spatial axes, store the class ids as 8-bit values and
        # apply the same rotation the other exporters in this script use.
        mask = labels[:, :, idx].T.astype(np.uint8)
        png = Image.fromarray(mask).transpose(Image.ROTATE_270)
        png.save(os.path.join(out_dir, f'{idx}.png'), optimize=True)



def extract_depths(root):
    """Extract depth maps from nyu_depth_v2_labeled.mat and save them as JPG.

    Each map from the ``depths`` dataset is min-max normalised on its own to
    0-255, converted to 8-bit and written to ``<root>/depths/<i>.jpg``.
    Because the scaling is per image, absolute depth values are not
    preserved in the output.

    :param root: Root directory where the mat file is located.
    :return: None
    """
    print('Extracting depth maps from nyu_depth_v2_labeled.mat...')
    mat_path = os.path.join(root, 'nyu_depth_v2_labeled.mat')
    # Read-only access; the handle is closed once the array is in memory.
    with h5py.File(mat_path, 'r') as data:
        depths = np.array(data['depths'])
    print(f'Depths shape: {depths.shape}')
    num_depths = depths.shape[0]
    print(f'Number of depth maps: {num_depths}')

    depths_dir = os.path.join(root, 'depths')
    os.makedirs(depths_dir, exist_ok=True)

    for i in tqdm(range(num_depths)):
        depth_map = depths[i]
        # Normalize depth map to 0-255 range.
        lowest = np.min(depth_map)
        value_range = np.max(depth_map) - lowest
        if value_range > 0:
            depth_map_normalized = (depth_map - lowest) / value_range
        else:
            # A constant map has no range to stretch; emit zeros instead of
            # dividing by zero.
            depth_map_normalized = np.zeros_like(depth_map)
        depth_map_normalized = (depth_map_normalized * 255).astype(np.uint8)
        # Save as JPG file, rotated like the other exports of this script.
        img = Image.fromarray(depth_map_normalized)
        img = img.transpose(Image.ROTATE_270)
        img.save(os.path.join(depths_dir, f'{i}.jpg'), optimize=True)

def extract_raw_depths(root):
    """Extract raw depth maps from nyu_depth_v2_labeled.mat as JPG files.

    Each map from the ``rawDepths`` dataset is min-max normalised on its own
    to 0-255, converted to 8-bit and written to ``<root>/rawDepths/<i>.jpg``.
    Because the scaling is per image, absolute depth values are not
    preserved in the output.

    :param root: Root directory where the mat file is located.
    :return: None
    """
    print('Extracting raw depth maps from nyu_depth_v2_labeled.mat...')
    mat_path = os.path.join(root, 'nyu_depth_v2_labeled.mat')
    # Read-only access; the handle is closed once the array is in memory.
    with h5py.File(mat_path, 'r') as data:
        # Read 'rawDepths', not 'depths': the latter would just duplicate the
        # output of the regular depth export.
        raw_depths = np.array(data['rawDepths'])
    print(f'Raw depth maps shape: {raw_depths.shape}')
    num_depths = raw_depths.shape[0]
    print(f'Number of raw depth maps: {num_depths}')

    raw_depths_dir = os.path.join(root, 'rawDepths')
    os.makedirs(raw_depths_dir, exist_ok=True)

    for i in tqdm(range(num_depths)):
        raw_depth_map = raw_depths[i]
        # Normalize raw depth map to 0-255 range.
        lowest = np.min(raw_depth_map)
        value_range = np.max(raw_depth_map) - lowest
        if value_range > 0:
            raw_depth_map_normalized = (raw_depth_map - lowest) / value_range
        else:
            # A constant map has no range to stretch; emit zeros instead of
            # dividing by zero.
            raw_depth_map_normalized = np.zeros_like(raw_depth_map)
        raw_depth_map_normalized = (raw_depth_map_normalized * 255).astype(np.uint8)
        # Convert to PIL Image and rotate clockwise by 90 degrees.
        img = Image.fromarray(raw_depth_map_normalized)
        img = img.transpose(Image.ROTATE_270)
        # Saved with a .jpg name (JPEG encoding), matching the regular depth
        # export.
        img.save(os.path.join(raw_depths_dir, f'{i}.jpg'), optimize=True)

def main():
    """Run every extraction step against the directory holding this script."""
    root = os.path.dirname(__file__)
    # Steps run in this fixed order; each one receives the same root folder.
    steps = (
        extract_data,
        extract_depths,
        split,
        labels_40,
        extract_raw_depths,
    )
    for step in steps:
        step(root)


if __name__ == '__main__':
    main()