使用 Python 对 .hdf5 文件进行读取、属性查看和数据修改

本文链接：https://blog.youkuaiyun.com/m0_58644391/article/details/144509191

1. 读取.hdf5文件：

import os
import h5py
source_file_path = "/home/robot/dobot_xtrainer_diffusion_v1.0.0/datasets/orange/train_data/episode_init_3.hdf5"  # An .hdf5 file from which the 'action' data is read
# Read the 'action' data from the source file
def get_value_from_source(source_file_path):
    """Return the root-level 'action' dataset as in-memory data.

    Args:
        source_file_path: Path of the .hdf5 file to read.

    Returns:
        The full contents of ``f['action']``, read with ``[()]`` so the
        result stays usable after the file has been closed.

    Raises:
        KeyError: If the file has no root-level 'action' object.
    """
    with h5py.File(source_file_path, 'r') as f:
        # A Dataset handle is only valid while its file is open, so the data
        # must be copied into memory before the ``with`` block exits.
        # Use f['action'][0] instead to fetch only the first row.
        return f['action'][()]

# Read the 'action' data with the reader defined above and display it.
# (The original called get_left_value_from_source, which is not defined
# at this point in the script.)
new_left_value = get_value_from_source(source_file_path)
print(new_left_value)

2.判断该.hdf5文件的格式与属性

import os
import h5py
source_file_path = "/home/robot/dobot_xtrainer_diffusion_v1.0.0/datasets/orange/train_data/episode_init_3.hdf5"  # The .hdf5 file to inspect

# Walk an HDF5 file and print every dataset it contains
def traverse_datasets(hdf_file):
    """Print the path and summary of each dataset stored in ``hdf_file``.

    The file is opened read-only and its groups are descended recursively.
    Progress lines ("key: ", "item: ", "item Group:  ") are printed while
    walking; a line holding a single space follows any dataset whose path
    contains "action".

    Args:
        hdf_file: Path of the .hdf5 file to inspect.

    Returns:
        None.
    """
    import h5py

    def _walk(node, parent_path=''):
        # Yield (path, dataset) pairs for every dataset below ``node``.
        for name in node:
            child = node[name]
            print("key: ", name)
            child_path = f'{parent_path}/{name}'
            if isinstance(child, h5py.Group):
                # Descend first; the group itself is reported afterwards.
                yield from _walk(child, child_path)
                print("item Group:  ", child)
            elif isinstance(child, h5py.Dataset):
                yield (child_path, child)
                # Runs only when the consumer asks for the next entry.
                print("item: ", child)

    with h5py.File(hdf_file, 'r') as h5:
        for dataset_path, dataset in _walk(h5):
            print(dataset_path, dataset)
            if "action" in dataset_path:
                print(" ")

# Print every dataset found in the source file
traverse_datasets(source_file_path)

3. 将某个文件夹下所有 .hdf5 文件中 ["action"]（每行是一个 14 维数组）的前 7 维，全部替换为另一个 .hdf5 文件中 ["action"] 第一行的前 7 维：

import os
import h5py

# Source file path and target folder path
source_file_path = "/home/robot/dobot_xtrainer_diffusion_v1.0.0/datasets/orange/train_data/episode_init_3.hdf5"  # Another .hdf5 file; the replacement 'action' values are read from it
target_folder_path = "/home/robot/datasets/dataset_package_test1/train_data"  # Target folder containing all the .hdf5 files to modify


# Fetch the first 'action' row from the source file
def get_left_value_from_source(source_file_path):
    """Return row 0 of the root-level 'action' dataset.

    Args:
        source_file_path: Path of the .hdf5 file to read.

    Returns:
        The first entry of ``f['action']``, read into memory while the
        file is still open.
    """
    with h5py.File(source_file_path, mode='r') as src:
        # 'action' is expected at the root of the file.
        first_row = src['action'][0]
        return first_row

# Overwrite the leading 'action' columns of a target file
def modify_left_in_file(target_file_path, new_left_value, num_dims=7):
    """Replace the first ``num_dims`` values of every 'action' row in a file.

    Each row of the root-level 'action' dataset keeps its remaining columns;
    only columns ``0 .. num_dims - 1`` are overwritten.

    Args:
        target_file_path: Path of the .hdf5 file to modify in place.
        new_left_value: Sequence holding at least ``num_dims`` values. Its
            first ``num_dims`` entries are written into every row. The
            sequence itself is left untouched.
        num_dims: Number of leading columns to replace (default 7).

    Raises:
        ValueError: If ``new_left_value`` has fewer than ``num_dims`` values.
        KeyError: If the file has no root-level 'action' object.
    """
    replacement = new_left_value[:num_dims]
    if len(replacement) != num_dims:
        raise ValueError(
            f"new_left_value must provide at least {num_dims} values, "
            f"got {len(replacement)}"
        )

    with h5py.File(target_file_path, 'r+') as f:
        action = f['action']
        # Index the dataset itself. Writing through a row that was read back
        # (f['action'][i][:7] = ...) only changes an in-memory copy and never
        # reaches the file.
        # The 1-D replacement is broadcast over all rows, so the row count
        # comes from 'action' rather than from an unrelated dataset.
        action[:, :num_dims] = replacement

# Fetch the replacement 'action' row from the source file, then rewrite
# every .hdf5 file found in the target folder.
new_left_value = get_left_value_from_source(source_file_path)
for filename in os.listdir(target_folder_path):
    if not filename.endswith(".hdf5"):
        continue  # skip entries that are not HDF5 files
    target_file_path = os.path.join(target_folder_path, filename)
    print(f"Modifying {target_file_path}...")
    modify_left_in_file(target_file_path, new_left_value)

print("Modification completed.")

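关键点：h5py 官方文档指出，先取出一行再对其切片赋值（例如 f['action'][i][8:] = ...）只会修改内存中的 NumPy 副本，不会写回文件；必须直接对数据集本身进行索引赋值（例如 f['action'][i] = ... 或 f['action'][i, :7] = ...）。参见：https://docs.h5py.org/en/latest/high/dataset.html#reading-writing-data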