1. 读取.hdf5文件:
import os
import h5py
source_file_path = "/home/robot/dobot_xtrainer_diffusion_v1.0.0/datasets/orange/train_data/episode_init_3.hdf5" # an .hdf5 file; the 'action' value is read from it
# Read the 'action' dataset from the source file
def get_value_from_source(source_file_path):
    """Return the contents of the root-level 'action' dataset.

    Args:
        source_file_path: Path of the .hdf5 file to read.

    Returns:
        The data stored under 'action', read into memory before the file
        is closed.

    Raises:
        KeyError: If the file has no 'action' entry at its root.
    """
    with h5py.File(source_file_path, 'r') as f:
        # Index with [()] to copy the data out of the file.  Returning the
        # bare f['action'] handle would give the caller a dataset that is
        # already closed once this `with` block exits.
        # Use f['action'][0] instead to get only the first row.
        return f['action'][()]
# Read the 'action' data with the function defined in this snippet
# (get_value_from_source) and display it.
new_left_value = get_value_from_source(source_file_path)
print(new_left_value)
2.判断该.hdf5文件的格式与属性
import os
import h5py
source_file_path = "/home/robot/dobot_xtrainer_diffusion_v1.0.0/datasets/orange/train_data/episode_init_3.hdf5" # an .hdf5 file
# Walk an HDF5 file and print every key, group and dataset it contains
def traverse_datasets(hdf_file):
    """Print the layout of the HDF5 file at *hdf_file*.

    Every key is printed as it is visited.  Each dataset is printed
    together with its full path, followed by a blank-looking line when the
    path contains "action".

    Args:
        hdf_file: Path of the HDF5 file to open read-only.

    Returns:
        None.
    """
    import h5py

    def walk(node, parent_path=''):
        # Depth-first generator yielding (path, dataset) pairs below *node*.
        for name in node.keys():
            child = node[name]
            print("key: ", name)
            child_path = f"{parent_path}/{name}"
            if isinstance(child, h5py.Dataset):
                yield child_path, child
                # Runs only once the consumer asks for the next item.
                print("item: ", child)
            elif isinstance(child, h5py.Group):
                # Descend into the group, then report it afterwards.
                yield from walk(child, child_path)
                print("item Group: ", child)

    with h5py.File(hdf_file, 'r') as handle:
        for dataset_path, dataset in walk(handle):
            print(dataset_path, dataset)
            if "action" in dataset_path:
                print(" ")
# Print the layout of the file at source_file_path.
traverse_datasets(source_file_path)
3. 将某个文件夹下所有 .hdf5 文件中 ["action"](每一行是 14 维数组)每一行的前 7 维,全部替换为指定源 .hdf5 文件中 ["action"] 第一行的前 7 维:
import os
import h5py
# Source file path and target folder path
source_file_path = "/home/robot/dobot_xtrainer_diffusion_v1.0.0/datasets/orange/train_data/episode_init_3.hdf5" # another .hdf5 file that supplies the new 'left' value
target_folder_path = "/home/robot/datasets/dataset_package_test1/train_data" # target folder containing all the .hdf5 files to modify
# Read the first 'action' row from the source file
def get_left_value_from_source(source_file_path):
    """Return row 0 of the root-level 'action' dataset.

    Args:
        source_file_path: Path of the .hdf5 file to read.

    Returns:
        The value stored at index 0 of 'action'.
    """
    with h5py.File(source_file_path, 'r') as src:
        # 'action' is expected at the root of the file.
        return src['action'][0]
# Replace the leading 'action' columns of every frame in the target file
def modify_left_in_file(target_file_path, new_left_value):
    """Overwrite the first 7 values of each 'action' row in an .hdf5 file.

    Columns 7 and above of every row keep the values already stored in the
    target file.  ``new_left_value`` is only read, never modified, so the
    same array can safely be reused for every file.

    Args:
        target_file_path: Path of the .hdf5 file to modify in place.
        new_left_value: Sequence whose first 7 entries are written into
            columns 0-6 of each 'action' row.
    """
    left_dims = 7  # number of leading columns taken from new_left_value

    with h5py.File(target_file_path, 'r+') as f:
        actions = f['action']
        # Same row count as before: one row per entry of the top camera
        # stream (assumes 'action' has at least that many rows).
        frame_count = len(f["/observations/images/top"])
        # Write through a single indexing operation on the dataset itself.
        # f['action'][i][...] = x would only change a temporary in-memory
        # copy of row i and leave the file untouched.
        actions[:frame_count, :left_dims] = new_left_value[:left_dims]
# Fetch the replacement values from the source file
new_left_value = get_left_value_from_source(source_file_path)

# Apply them to every .hdf5 file found directly in the target folder
for filename in os.listdir(target_folder_path):
    if not filename.endswith(".hdf5"):
        continue  # skip anything that is not an .hdf5 file
    target_file_path = os.path.join(target_folder_path, filename)
    print(f"Modifying {target_file_path}...")
    modify_left_in_file(target_file_path, new_left_value)

print("Modification completed.")
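关键点:h5py 官方文档说明,对数据集做索引(如 f['action'][i])读出的是一份 NumPy 拷贝,因此先取出一行、再对这一行切片赋值(如 f['action'][i][8:] = value)只会修改内存中的拷贝,不会写回文件;必须在对数据集的同一次索引中给出全部下标(如 f['action'][i, 8:] = value),或者对整行赋值(如 f['action'][i] = row)。参见:https://docs.h5py.org/en/latest/high/dataset.html#reading-writing-data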