H5 数据集的使用：用 h5py 写入并读取训练数据
# Write the shuffled inputs and labels into one HDF5 file, one dataset each.
# Mode 'w' creates the file, truncating it if it already exists.
with h5py.File("data_train.h5", 'w') as h5_out:
    for dataset_name, values in (
        ('train_input', shuffled_input),
        ('train_label', shuffled_label),
    ):
        h5_out.create_dataset(dataset_name, data=values)
# Load both datasets fully into memory as NumPy arrays.
# Index the file directly rather than calling .get(): .get() returns None for
# a missing dataset, and np.array(None) would quietly produce a 0-d object
# array instead of failing. hf[name] raises KeyError in that case.
with h5py.File("data_train.h5", 'r') as hf:
    # np.array(...) copies the data out of the file, so the arrays remain
    # usable after the file is closed at the end of the with-block.
    train_data = np.array(hf['train_input'])
    train_label = np.array(hf['train_label'])
使用 h5py 的 group 分割 GB 级数据
# Create one HDF5 group per data split; each group holds its own "train" and
# "label" datasets. Assigning an array to group[name] creates the dataset.
with h5py.File("myh5py.h5", "w") as h5_out:
    for group_name, length in (("bar1", 10), ("bar2", 20)):
        group = h5_out.create_group(group_name)
        for dataset_name in ("train", "label"):
            group[dataset_name] = np.arange(length)
# Walk every top-level group in the file and print its "train" and "label"
# datasets, each loaded into memory as a NumPy array.
with h5py.File("myh5py.h5", "r") as h5_in:
    for group in h5_in.values():
        for dataset_name in ('train', 'label'):
            print(np.array(group[dataset_name]))