安装 h5py
pip install h5py
转换为 HDF5 格式
这里以 scene 数据集为例,该数据集可在 https://mulan.sourceforge.net/datasets-mlc.html 中获取。
import h5py
from scipy.io import arff
import numpy as np
# Convert the ARFF file at source_path into an HDF5 file at target_path that
# holds two datasets: 'feature' and 'target'.
source_path = r'./scene.arff'
target_path = r'./scene.hdf5'
# Number of trailing columns in each record that this script treats as labels.
n_labels = 6

# loadarff returns the records together with metadata; only the records are
# used below.
data, meta = arff.loadarff(source_path)
feature = []
target = []
for row in data:
    values = list(row)
    # Everything before the label columns is kept as the feature vector.
    feature.append(values[:-n_labels])
    # The label columns arrive as bytes; decode them and parse as integers.
    target.append([int(v.decode()) for v in values[-n_labels:]])
feature = np.array(feature)
target = np.array(target)

# Save both arrays to target_path.
with h5py.File(target_path, 'w') as hf:
    hf.create_dataset('feature', data=feature)
    hf.create_dataset('target', data=target)

# Re-open the file read-only and print its contents as a quick check.
with h5py.File(target_path, 'r') as hf:
    print('keys: ', hf.keys())
    print(hf['feature'].shape)
    print(hf['target'])
查看 HDF5 文件
推荐使用 HDFView:https://portal.hdfgroup.org/downloads/hdfview/hdfview3_3_2.html