read file(torch)
-- Slow baseline: read one 1024-dim feature vector from feat.txt into a tensor.
-- NOTE(review): i, j, batchSize, seqLengthMax come from an enclosing scope
-- not shown here — this is only a fragment.
feattemp=torch.Tensor(batchSize,seqLengthMax,featdim)
-- ...
file = io.open("feat.txt", "r")
io.input(file)
-- Reads the file's single line: featdim numbers separated by spaces.
temp=io.read()
for m=1,featdim do
print("m"..m)
-- temp:split(' ') is re-evaluated on every iteration, so the loop does
-- O(featdim^2) work — this is why reading is reported as extremely slow.
feattemp[i][j][m]=temp:split(' ')[m]
end
io.close(file)
Here featdim is 1024 dimensions.
The file feat.txt contains a single line: 1024 numbers separated by spaces.
Reading it this way is extremely slow!
convert txt to hdf5(python)
temp.txt
2.11 4.32 9.103 4.92 1.2
1
http://hdf-forum.184993.n3.nabble.com/Converting-from-ASCII-to-hdf5-h5fromtxt-td3264997.html
#!/usr/bin/env python
from sys import argv
import numpy as np
import h5py

# Convert a whitespace-separated text file of numbers into an HDF5 dataset.
# np.loadtxt parses the whole file in one call (much faster than per-value
# parsing in a loop).
data = np.loadtxt("temp.txt")
# Context manager guarantees the HDF5 file is closed even if the write fails
# (the original h5file.close() was skipped on any exception).
with h5py.File("temp.h5", 'w') as h5file:
    h5file.create_dataset('feature', data=data)
2
https://www.getdatajoy.com/learn/Read_and_Write_HDF5_from_Python
from __future__ import print_function
import numpy as np
import h5py
m1 = np.random.random(size = (1000,20))
m2 = np.random.random(size = (1000,200))
with h5py.File('data.h5', 'w') as hf:
hf.create_dataset('dataset_1', data=m1)
hf.create_dataset('dataset_2', data=m2)
final
import numpy as np
import os
import sys
import glob
import h5py

# Pack per-frame DCNN feature vectors (one text file per frame, named by a
# zero-padded integer index) into one HDF5 file per video. Videos are listed
# in a CHECKSUMS file; videos with no feature files are logged instead.

dataset="LDC2014E16"
featdir="/work1/t2g-shinoda2011/15M54105/trecvid/features/"
dcnndir="Bottom_up_13k_pool5"
checksum="/work1/t2g-shinoda2011/14M38468/MED14-SUPPTEST/video/CHECKSUMS"
#nfeat=12988 #softmax layer
nfeat=1024 #pool5 layer
'''
dataset=sys.argv[1]
featdir=sys.argv[2]
dcnndir=sys.argv[3]
checksum=sys.argv[4]
nfeat=int(sys.argv[5])
'''
outpath = os.path.join(featdir, dataset, dcnndir + "_hdf5")
featpath = os.path.join(featdir, dataset, dcnndir)
log = os.path.join(featdir, dataset + "_" + str(nfeat) + "_nolist")
if not os.path.exists(outpath):
    os.makedirs(outpath)

# Read the whole checksum list up front; each line is "<hash> <filename>".
with open(checksum, 'r') as ch:
    ch_lines = ch.readlines()

for line in ch_lines:
    # NOTE(review): assumes exactly one space between hash and filename —
    # confirm against the CHECKSUMS format (line.split() would be sturdier).
    videoname = line.split(' ')[1].split('.')[0]
    videodir = videoname + "_split"
    featlist = glob.glob('%s/%s/*' % (featpath, videodir))
    outfile = os.path.join(outpath, videoname + ".h5")
    # Sort frame indices numerically. sorted() over a generator works on
    # both Python 2 and 3 — the original map(...).sort() fails on Python 3,
    # where map() returns an iterator with no .sort() method.
    ids = sorted(int(os.path.basename(p)) for p in featlist)
    feats = np.zeros((len(ids), nfeat))
    for i, frame_id in enumerate(ids):
        feats[i] = np.loadtxt('%s/%s/%08d' % (featpath, videodir, frame_id))
    if featlist:
        with h5py.File(outfile, 'w') as hf:
            hf.create_dataset('feature', data=feats)
    else:
        # Append (the original 'w' mode truncated the log on every miss,
        # so only the LAST video without features was ever recorded).
        with open(log, 'a') as ll:
            ll.write(videoname + "\n")

print(feats)
sys.exit()
read hdf5(torch)
https://github.com/deepmind/torch-hdf5/blob/master/doc/usage.md
require 'hdf5'
-- Load the entire 'feature' dataset from temp.h5 into a tensor.
local featFile = hdf5.open('temp.h5', 'r')
local feature = featFile:read('feature'):all()
featFile:close()
-- the returned tensor's type is "torch.DoubleTensor"