一、前言
最近在解析 SCUT FIR Pedestrian Dataset 数据时,发现官方只提供了 MATLAB 版的解析代码,使用起来非常不方便,而开源教程里基本没有 Python 版的解析方法,所以这里自己写了一个。
二、解析代码
2.1 解析官方给的txt文档
# Parse an official annotation .txt file and return its contents as a dict.
def process_txt(txtpath):
    """Parse one annotation ``.txt`` file into a per-frame label dictionary.

    Layout assumed by the parser: the first 3 lines of the file are a header
    and are skipped; after that the file is read as consecutive 7-line
    records, one per annotated object. Inside a record the first two lines
    are ignored, line 3 is the ``lbl='...' str=N end=M ...`` line, and lines
    4-7 are the ``pos``, ``posv``, ``occl`` and ``lock`` arrays.

    Label names listed in the original notes:
        walk_person  : a walking person
        ride_person  : a person riding
        squat_person : a squatting person
        people       : a group of people
        person?      : not sure whether it is a person
        people?      : not sure whether it is a group of people

    Args:
        txtpath: path of the annotation text file.

    Returns:
        dict mapping the frame id (as ``str``) to a list of dicts, one per
        object visible in that frame, with the keys ``frame``, ``label``,
        ``pos``, ``posv``, ``lock`` and ``occl``. All values are the raw
        text taken from the file. ``pos`` holds four numbers: top-left
        ``x``, ``y`` and the target ``w``, ``h``.

    Raises:
        IndexError: if an array holds fewer entries than the record's
            ``str``..``end`` frame range requires.
        ValueError: if ``str=``/``end=`` are not integers.
    """
    header_lines = 3
    record_lines = 7

    def bracket_payload(line):
        # Drop the 6-character "name=[" prefix and the trailing " ]".
        # The newline is stripped first so that the very last line of a file
        # saved without a final newline is sliced exactly like every other
        # line instead of losing one extra character.
        return line.rstrip("\n")[6:-2]

    with open(txtpath, 'r') as f:
        info = f.readlines()

    label_dict = {}
    record_count = (len(info) - header_lines) // record_lines
    for i in range(record_count):
        first = header_lines + record_lines * i
        record = info[first:first + record_lines]

        occl = bracket_payload(record[5]).split(" ")
        lock = bracket_payload(record[6]).split(" ")
        posv = bracket_payload(record[4]).split(";")
        pos = bracket_payload(record[3]).split(";")

        # "lbl='name' str=N end=M ...": slice off "lbl='" / "'" and the
        # 4-character "str=" / "end=" prefixes.
        fields = record[2].split(" ")
        label = fields[0][5:-1]
        start = int(fields[1][4:])
        end = int(fields[2][4:])

        # The arrays hold one entry per frame in the inclusive start..end range.
        for idx, frameid in enumerate(range(start, end + 1)):
            frame_key = str(frameid)
            entry = {
                'frame': frame_key,
                'label': str(label),
                'pos': pos[idx],
                'posv': posv[idx],
                'lock': lock[idx],
                'occl': occl[idx],
            }
            # setdefault avoids rebuilding the key list for every frame.
            label_dict.setdefault(frame_key, []).append(entry)
    return label_dict
2.2 拆解视频,成为独立的帧
下面的代码按 JPEG 文件头切分 .seq 视频文件,并把每一帧保存为独立的 jpg 图片:
import os
import json
import fnmatch
import shutil
def open_save(file, savepath):
    """Split a ``.seq`` file into JPEG images and write them into ``savepath``.

    The file content is cut at every occurrence of a fixed 10-byte JPEG/JFIF
    header. The chunk before the first header (the ``.seq`` header) is
    discarded; every later chunk is written, with the header put back in
    front of it, as ``1.jpg``, ``2.jpg``, ... in order of appearance.

    Args:
        file: path of the ``.seq`` file to read.
        savepath: output folder. If it already exists it is deleted first,
            so stale frames from an earlier run never survive. Missing
            parent folders are created.
    """
    jpeg_header = b"\xFF\xD8\xFF\xE0\x00\x10\x4A\x46\x49\x46"

    # Work on bytes directly; a str round-trip is not needed for the split.
    with open(file, 'rb') as seq:
        segments = seq.read().split(jpeg_header)

    # Start from an empty output folder.
    if os.path.exists(savepath):
        shutil.rmtree(savepath)
    os.makedirs(savepath)

    # Segment 0 is the .seq header, so image numbering starts at 1.
    for count, img in enumerate(segments[1:], start=1):
        filenamewithpath = os.path.join(savepath, str(count) + '.jpg')
        with open(filenamewithpath, 'wb') as out:
            out.write(jpeg_header)
            out.write(img)
# Split every .seq video into individual jpg images.
def Split_Video_to_Image(rootdir="./SCUT FIR Pedestrian Dataset/videos/set02"):
    """Extract the frames of every ``.seq`` file found under ``rootdir``.

    The directory tree is walked recursively. For each file matching
    ``*.seq`` the frames are written by ``open_save`` into a folder that
    sits next to the file and is named after it without the ``.seq``
    extension. The source path and the target folder are printed for every
    file processed.

    Args:
        rootdir: folder to scan; defaults to the ``set02`` video folder of
            the dataset, which was the previously hard-coded location.
    """
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # Only .seq files are videos.
            if not fnmatch.fnmatch(filename, '*.seq'):
                continue
            thefilename = os.path.join(parent, filename)
            # Strip only the final extension so that names containing extra
            # dots (e.g. "a.part1.seq" / "a.part2.seq") keep distinct folders.
            thesavepath = os.path.join(parent, os.path.splitext(filename)[0])
            print("Filename=" + thefilename)
            print("Savepath=" + thesavepath)
            open_save(thefilename, thesavepath)
2.3 批量生成与 txt 对应的 json 标签文件
def all_txt_process(txtroot='./SCUT FIR Pedestrian Dataset/annotations',
                    savejson='./SCUT FIR Pedestrian Dataset/annotations_label'):
    """Convert every annotation ``.txt`` under ``txtroot`` into a JSON file.

    ``txtroot`` is expected to contain one sub-folder per set. Each ``.txt``
    file inside a sub-folder is parsed with ``process_txt`` and the result is
    dumped to ``<savejson>/<sub-folder>/<name>_frame_label.json``. Output
    folders are created when missing.

    Args:
        txtroot: folder holding the per-set annotation sub-folders; defaults
            to the previously hard-coded dataset location.
        savejson: root folder for the generated JSON files; defaults to the
            previously hard-coded output location.
    """
    for setname in os.listdir(txtroot):
        setdir = os.path.join(txtroot, setname)
        # A stray regular file next to the set folders cannot be listed.
        if not os.path.isdir(setdir):
            continue
        for txtname in os.listdir(setdir):
            stem, suffix = os.path.splitext(txtname)
            if suffix != '.txt':
                continue
            label_dict = process_txt(os.path.join(setdir, txtname))
            savepath = os.path.join(savejson, setname)
            os.makedirs(savepath, exist_ok=True)
            # Build the name from the stem so only the final ".txt" is replaced.
            jsonpath = os.path.join(savepath, stem + '_frame_label.json')
            with open(jsonpath, 'w') as f:
                json.dump(label_dict, f)