CelebA数据库转换为VOC、YOLO格式

最新推荐文章于 2024-10-31 11:27:07 发布

AI算法网奇

最新推荐文章于 2024-10-31 11:27:07 发布

阅读量2.9k

点赞数

分类专栏：深度学习宝典

深度学习宝典专栏收录该内容

1041 篇文章 ¥29.90 ¥99.00

订阅专栏

超级会员免费看

这篇博客介绍了如何将 CelebA 数据库中的人脸标注数据转换为适用于faster-rcnn和YOLO等算法的VOC和YOLO格式，以便于进行人脸识别和目标检测的训练。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

CelebA数据库转换为VOC、YOLO格式

原文：https://blog.csdn.net/minstyrain/article/details/77888176

CelebA是香港中文大学发布的20多万的名人人脸数据库，被很多算法用来训练，取得了不错的效果，其主页为http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html

它提供了人脸包围框和5个关键点的标注，都放置在一个txt文件中，这和faster-rcnn、YOLO等的要求还是有一些差距的，因此需要做些变换才能使用。

下面这个脚本实现了将CelebA自带标注转换为VOC和YOLO格式的功能。

import cv2,h5py,os
import numpy as np
from xml.dom.minidom import Document
import progressbar
# ---------------------------------------------------------------------------
# Script configuration.
# ---------------------------------------------------------------------------

# Root of the CelebA download; the input paths below are built from it.
rootdir = "../"

# Input locations: image folder plus the landmark and bounding-box
# annotation text files.
imgdir = rootdir + "Img/img_celeba"
landmarkpath = rootdir + "Anno/list_landmarks_celeba.txt"
bboxpath = rootdir + "Anno/list_bbox_celeba.txt"

# Output folders for the VOC XML annotations and the YOLO label files.
vocannotationdir = rootdir + "/" + "Annotations"
labelsdir = rootdir + "/" + "labels"

# Switches selecting which output formats showgt() writes.
# NOTE: "convet2yoloformat" is misspelled, but showgt() reads it under this
# exact name, so it is kept as is.
convet2yoloformat = True
convert2vocformat = True

# Target size passed to cv2.resize for the face crops stored in train.h5.
resized_dim = (48, 48)

# Prefix prepended to every image name listed by generatetxt().
datasetprefix = "/home/yanhe/data/CelebA/images/"

# Shared console progress bar: percentage, "done of total", elapsed time, ETA.
progress = progressbar.ProgressBar(
    widgets=[
        progressbar.Percentage(),
        ' (', progressbar.SimpleProgress(), ') ',
        ' (', progressbar.Timer(), ') ',
        ' (', progressbar.ETA(), ') ',
    ]
)
def drawbboxandlandmarks(img,bbox,landmark):
    """Draw a face box and its landmark points onto ``img`` in place.

    Args:
        img: image array handed to the OpenCV drawing calls.
        bbox: sequence indexed as (x, y, width, height) of the face box.
        landmark: flat sequence of point coordinates (x0, y0, x1, y1, ...).
            A trailing unpaired value is ignored.
    """
    left, top = bbox[0], bbox[1]
    right, bottom = left + bbox[2], top + bbox[3]
    # Colour tuple (0, 255, 0): green when the image is in OpenCV's BGR order.
    cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0))

    # Walk the flat list as (x, y) pairs: even positions are x, odd are y.
    for px, py in zip(landmark[0::2], landmark[1::2]):
        # Radius 2; colour tuple (0, 0, 255) is red in BGR order.
        cv2.circle(img, (int(px), int(py)), 2, (0, 0, 255))
def _read_annotation_rows(path):
    """Return the whitespace-split data rows of an annotation file.

    The first two lines (count and column header) are dropped, and blank
    lines are skipped so a trailing newline does not produce an empty row.
    """
    with open(path) as annofile:
        datalines = annofile.readlines()[2:]
    return [fields for fields in (line.split() for line in datalines) if fields]


def loadgt(landmark_file=None, bbox_file=None):
    """Load image names, bounding boxes and landmarks from the annotation files.

    Args:
        landmark_file: path of the landmark annotation file; defaults to the
            module-level ``landmarkpath``.
        bbox_file: path of the bounding-box annotation file; defaults to the
            module-level ``bboxpath``.

    Returns:
        A tuple ``(imgpaths, bboxes, landmarks)``:
        ``imgpaths`` holds the first column of every landmark row,
        ``bboxes`` holds the integer columns of every bounding-box row, and
        ``landmarks`` holds the integer columns of every landmark row.
        Rows are returned in file order; the two files are not cross-checked
        against each other.

    Raises:
        ValueError: if a coordinate column is not an integer.
    """
    if landmark_file is None:
        landmark_file = landmarkpath
    if bbox_file is None:
        bbox_file = bboxpath

    imgpaths = []
    landmarks = []
    for fields in _read_annotation_rows(landmark_file):
        imgpaths.append(fields[0])
        landmarks.append([int(value) for value in fields[1:]])

    # The image name in column 0 of the bbox file is not needed here.
    bboxes = [
        [int(value) for value in fields[1:]]
        for fields in _read_annotation_rows(bbox_file)
    ]
    return imgpaths, bboxes, landmarks
def generate_hdf5():
    """Crop every annotated face, resize it and store crops and labels in ``train.h5``.

    For each image listed by ``loadgt()`` the face box is cut out of the
    image, resized to ``resized_dim`` and appended to the ``data`` dataset.
    The matching row of the ``labels`` dataset is laid out as::

        [1, x, y, w, h, lx0, ly0, lx1, ly1, ...]

    where ``x, y, w, h`` is the original bounding box and each landmark
    coordinate is expressed relative to the box (offset from the box origin
    divided by the box width for x, by the box height for y).

    Images are skipped when the box has a non-positive width or height, when
    the image file cannot be read, or when the crop is empty.
    Both datasets are written as float32.
    """
    imgpaths, bboxes, landmarks = loadgt()
    faces = []
    labels = []
    for idx, name in enumerate(imgpaths):
        print(idx)
        bbox = bboxes[idx]
        landmark = landmarks[idx]
        # Degenerate boxes cannot be cropped or used as a divisor below;
        # check before paying for the image read.
        if bbox[2] <= 0 or bbox[3] <= 0:
            continue

        imgpath = imgdir + "/" + name
        img = cv2.imread(imgpath)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print("skipping unreadable image: " + imgpath)
            continue

        # Rows are indexed by y, columns by x.
        face = img[bbox[1]:bbox[1] + bbox[3], bbox[0]:bbox[0] + bbox[2]]
        if face.size == 0:
            # Box lies outside the image; cv2.resize rejects empty input.
            print("skipping empty crop: " + imgpath)
            continue
        faces.append(cv2.resize(face, resized_dim))

        label = [1]
        label.extend(bbox)
        for pos, coord in enumerate(landmark):
            if pos % 2 == 0:
                # Even positions are x coordinates.
                label.append((coord - bbox[0]) * 1.0 / bbox[2])
            else:
                # Odd positions are y coordinates.
                label.append((coord - bbox[1]) * 1.0 / bbox[3])
        labels.append(label)

    faces = np.asarray(faces)
    labels = np.asarray(labels)
    # The context manager closes the file even if a write fails.
    with h5py.File('train.h5', 'w') as f:
        f['data'] = faces.astype(np.float32)
        f['labels'] = labels.astype(np.float32)
def viewginhdf5():
    """Display every face stored in ``train.h5`` with its landmarks drawn on it.

    Reads the ``data`` and ``labels`` datasets fully into memory, then shows
    each face in an OpenCV window named ``"img"`` and waits for a key press
    before moving to the next one.

    Label columns 5 and onward are read as landmark coordinates relative to
    the face crop; they are scaled by ``resized_dim`` to get pixel positions.
    """
    # Load both datasets up front so the file can be closed before the
    # interactive display loop starts.
    with h5py.File('train.h5', 'r') as f:
        faces = f['data'][:]
        labels = f['labels'][:]

    for idx in range(len(faces)):
        print(idx)
        face = faces[idx].astype(np.uint8)
        # Column 0 is the class flag, columns 1-4 the bounding box.
        landmark = labels[idx][5:]
        for relx, rely in zip(landmark[0::2], landmark[1::2]):
            center = (int(relx * resized_dim[0]), int(rely * resized_dim[1]))
            cv2.circle(face, center, 1, (0, 0, 255))
        cv2.imshow("img", face)
        cv2.waitKey()
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(text))
    parent.appendChild(node)
    return node


def _write_yolo_label(txtpath, bbox, imgwidth, imgheight):
    """Write one YOLO label line (class 0, box centre and size as image fractions)."""
    xcenter = (bbox[0] + bbox[2] * 0.5) / imgwidth
    ycenter = (bbox[1] + bbox[3] * 0.5) / imgheight
    wr = bbox[2] * 1.0 / imgwidth
    hr = bbox[3] * 1.0 / imgheight
    line = "0 " + str(xcenter) + " " + str(ycenter) + " " + str(wr) + " " + str(hr) + "\n"
    with open(txtpath, 'w') as ftxt:
        ftxt.write(line)


def _build_voc_document(filename, imgshape, bbox):
    """Build the VOC-style annotation DOM for an image containing one face box."""
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'CelebA')
    _append_text_element(doc, annotation, 'filename', filename)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'CelebA Database')
    _append_text_element(doc, source, 'annotation', 'PASCAL VOC2007')
    _append_text_element(doc, source, 'image', 'flickr')
    _append_text_element(doc, source, 'flickrid', '-1')

    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    _append_text_element(doc, owner, 'flickrid', 'tdr')
    _append_text_element(doc, owner, 'name', 'yanyu')

    # imgshape is indexed as (height, width, depth).
    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', str(imgshape[1]))
    _append_text_element(doc, size, 'height', str(imgshape[0]))
    _append_text_element(doc, size, 'depth', str(imgshape[2]))

    _append_text_element(doc, annotation, 'segmented', '0')

    face = doc.createElement('object')
    annotation.appendChild(face)
    _append_text_element(doc, face, 'name', 'face')
    _append_text_element(doc, face, 'pose', 'Unspecified')
    _append_text_element(doc, face, 'truncated', '1')
    _append_text_element(doc, face, 'difficult', '0')

    # bbox is (x, y, w, h); VOC stores the two corners instead.
    bndbox = doc.createElement('bndbox')
    face.appendChild(bndbox)
    _append_text_element(doc, bndbox, 'xmin', str(bbox[0]))
    _append_text_element(doc, bndbox, 'ymin', str(bbox[1]))
    _append_text_element(doc, bndbox, 'xmax', str(bbox[0] + bbox[2]))
    _append_text_element(doc, bndbox, 'ymax', str(bbox[1] + bbox[3]))
    return doc


def showgt():
    """Convert the annotations to YOLO and/or VOC files while previewing them.

    Walks the landmark and bounding-box annotation files in lockstep. For
    every image it:

    * writes ``<labelsdir>/<stem>.txt`` when ``convet2yoloformat`` is set,
    * writes ``<vocannotationdir>/<stem>.xml`` when ``convert2vocformat`` is set,
    * draws the box and landmarks on the image and shows it in the ``"img"``
      window for 1 ms.

    The number of rows processed is the count on the first line of the
    landmark file. Images that cannot be read are reported and skipped.

    Raises:
        ValueError: if an annotation file ends before the announced number of
            rows, or if the two files name different images on the same row.
    """
    for enabled, outdir in ((convet2yoloformat, labelsdir),
                            (convert2vocformat, vocannotationdir)):
        if enabled and not os.path.exists(outdir):
            os.makedirs(outdir)

    with open(landmarkpath) as landmarkfile, open(bboxpath) as bboxfile:
        # Each file starts with a count line followed by a column-header line.
        numofimgs = int(landmarkfile.readline())
        landmarkfile.readline()
        bboxfile.readline()
        bboxfile.readline()

        pbar = progress.start()
        for _ in pbar(range(numofimgs)):
            # Read both rows before any `continue` so the files stay in step.
            landmarkline = landmarkfile.readline().split()
            bboxline = bboxfile.readline().split()
            if not landmarkline or not bboxline:
                raise ValueError("annotation file ended before %d rows were read" % numofimgs)

            filename = landmarkline[0]
            if bboxline[0] != filename:
                raise ValueError("annotation rows out of step: %s vs %s"
                                 % (filename, bboxline[0]))
            landmark = [int(pt) for pt in landmarkline[1:]]
            bbox = [int(bb) for bb in bboxline[1:]]

            imgpath = imgdir + "/" + filename
            img = cv2.imread(imgpath)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                print("skipping unreadable image: " + imgpath)
                continue

            stem = os.path.splitext(filename)[0]
            if convet2yoloformat:
                _write_yolo_label(labelsdir + "/" + stem + ".txt",
                                  bbox, img.shape[1], img.shape[0])
            if convert2vocformat:
                doc = _build_voc_document(filename, img.shape, bbox)
                with open(vocannotationdir + "/" + stem + ".xml", "w") as fxml:
                    fxml.write(doc.toprettyxml(indent=''))

            # The overlay is for the preview only; outputs use the raw numbers.
            drawbboxandlandmarks(img, bbox, landmark)
            cv2.imshow("img", img)
            cv2.waitKey(1)
        pbar.finish()
def generatetxt(trainratio=0.7,valratio=0.2,testratio=0.1):
    """Write train/val/trainval/test image lists next to the dataset root.

    Every entry of ``labelsdir`` is turned into
    ``datasetprefix + <stem> + ".jpg"`` and written, one per line, to
    ``train.txt``, ``val.txt``, ``trainval.txt`` and ``test.txt`` under
    ``rootdir``. Entries are processed in sorted name order so repeated runs
    give the same split.

    Args:
        trainratio: fraction of the entries (from the start) listed in train.
        valratio: fraction following the train part that is listed in val.
        testratio: fraction following the val part that is listed in test.
            Entries beyond ``trainratio + valratio + testratio`` are not
            listed anywhere.

    ``trainval.txt`` receives exactly the train and val entries.
    """
    files = sorted(os.listdir(labelsdir))
    total = len(files)
    # Upper bounds (exclusive) of each split, compared against the index.
    train_end = trainratio * total
    val_end = (trainratio + valratio) * total
    test_end = (trainratio + valratio + testratio) * total

    with open(rootdir + "/" + "train.txt", "w") as ftrain, \
            open(rootdir + "/" + "val.txt", "w") as fval, \
            open(rootdir + "/" + "trainval.txt", "w") as ftrainval, \
            open(rootdir + "/" + "test.txt", "w") as ftest:
        for i, labelname in enumerate(files):
            entry = datasetprefix + os.path.splitext(labelname)[0] + ".jpg" + "\n"
            if i < train_end:
                ftrain.write(entry)
                ftrainval.write(entry)
            elif i < val_end:
                fval.write(entry)
                ftrainval.write(entry)
            elif i < test_end:
                ftest.write(entry)
def generatevocsets(trainratio=0.7,valratio=0.2,testratio=0.1):
    """Write the VOC ``ImageSets/Main`` split files under ``rootdir``.

    The stem (name without extension) of every entry of ``labelsdir`` is
    written, one per line, to ``train.txt``, ``val.txt``, ``trainval.txt``
    and ``test.txt`` in ``<rootdir>/ImageSets/Main``, which is created when
    missing. Entries are processed in sorted name order so repeated runs
    give the same split.

    Args:
        trainratio: fraction of the entries (from the start) put in train.
        valratio: fraction following the train part that is put in val.
        testratio: accepted for signature compatibility; every entry after
            the val part goes to test regardless of this value.

    ``trainval.txt`` receives exactly the train and val entries, so test
    images never appear in it.
    """
    setsdir = rootdir + "/ImageSets/Main"
    if not os.path.exists(setsdir):
        os.makedirs(setsdir)

    files = sorted(os.listdir(labelsdir))
    total = len(files)
    # Round before truncating: 0.7 + 0.2 is 0.8999999999999999 in floating
    # point, so a bare int(10 * 0.9) would give 8 instead of 9.
    train_end = int(round(total * trainratio, 6))
    val_end = int(round(total * (trainratio + valratio), 6))

    with open(setsdir + "/train.txt", 'w') as ftrain, \
            open(setsdir + "/val.txt", 'w') as fval, \
            open(setsdir + "/trainval.txt", 'w') as ftrainval, \
            open(setsdir + "/test.txt", 'w') as ftest:
        for i, labelname in enumerate(files):
            entry = os.path.splitext(labelname)[0] + "\n"
            if i < train_end:
                ftrain.write(entry)
                ftrainval.write(entry)
            elif i < val_end:
                fval.write(entry)
                ftrainval.write(entry)
            else:
                ftest.write(entry)
if __name__ == "__main__":
    # Step 1: convert the annotations (writes the YOLO labels / VOC XML files
    # selected by the configuration flags) while previewing each image.
    showgt()
    # Step 2: build the split lists from the generated label files.
    generatevocsets()
    generatetxt()
    # Optional, disabled by default: dump resized face crops to train.h5.
    #generate_hdf5()