VOC数据集的格式为:
|----VOC2007
|----Annotations
|----*.xml
|---ImageSets
|----Main
|----test.txt
|----train.txt
|----trainval.txt
|----val.txt
|---JPEGImages
|----*.jpg
其中Annotations文件夹下的xml文件是每张图片的标注信息,ImageSets/Main文件夹下四个txt文件记录了用来训练、验证和测试的图片的文件名(不含扩展名),JPEGImages文件夹下存放数据集的所有图片
coco2014的数据集
下载链接:数据集标签文件(COCO_Text.zip)、数据集所有图片(train2014.zip)。该数据集中train2014.zip存放所有图片,COCO_Text.zip里面用一个json文件存放图片的标注信息,部分图片的标注如下所示:
{"imgs":
{"378466":{"width": 612, "file_name":"COCO_train2014_000000378466.jpg", "set":"train", "id": 378466, "height": 612},
"370250":{"width": 427, "file_name":"COCO_train2014_000000370250.jpg", "set": "test","id": 370250, "height": 640},
"36606":{"width": 640, "file_name":"COCO_train2014_000000036606.jpg", "set": "val","id": 36606, "height": 480}
将coco-text的数据集格式转换为pascal_voc的格式
1、利用COCO_Text.json中的信息,生成类似于pascal_voc的ImageSets文件夹中的信息。将下列三个python文件(coco_text.py、format_annotation.py、ann2voc2007.py,其中coco_text.py是format_annotation.py依赖的接口模块)放在与下载的数据集同一级目录中,先运行format_annotation.py文件将数据集解压并生成一个formatted_dataset文件夹,该文件夹下的文件结构符合voc结构,再运行ann2voc2007.py文件,生成xml标注文件
coco_text.py
__author__ = 'andreasveit'
__version__ = '1.1'
# Interface for accessing the COCO-Text dataset.
# COCO-Text is a large dataset designed for text detection and recognition.
# This is a Python API that assists in loading, parsing and visualizing the
# annotations. The format of the COCO-Text annotations is also described on
# the project website http://vision.cornell.edu/se3/coco-text/. In addition to this API, please download both
# the COCO images and annotations.
# This dataset is based on Microsoft COCO. Please visit http://mscoco.org/
# for more information on COCO, including for the image data, object annotations
# and caption annotations.
# An alternative to using the API is to load the annotations directly
# into Python dictionary:
# with open(annotation_filename) as json_file:
# coco_text = json.load(json_file)
# Using the API provides additional utility functions.
# The following API functions are defined:
# COCO_Text - COCO-Text api class that loads COCO annotations and prepare data structures.
# getAnnIds - Get ann ids that satisfy given filter conditions.
# getImgIds - Get img ids that satisfy given filter conditions.
# loadAnns - Load anns with the specified ids.
# loadImgs - Load imgs with the specified ids.
# showAnns - Display the specified annotations.
# loadRes - Load algorithm results and create API for accessing them.
# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
# COCO-Text Toolbox. Version 1.1
# Data and paper available at: http://vision.cornell.edu/se3/coco-text/
# Code based on Microsoft COCO Toolbox Version 1.0 by Piotr Dollar and Tsung-Yi Lin
# extended and adapted by Andreas Veit, 2016.
# Licensed under the Simplified BSD License [see bsd.txt]
import json
import datetime
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
import numpy as np
import copy
import os
class COCO_Text:
    """
    Helper for loading, indexing and querying COCO-Text annotations.

    The raw JSON dictionary is kept in ``self.dataset``; ``createIndex``
    derives integer-keyed lookup tables from it.
    """

    def __init__(self, annotation_file=None):
        """
        Constructor of COCO-Text helper class for reading and visualizing annotations.
        :param annotation_file (str): location of annotation file; when None an
                                      empty helper is created (as used by loadRes)
        :return:
        """
        self.dataset = {}
        self.anns = {}       # annotation id (int) -> annotation dict
        self.imgToAnns = {}  # image id (int) -> list of annotation ids
        self.catToImgs = {}
        self.imgs = {}       # image id (int) -> image dict
        self.cats = {}
        self.val = []        # ids of images whose 'set' is 'val'
        self.train = []      # ids of images whose 'set' is 'train'
        if annotation_file is not None:
            assert os.path.isfile(annotation_file), "file does not exist"
            print('loading annotations into memory...')
            time_t = datetime.datetime.now()
            # Close the handle deterministically instead of relying on GC.
            with open(annotation_file, 'r') as json_file:
                dataset = json.load(json_file)
            print(datetime.datetime.now() - time_t)
            self.dataset = dataset
            self.createIndex()

    @staticmethod
    def _as_id_list(ids):
        """
        Normalise an id argument to a list.
        :param ids: None, a single id, or a list/tuple/set of ids
        :return: list of ids (empty for None)
        """
        if ids is None:
            return []
        if isinstance(ids, (list, tuple, set, frozenset)):
            return list(ids)
        return [ids]

    @staticmethod
    def _as_cat_list(catIds):
        """
        Normalise a category filter to a list of (category type, category) tuples.
        A bare tuple is a single filter, so only lists are taken as collections.
        """
        if catIds is None:
            return []
        return catIds if isinstance(catIds, list) else [catIds]

    def createIndex(self):
        """
        Build the integer-keyed lookup tables from self.dataset.
        JSON object keys are strings, so every id is converted with int().
        :return:
        """
        print('creating index...')
        dataset = self.dataset
        self.imgToAnns = {int(cocoid): annids for cocoid, annids in dataset['imgToAnns'].items()}
        self.imgs = {int(cocoid): img for cocoid, img in dataset['imgs'].items()}
        self.anns = {int(annid): ann for annid, ann in dataset['anns'].items()}
        self.cats = dataset['cats']
        self.val = [int(cocoid) for cocoid, img in dataset['imgs'].items() if img['set'] == 'val']
        self.train = [int(cocoid) for cocoid, img in dataset['imgs'].items() if img['set'] == 'train']
        print('index created!')

    def info(self):
        """
        Print information about the annotation file.
        :return:
        """
        for key, value in self.dataset['info'].items():
            print('%s: %s' % (key, value))

    def filtering(self, filterDict, criteria):
        """
        Return the keys of filterDict whose values satisfy every criterion.
        :param filterDict (dict)            : mapping to filter
        :param criteria (list of callables) : predicates applied to each value
        :return: list of matching keys
        """
        return [key for key in filterDict if all(criterion(filterDict[key]) for criterion in criteria)]

    def getAnnByCat(self, properties):
        """
        Get ann ids that satisfy given properties
        :param properties (list of tuples of the form [(category type, category)] e.g., [('readability','readable')]
            : get anns for given categories - anns have to satisfy all given property tuples
        :return: ids (int array)       : integer array of ann ids
        """
        # x=a, y=b binds the current pair; a plain closure would see only the last one.
        return self.filtering(self.anns, [lambda d, x=a, y=b: d[x] == y for (a, b) in properties])

    def getAnnIds(self, imgIds=None, catIds=None, areaRng=None):
        """
        Get ann ids that satisfy given filter conditions. default skips that filter
        :param imgIds  (int array)     : get anns for given imgs
               catIds  (list of tuples of the form [(category type, category)] e.g., [('readability','readable')]
                : get anns for given cats
               areaRng (float array)   : get anns for given area range (e.g. [0 inf]), bounds exclusive
        :return: ids (list of int)     : ann ids (always a list, also on Python 3)
        """
        imgIds = self._as_id_list(imgIds)
        catIds = self._as_cat_list(catIds)
        areaRng = [] if areaRng is None else areaRng

        if imgIds:
            anns = [annId
                    for imgId in imgIds if imgId in self.imgToAnns
                    for annId in self.imgToAnns[imgId]]
        else:
            anns = list(self.anns)
        if catIds:
            anns = list(set(anns).intersection(self.getAnnByCat(catIds)))
        if len(areaRng) > 0:
            low, high = areaRng[0], areaRng[1]
            anns = [ann for ann in anns if low < self.anns[ann]['area'] < high]
        return anns

    def getImgIds(self, imgIds=None, catIds=None):
        '''
        Get img ids that satisfy given filter conditions.
        :param imgIds (int array) : get imgs for given ids
        :param catIds (list of (category type, category) tuples) : get imgs having an ann matching all given cats
        :return: ids (int array)  : integer array of img ids
        '''
        imgIds = self._as_id_list(imgIds)
        catIds = self._as_cat_list(catIds)

        if not imgIds and not catIds:
            return list(self.imgs)
        # With no imgIds the category filter applies to all images; starting
        # from an empty set would make the intersection always empty.
        ids = set(imgIds) if imgIds else set(self.imgs)
        if catIds:
            ids &= set(self.anns[annid]['image_id'] for annid in self.getAnnByCat(catIds))
        return list(ids)

    def loadAnns(self, ids=None):
        """
        Load anns with the specified ids.
        :param ids (int array)       : integer ids specifying anns (a single int is accepted)
        :return: anns (object array) : loaded ann objects
        """
        return [self.anns[annid] for annid in self._as_id_list(ids)]

    def loadImgs(self, ids=None):
        """
        Load imgs with the specified ids.
        :param ids (int array)       : integer ids specifying img (a single int is accepted)
        :return: imgs (object array) : loaded img objects
        """
        return [self.imgs[imgid] for imgid in self._as_id_list(ids)]

    def showAnns(self, anns):
        """
        Display the specified annotations on the current matplotlib axes.
        :param anns (array of object): annotations to display
        :return: 0 when anns is empty, otherwise None
        """
        if len(anns) == 0:
            return 0
        ax = plt.gca()
        rectangles = []
        color = []
        for ann in anns:
            # One random RGB colour per annotation, shared by box and label.
            c = np.random.random((1, 3)).tolist()[0]
            left, top, width, height = ann['bbox']
            rectangles.append(Rectangle([left, top], width, height, alpha=0.4))
            color.append(c)
            if 'utf8_string' in ann:
                ax.annotate(ann['utf8_string'], (left, top - 4), color=c)
        p = PatchCollection(rectangles, facecolors=color, edgecolors=(0, 0, 0, 1), linewidths=3, alpha=0.4)
        ax.add_collection(p)

    def loadRes(self, resFile):
        """
        Load result file and return a result api object.
        Each kept result dict is updated in place with 'area' and 'id'.
        :param resFile (str or list) : file name of result file, or the already loaded list of results
        :return: res (obj)           : result api object
        """
        res = COCO_Text()
        res.dataset['imgs'] = list(self.dataset['imgs'])

        print('Loading and preparing results... ')
        time_t = datetime.datetime.now()
        if isinstance(resFile, str):
            with open(resFile) as res_handle:
                anns = json.load(res_handle)
        else:
            anns = resFile
        assert isinstance(anns, list), 'results in not an array of objects'

        known_ids = set(self.getImgIds())
        result_ids = set(int(ann['image_id']) for ann in anns)
        matched_ids = result_ids & known_ids
        if matched_ids != result_ids:
            print('Results do not correspond to current coco set')
            print('skipping %d images' % (len(result_ids) - len(matched_ids)))

        res.imgToAnns = {cocoid: [] for cocoid in matched_ids}
        res.imgs = {cocoid: self.imgs[cocoid] for cocoid in matched_ids}

        # An empty result list is valid; only check the format when there is data.
        if anns:
            assert anns[0]['bbox'] != [], 'results have incorrect format'
        for ann_id, ann in enumerate(anns):
            image_id = int(ann['image_id'])
            if image_id not in matched_ids:  # set lookup, O(1) per annotation
                continue
            bb = ann['bbox']
            ann['area'] = bb[2] * bb[3]
            ann['id'] = ann_id
            res.anns[ann_id] = ann
            res.imgToAnns[image_id].append(ann_id)
        print('DONE (t=%0.2fs)' % ((datetime.datetime.now() - time_t).total_seconds()))
        return res
format_annotation.py
#!/usr/bin/env python
import argparse
import os
import pprint
import sys
import time
from os.path import isfile, join
from os import listdir
# sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../coco-text/coco-text-tool'))
import coco_text
def generate_xml_from_annotation():
    """Placeholder: XML generation is not implemented here; only prints a notice."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('to be implement')
def format_coco_text():
print 'format coco_text dataset: 80percent training, 10percent valing, 10percent testing'
# read annotations
# in : annotate_id imagename bbox(xmin,ymin,w,h);
# out: imgprefix label(text) bbox1(xmin,ymin,xmax,ymax)
# imgprefix label(text) bbox2
# import the annotations of coco-text
if not os.path.exists('train2014'):
print 'train2014/ not found, please unzipping'
return -1;
if not os.path.exists('COCO_Text.json'):
print 'COCO_Text.json not found, please unzipping'
return -1;
train_file = open('formatted_dataset/ImageSets/Main/train.txt','w')
trainval_file = open('formatted_dataset/ImageSets/Main/trainval.txt','w')
test_file = open('formatted_dataset/ImageSets/Main/test.txt','w')
val_file = open('formatted_dataset/ImageSets/Main/val.txt','w')
annotation_in = coco_text.COCO_Text('COCO_Text.json')
annotation_out = open('formatted_dataset/images.annotations', 'w')
# select training image
ann_ids = annotation_in.getAnnIds(imgIds=annotation_in.train,
catIds=[('legibility','legible'),('class','machine printed')])
print 'train annotations:' + str(len(ann_ids))
anns = annotation_in.loadAnns(ann_ids)
imgid_set = set()
for ann in anns:
im_id_str = str(ann['image_id'])
imgprefix = im_id_str
for i in xrange(0, 12 - len(im_id_str)):
imgprefix = '0' + imgprefix
imgprefix = 'COCO_train2014_' + imgprefix
img_name = imgprefix + '.jpg'
# images.annotations
bbox = ann['bbox']
xmin = int(round(bbox[0]))
ymin = int(round(bbox[1]))
xmax = int(round(bbox[0] + bbox[2]))
ymax = int(round(bbox[1] + bbox[3]))
annotation_out.write(img_name + ' text ' + str(xmin) + ' ' + str(ymin) + ' ' + str(xmax) + ' ' + str(ymax) + '\n')
if not ann['image_id'] in imgid_set:
# ImageSets train
train_file.write(imgprefix + '\n')
trainval_file.write(imgprefix + '\n')
# JPEGImages train
if not os.path.isfile('formatted_dataset/JPEGImages/' + img_name):
os.system('mv train2014/' + img_name + ' formatted_dataset/JPEGImages')
imgid_set.add(ann['image_id'])
# select valing and testing image
ann_ids = annotation_in.getAnnIds(imgIds=annotation_in.val,
catIds=[('legibility','legible'),('class','machine printed')])
print 'val annotations:' + str(len(ann_ids))
anns = annotation_in.loadAnns(ann_ids)
imgid_set = set()
cnt = 0
for ann in anns:
cnt += 1
im_id_str = str(ann['image_id'])
imgprefix = im_id_str
for i in xrange(0, 12 - len(im_id_str)):
imgprefix = '0' + imgprefix
imgprefix = 'COCO_train2014_' + imgprefix
img_name = imgprefix + '.jpg'
# images.annotations
bbox = ann['bbox']
xmin = int(round(bbox[0]))
ymin = int(round(bbox[1]))
xmax = int(round(bbox[0] + bbox[2]))
ymax = int(round(bbox[1] + bbox[3]))
annotation_out.write(img_name + ' text ' + str(xmin) + ' ' + str(ymin) + ' ' + str(xmax) + ' ' + str(ymax) + '\n')
if not ann['image_id'] in imgid_set:
# ImageSets train or test
if cnt % 4 == 1:
test_file.write(imgprefix + '\n')
else:
val_file.write(imgprefix + '\n')
trainval_file.write(imgprefix + '\n')
# JPEGImages val or test
if not os.path.isfile('formatted_dataset/JPEGImages/' + img_name):
os.system('mv train2014/' + img_name + ' formatted_dataset/JPEGImages')
imgid_set.add(ann['image_id'])
def format_byted_chi():
    """
    Reorganise the unpacked byted_chi data into a VOC-like layout.

    Reads ``chinese_text_detection/image_to_rois.txt``, whose lines have the
    form ``<imgpath> <xmin,ymin,xmax,ymax>;<box2>;...``. The n-th image is
    moved to ``formatted_dataset/JPEGImages/byted_chi_<n>.jpg`` and one
    ``<image> text xmin ymin xmax ymax`` line per box is written to
    ``formatted_dataset/images.annotations``.

    Five cross-validation folds are then written to
    ``formatted_dataset/ImageSets/folder_num_<fold>/``: within every run of 10
    images, index ``fold`` goes to test, index ``fold + 1`` to val (and
    trainval), and the rest to train (and trainval).

    :return: -1 when ``chinese_text_detection/`` is missing, otherwise None
    :raises AssertionError: on a line or box that does not match the format
    """
    import shutil

    print('format byted_chi dataset: 80percent training, 10percent valing, 10percent testing')
    # in : imgpath bbox1(xmin,ymin,xmax,ymax);bbox2;bbox3
    # out: imgprefix label(text) bbox1(xmin,ymin,xmax,ymax)
    if not os.path.exists('chinese_text_detection'):
        print('chinese_text_detection/ not found, please unzipping')
        return -1

    image_total = 0
    with open('chinese_text_detection/image_to_rois.txt', 'r') as annotation_in, \
            open('formatted_dataset/images.annotations', 'w') as annotation_out:
        for line in annotation_in:
            strs = line.split()
            assert len(strs) == 2, 'Not regular byted_chi line'
            image_path = strs[0]
            new_img_name = 'byted_chi_' + str(image_total) + '.jpg'

            # JPEGImages: moved without a shell, so odd characters in the
            # path are harmless. A missing source is skipped, as `mv` did.
            source = 'chinese_text_detection/' + image_path
            if os.path.exists(source):
                shutil.move(source, 'formatted_dataset/JPEGImages/' + new_img_name)

            # images.annotations
            for bbox in strs[1].split(';'):
                box = bbox.split(',')
                assert len(box) == 4, 'Not regular byted_chi bbox'
                annotation_out.write(new_img_name + ' text ' + ' '.join(box) + '\n')
            image_total += 1

    folder_num = 5  # cross validation of folder_num folds
    for fold in range(folder_num):
        folder_dir = 'formatted_dataset/ImageSets/folder_num_' + str(fold)
        if not os.path.exists(folder_dir):
            os.makedirs(folder_dir)
        with open(folder_dir + '/train.txt', 'w') as train_file, \
                open(folder_dir + '/trainval.txt', 'w') as trainval_file, \
                open(folder_dir + '/test.txt', 'w') as test_file, \
                open(folder_dir + '/val.txt', 'w') as val_file:
            # The first pass counted the images, so the list is not re-read.
            for cnt in range(image_total):
                new_img_pre = 'byted_chi_' + str(cnt)
                slot = cnt % (2 * folder_num)
                if slot == fold:
                    test_file.write(new_img_pre + '\n')
                elif slot == fold + 1:
                    val_file.write(new_img_pre + '\n')
                    trainval_file.write(new_img_pre + '\n')
                else:
                    train_file.write(new_img_pre + '\n')
                    trainval_file.write(new_img_pre + '\n')
class args:
    """Hard-coded stand-in for command-line options read by the entry script."""

    # Dataset to format; the entry script handles 'coco-text' and 'byted-chi'.
    dataset = 'coco-text'
if __name__ == "__main__":
    # Everything below uses paths relative to the dataset directory, which is
    # expected next to this script's directory as ../coco-text.
    working_dir = os.path.join(os.path.dirname(__file__), '../coco-text')
    assert os.path.exists(working_dir), 'Not exists: ' + working_dir
    assert os.path.isdir(working_dir), 'Not a dir: ' + working_dir
    os.chdir(working_dir)

    # Create the VOC-style output skeleton.
    for sub_dir in ('Annotations', 'ImageSets/Main', 'JPEGImages'):
        out_dir = 'formatted_dataset/' + sub_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    print('formating ' + args.dataset)
    if args.dataset == "byted-chi":
        # Start from a fresh extraction of the archive.
        print('remove chinese_text_detection')
        os.system('rm -rf chinese_text_detection')
        print('unzip chinese_text_detection')
        os.system('tar zxf chinese_text_detection.tar.gz')
        print('formating ...')
        format_byted_chi()
        # Expose fold 0 as the default ImageSets/Main via a relative symlink.
        os.system('rm -rf formatted_dataset/ImageSets/Main/')
        os.system('ln -s folder_num_0/ formatted_dataset/ImageSets/Main')
    elif args.dataset == "coco-text":
        # Start from a fresh extraction of both archives.
        print('remove COCO_Text.json')
        os.system('rm COCO_Text.json')
        print('remove train2014')
        os.system('rm -rf train2014')
        print('unzip COCO_Text.zip')
        os.system('unzip COCO_Text.zip')
        print('unzip train2014.zip')
        os.system('unzip train2014.zip')
        print('formating ...')
        format_coco_text()
    else:
        print("not support dataset, to be implemented")
ann2voc2007.py
# coding:utf-8
from PIL import Image
from xml.dom.minidom import Document
import os
def main():
imgpath = 'formatted_dataset/JPEGImages/'
txtpath = 'formatted_dataset/images.annotations'
xmlpath_new = 'formatted_dataset/Annotations/'
coco = {}
# 得到图像的标注信息
file_object = open(txtpath, 'rU')
try:
for line in file_object:
line = line.rstrip('\n')
strs = line.split(' ')
print strs[0]
foldername = 'VOC2007'
# 用xml替换jpg,得到同名文件
xmlname = strs[0].replace('.jpg', '.xml')
info = Image.open(imgpath + strs[0])
# read image size
(width, height) = info.size
strs[2] = max(int(strs[2]), 1)
strs[3] = max(int(strs[3]), 1)
strs[4] = min(int(strs[4]), width);
strs[5] = min(int(strs[5]), height);
# 过滤异常
if strs[2] >= strs[4] or strs[3] >= strs[5] or strs[2] <= 0 or strs[3] <= 0 or strs[4] > width or strs[
5] > height:
continue
if os.path.exists(imgpath + strs[0]):
if xmlname in coco:
Createnode = coco[xmlname]
object_node = Createnode.createElement('object')
Root = Createnode.getElementsByTagName('annotation')[0]
Root.appendChild(object_node)
node = Createnode.createElement('name')
node.appendChild(Createnode.createTextNode(strs[1]))
object_node.appendChild(node)
node = Createnode.createElement('pose')
node.appendChild(Createnode.createTextNode('Unspecified'))
object_node.appendChild(node)
node = Createnode.createElement('truncated')
node.appendChild(Createnode.createTextNode('0'))
object_node.appendChild(node)
node = Createnode.createElement('difficult')
node.appendChild(Createnode.createTextNode('0'))
object_node.appendChild(node)
bndbox_node = Createnode.createElement('bndbox')
object_node.appendChild(bndbox_node)
node = Createnode.createElement('xmin')
node.appendChild(Createnode.createTextNode(str(strs[2])))
bndbox_node.appendChild(node)
node = Createnode.createElement('ymin')
node.appendChild(Createnode.createTextNode(str(strs[3])))
bndbox_node.appendChild(node)
node = Createnode.createElement('xmax')
node.appendChild(Createnode.createTextNode(str(strs[4])))
bndbox_node.appendChild(node)
node = Createnode.createElement('ymax')
node.appendChild(Createnode.createTextNode(str(strs[5])))
bndbox_node.appendChild(node)
else:
Createnode = Document() # 创建DOM文档对象
Root = Createnode.createElement('annotation') # 创建根元素
Createnode.appendChild(Root)
# folder
folder = Createnode.createElement('folder')
folder.appendChild(Createnode.createTextNode(foldername))
Root.appendChild(folder)
# filename
filename = Createnode.createElement('filename')
filename.appendChild(Createnode.createTextNode(strs[0]))
Root.appendChild(filename)
# source
source_node = Createnode.createElement('source')
Root.appendChild(source_node)
node = Createnode.createElement('database')
node.appendChild(Createnode.createTextNode('MS COCO-Text'))
source_node.appendChild(node)
node = Createnode.createElement('annotation')
node.appendChild(Createnode.createTextNode('MS COCO-Text 2014'))
source_node.appendChild(node)
node = Createnode.createElement('image')
node.appendChild(Createnode.createTextNode('NULL'))
source_node.appendChild(node)
node = Createnode.createElement('flickrid');
node.appendChild(Createnode.createTextNode('NULL'));
source_node.appendChild(node);
# owner
owner_node = Createnode.createElement('owner')
Root.appendChild(owner_node)
node = Createnode.createElement('flickrid')
node.appendChild(Createnode.createTextNode('NULL'))
owner_node.appendChild(node)
node = Createnode.createElement('name')
node.appendChild(Createnode.createTextNode('ligen'))
owner_node.appendChild(node)
# size
size_node = Createnode.createElement('size')
Root.appendChild(size_node)
node = Createnode.createElement('width')
node.appendChild(Createnode.createTextNode(str(width)))
size_node.appendChild(node)
node = Createnode.createElement('height');
node.appendChild(Createnode.createTextNode(str(height)))
size_node.appendChild(node)
node = Createnode.createElement('depth')
node.appendChild(Createnode.createTextNode('3'))
size_node.appendChild(node)
# segmented
node = Createnode.createElement('segmented')
node.appendChild(Createnode.createTextNode('0'))
Root.appendChild(node)
# object
object_node = Createnode.createElement('object')
Root.appendChild(object_node)
node = Createnode.createElement('name')
node.appendChild(Createnode.createTextNode(strs[1]))
object_node.appendChild(node)
node = Createnode.createElement('pose')
node.appendChild(Createnode.createTextNode('Unspecified'))
object_node.appendChild(node)
node = Createnode.createElement('truncated')
node.appendChild(Createnode.createTextNode('0'))
object_node.appendChild(node)
node = Createnode.createElement('difficult')
node.appendChild(Createnode.createTextNode('0'))
object_node.appendChild(node)
bndbox_node = Createnode.createElement('bndbox')
object_node.appendChild(bndbox_node)
node = Createnode.createElement('xmin')
node.appendChild(Createnode.createTextNode(str(strs[2])))
bndbox_node.appendChild(node)
node = Createnode.createElement('ymin')
node.appendChild(Createnode.createTextNode(str(strs[3])))
bndbox_node.appendChild(node)
node = Createnode.createElement('xmax')
node.appendChild(Createnode.createTextNode(str(strs[4])))
bndbox_node.appendChild(node)
node = Createnode.createElement('ymax')
node.appendChild(Createnode.createTextNode(str(strs[5])))
bndbox_node.appendChild(node)
coco[xmlname] = Createnode
finally:
file_object.close()
print 'begin load xml...'
for key in coco:
print key
f = open(xmlpath_new + key, 'w')
f.write(coco[key].toprettyxml(indent='\t'))
f.close()
if __name__ == "__main__":
main()
format_annotation.py运行完,生成的文件目录如下:

再用ann2voc2007.py生成对应的xml标注文件,目录结构如下:


本文介绍如何将COCO-Text数据集转换为Pascal VOC格式,包括使用提供的Python脚本进行数据集解压、重组和生成XML标注文件的过程。
3381

被折叠的 条评论
为什么被折叠?



