前言
对于现有的VOC格式的数据集,因为图片太大不好训练,所以需要把图片进行分割。但是分割后重新标注图片的工作量很大,所以需要把原有的标签提取并转换,保存为新分割图对应的标签。本代码还可以只选择某一类标签进行迁移。
XML格式学习
XML文件内的信息可以看作是一棵信息树。最根本的节点称为根(root),每个节点可以访问的信息有节点标签(tag)、节点属性(attrib)、节点文本(text)。XML有不同的解析方式,本文以etree方式为例读取xml:
import xml.etree.ElementTree as ET

tree = ET.parse('test.xml')
root = tree.getroot()  # getroot() returns the root node as an Element (here: <annotation>)

# Every Element exposes three pieces of information:
#   element.tag    -> the tag name
#   element.attrib -> the attributes
#   element.text   -> the text content
tag = root.tag  # annotation

# Direct children of the root are reached with find(); .text gives their content.
folder_name = root.find('folder').text  # JPEGImages
img_name = root.find('filename').text  # image.jpg
path_name = root.find('path').text  # /YourVOCPath/JPEGImages/image.jpg

# VOC annotations do not use attributes; this line only demonstrates .attrib.
attrib = root.find('testAtrrib').attrib  # author="testtestAtrrib"

# Nested values need one find() per level: first <size>, then <width>.
size_node = root.find('size')
width = size_node.find('width').text  # 640, type: str

# findall() collects every direct <object> child into a list.
object_notes_list = root.findall('object')
for object_note in object_notes_list:
    # Visit each object in turn. The loop needs a real statement as its body;
    # a body made only of comments is a SyntaxError.
    # TODO: decide whether this object's bndbox lies inside the selected region.
    bndbox = object_note.find('bndbox')
# 一个标签文件示例
<annotation>
<testAtrrib author="testtestAtrrib"></testAtrrib>
<folder>JPEGImages</folder>
<filename>image.jpg</filename>
<path>/YourVOCPath/JPEGImages/image.jpg</path>
<size>
<width>640</width>
<height>640</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>label1</name>
<bndbox>
<xmin>103</xmin>
<ymin>381</ymin>
<xmax>173</xmax>
<ymax>433</ymax>
</bndbox>
</object>
<object>
<name>label1</name>
<bndbox>
<xmin>103</xmin>
<ymin>381</ymin>
<xmax>173</xmax>
<ymax>433</ymax>
</bndbox>
</object>
<object>
<name>label1</name>
<bndbox>
<xmin>217</xmin>
<ymin>542</ymin>
<xmax>282</xmax>
<ymax>607</ymax>
</bndbox>
</object>
</annotation>
以上就是本代码中访问标签信息所用到的方法。
VOC标签文件构建
根据示例,可以看到一个标签文件的根都是annotation,在训练YOLO v5时用到的信息有folder、filename、path、size、segmented。作为截取图片的新标签,在筛选object之前应该创建好新的xml树。注意这里的文件夹名、文件名、路径等都是该标签对应的图片的信息。size所有的数字信息都是str类型的。
def new_voc_xml(folder_name, filename, path_name, size_list):
    """
    Build the skeleton of a VOC annotation tree (everything except <object>).

    folder_name: text for <folder>, e.g. JPEGImages
    filename: text for <filename>, e.g. image.jpg
    path_name: text for <path>, e.g. /YourVOCPath/JPEGImages/image.jpg
    size_list: [width, height, depth]; each entry is written via str()

    Returns the new <annotation> root Element.
    """
    annotation = ET.Element("annotation")

    # Plain text children of <annotation>, in output order.
    header_fields = (
        ("folder", folder_name),
        ("filename", filename),
        ("path", path_name),
    )
    for tag, text in header_fields:
        build_new_node(annotation, tag, text)

    # <size> holds width/height/depth taken from size_list[0..2].
    size_node = ET.SubElement(annotation, "size")
    for index, tag in enumerate(("width", "height", "depth")):
        build_new_node(size_node, tag, str(size_list[index]))

    build_new_node(annotation, "segmented", "0")
    return annotation
此处提一下新的xml标签保存的代码:
# Wrap the root Element in an ElementTree so it can be written to a file.
new_xml_tree = ET.ElementTree(new_tree)
new_root = new_xml_tree.getroot()  # the root Element of the tree
pretty_xml(new_root, indent='\t', newline='\n')  # run the formatting helper
new_xml_tree.write("test1.xml")
新的xml文件看起来不太美观,所以需要一个美化格式的函数:
def pretty_xml(element, indent, newline, level=0):
    """
    Indent an Element (and all its descendants) in place for readable output.

    element: the Element to format; its text and its children's tails are rewritten
    indent: string used for one indentation level (e.g. a tab)
    newline: string used as the line break
    level: nesting depth of `element`; top-level callers leave it at 0

    Returns None.
    """
    # len() counts child elements. Testing the Element's truth value for this
    # is deprecated, so the check is spelled out explicitly.
    if len(element):
        if element.text is None or element.text.isspace():
            # No real text: just open a new line for the first child.
            element.text = newline + indent * (level + 1)
        else:
            # Keep the text, placed on its own line at child depth.
            element.text = (
                newline + indent * (level + 1)
                + element.text.strip()
                + newline + indent * (level + 1)
            )

    children = list(element)
    last_position = len(children) - 1
    for position, child in enumerate(children):
        if position < last_position:
            # Followed by a sibling: keep the same depth.
            child.tail = newline + indent * (level + 1)
        else:
            # Followed by the parent's closing tag: one level shallower.
            child.tail = newline + indent * level
        pretty_xml(child, indent, newline, level=level + 1)
截取图片
截图使用image.crop((a0, b0, a1, b1)),参数是一个四元组:(a0,b0)是截取矩形窗左上角坐标,(a1,b1)为截取矩形窗右下角坐标,a0<a1, b0<b1。截取结果的宽和高分别为a1-a0和b1-b0。
# Open the source image in a `with` block so the file handle is released
# once the crop has been written.
with Image.open(ImageFile) as ReadImage:
    # crop() takes one 4-item box: (left, upper, right, lower).
    cut_area = ReadImage.crop((a0, b0, a1, b1))
    cut_area.save(path_name)
在新的xml树中更新object
def is_update_tree(new_tree, name, xmin, ymin, xmax, ymax, a0, b0, a1, b1, margin=32):
    """
    Transfer one object into the cropped window's annotation tree.

    The bounding box is translated from source-image coordinates into the
    window's own coordinates and appended to new_tree as a new <object>.

    new_tree: <annotation> Element of the cropped image; modified in place
    name: class label written to <name>
    xmin, ymin, xmax, ymax: the object's bndbox in source-image coordinates
    a0, b0, a1, b1: the window in source-image coordinates; (a0, b0) is the
        top-left corner and (a1, b1) the bottom-right corner
    margin: a box that overflows the window is still kept (clipped) when its
        top-left corner is at most this many pixels from (a1, b1)

    Returns new_tree, unchanged when the object is not transferred.
    """
    # A box that starts left of or above the window is never transferred.
    if xmin < a0 or ymin < b0:
        return new_tree

    fits_in_window = xmax <= a1 and ymax <= b1
    if not fits_in_window:
        # NOTE(review): the original heuristic keeps an overflowing box only
        # when its top-left corner is near the window's bottom-right corner.
        # The rule is preserved, but the corner must also lie inside the
        # window; otherwise the clipped box would have xmin > xmax.
        near_corner = 0 <= a1 - xmin <= margin and 0 <= b1 - ymin <= margin
        if not near_corner:
            return new_tree

    # Translate into window coordinates; min() clips an overflowing box to
    # the window edge so all four values share the same frame.
    local_box = (
        ("xmin", xmin - a0),
        ("ymin", ymin - b0),
        ("xmax", min(xmax, a1) - a0),
        ("ymax", min(ymax, b1) - b0),
    )

    # SubElement() already attaches the node to new_tree. Appending it again
    # would write every object twice.
    object_node = ET.SubElement(new_tree, "object")
    ET.SubElement(object_node, "name").text = name
    bndbox = ET.SubElement(object_node, "bndbox")
    for tag, value in local_box:
        ET.SubElement(bndbox, tag).text = str(value)
    return new_tree
代码
# Size of the crop window.
WindowWidth = 640
WindowHeight = 640
# Label to migrate. Straight quotes are required: the typographic quotes in
# the original line are not valid Python string delimiters.
SelectClass = 'screw'
以上三个参数是需要截图的窗口大小,和需要筛选迁移的标签
"""
选择裁剪图片窗口的大小,并且筛选遍历各个窗口内对应筛选种类的标签,对标签进行VOC格式存储
"""
import xml.etree.ElementTree as ET
import os
from PIL import Image
def check_path(Path):
    """
    Make sure a directory exists and normalize its path to end with '/'.

    Path: directory path; the directory is created (with parents) when missing

    Returns the path with exactly one trailing '/' appended if it had none.
    Callers that ignore the return value still get the directory created.
    """
    if not os.path.exists(Path):
        os.makedirs(Path)
        print('Path does not exist. Build new Path:{}'.format(Path))
    # The original test was inverted (it added a second '/' to paths that
    # already had one) and the result was thrown away. endswith() also avoids
    # indexing into an empty string.
    if not Path.endswith('/'):
        Path += '/'
    return Path
def pretty_xml(element, indent, newline, level=0):
    """
    Indent an Element (and all its descendants) in place for readable output.

    element: the Element to format; its text and its children's tails are rewritten
    indent: string used for one indentation level (e.g. a tab)
    newline: string used as the line break
    level: nesting depth of `element`; top-level callers leave it at 0

    Returns None.
    """
    # len() counts child elements. Testing the Element's truth value for this
    # is deprecated, so the check is spelled out explicitly.
    if len(element):
        if element.text is None or element.text.isspace():
            # No real text: just open a new line for the first child.
            element.text = newline + indent * (level + 1)
        else:
            # Keep the text, placed on its own line at child depth.
            element.text = (
                newline + indent * (level + 1)
                + element.text.strip()
                + newline + indent * (level + 1)
            )

    children = list(element)
    last_position = len(children) - 1
    for position, child in enumerate(children):
        if position < last_position:
            # Followed by a sibling: keep the same depth.
            child.tail = newline + indent * (level + 1)
        else:
            # Followed by the parent's closing tag: one level shallower.
            child.tail = newline + indent * level
        pretty_xml(child, indent, newline, level=level + 1)
def new_voc_xml(folder_name, filename, path_name, size_list):
    """
    Build the skeleton of a VOC annotation tree (everything except <object>).

    folder_name: text for <folder>
    filename: text for <filename>
    path_name: text for <path>
    size_list: [width, height, depth]; each entry is written via str()

    Returns the new <annotation> root Element.
    """
    annotation = ET.Element("annotation")

    # Plain text children of <annotation>, in output order.
    header_fields = (
        ("folder", folder_name),
        ("filename", filename),
        ("path", path_name),
    )
    for tag, text in header_fields:
        build_new_node(annotation, tag, text)

    # <size> holds width/height/depth taken from size_list[0..2].
    size_node = ET.SubElement(annotation, "size")
    for index, tag in enumerate(("width", "height", "depth")):
        build_new_node(size_node, tag, str(size_list[index]))

    build_new_node(annotation, "segmented", "0")
    return annotation
def build_new_node(root, tag, text):
    """Append a new <tag> child to `root` and set its text to `text`."""
    ET.SubElement(root, tag).text = text
def is_update_tree(new_tree, name, xmin, ymin, xmax, ymax, a0, b0, a1, b1, margin=32):
    """
    Transfer one object into the cropped window's annotation tree.

    The bounding box is translated from source-image coordinates into the
    window's own coordinates and appended to new_tree as a new <object>.

    new_tree: <annotation> Element of the cropped image; modified in place
    name: class label written to <name>
    xmin, ymin, xmax, ymax: the object's bndbox in source-image coordinates
    a0, b0, a1, b1: the window in source-image coordinates; (a0, b0) is the
        top-left corner and (a1, b1) the bottom-right corner
    margin: a box that overflows the window is still kept (clipped) when its
        top-left corner is at most this many pixels from (a1, b1)

    Returns new_tree, unchanged when the object is not transferred.
    """
    # A box that starts left of or above the window is never transferred.
    if xmin < a0 or ymin < b0:
        return new_tree

    fits_in_window = xmax <= a1 and ymax <= b1
    if not fits_in_window:
        # NOTE(review): the original heuristic keeps an overflowing box only
        # when its top-left corner is near the window's bottom-right corner.
        # The rule is preserved, but the corner must also lie inside the
        # window; otherwise the clipped box would have xmin > xmax.
        near_corner = 0 <= a1 - xmin <= margin and 0 <= b1 - ymin <= margin
        if not near_corner:
            return new_tree

    # Translate into window coordinates; min() clips an overflowing box to
    # the window edge so all four values share the same frame.
    local_box = (
        ("xmin", xmin - a0),
        ("ymin", ymin - b0),
        ("xmax", min(xmax, a1) - a0),
        ("ymax", min(ymax, b1) - b0),
    )

    # SubElement() already attaches the node to new_tree. Appending it again
    # would write every object twice.
    object_node = ET.SubElement(new_tree, "object")
    ET.SubElement(object_node, "name").text = name
    bndbox = ET.SubElement(object_node, "bndbox")
    for tag, value in local_box:
        ET.SubElement(bndbox, tag).text = str(value)
    return new_tree
def get_bndbox(object):
    """
    Read an <object> node's bounding box.

    object: Element that has a <bndbox> child with xmin/xmax/ymin/ymax children

    Returns (xmin, ymin, xmax, ymax) as a tuple of ints.
    """
    box_node = object.find('bndbox')
    # Collect all four texts first, then convert.
    raw = {tag: box_node.find(tag).text for tag in ("xmin", "xmax", "ymin", "ymax")}
    return tuple(int(raw[tag]) for tag in ("xmin", "ymin", "xmax", "ymax"))
ImgPath = r'/YourImageFolderPathNeedtoBeCut/img/'
AnnoPath = r'/YourLabelFolderPathNeedtoBeCut/label/'
ProcessedImgPath = r'/YourOutputImageSavePath/JPEGImages/'
ProcessedAnnoPath = r'/YourOutputLabelSavePath/Annotations/'

# Make sure every directory exists before reading or writing.
check_path(ImgPath)
check_path(AnnoPath)
check_path(ProcessedImgPath)
check_path(ProcessedAnnoPath)

ImageType = ['jpg', 'png', 'jpeg']
WindowWidth = 640
WindowHeight = 640
SelectClass = 'screw'  # set to None to migrate every class

# Filter the listing up front. Removing entries from a list while looping
# over it skips the entry that follows each removal. splitext() also copes
# with file names that contain no dot or several dots.
ImageList = [
    entry for entry in os.listdir(ImgPath)
    if os.path.splitext(entry)[1][1:].lower() in ImageType
]

# <folder> describes the image the annotation belongs to, so it is taken
# from the image output directory rather than the annotation directory.
ImageFolder = os.path.basename(os.path.normpath(ProcessedImgPath))

for image in ImageList:
    ImageTitle, ext = os.path.splitext(image)
    ext = ext[1:]  # drop the leading dot
    ImageFile = ImgPath + image
    XmlFile = AnnoPath + ImageTitle + '.xml'

    # An image without a label file is skipped instead of aborting the run.
    if not os.path.isfile(XmlFile):
        print('Skip {}: annotation file {} not found'.format(image, XmlFile))
        continue

    # Open the matching xml file.
    tree = ET.parse(XmlFile)
    root = tree.getroot()

    # The `with` block closes the image file once all tiles are written.
    with Image.open(ImageFile) as ReadImage:
        width, height = ReadImage.size
        depth = len(ReadImage.getbands())
        print("Input image {}: width{}, height{}, depth{}".format(image, width, height, depth))

        # Walk the image in non-overlapping windows starting at (0, 0).
        # The "+ 1" keeps the last window that still fits completely;
        # partial windows at the right/bottom edge are not produced.
        columns = range(0, width - WindowWidth + 1, WindowWidth)
        rows = range(0, height - WindowHeight + 1, WindowHeight)
        for position_w, a0 in enumerate(columns):
            for position_h, b0 in enumerate(rows):
                # (a1, b1) is the last pixel that belongs to the window.
                a1 = a0 + WindowWidth - 1
                b1 = b0 + WindowHeight - 1

                # The separator between the two indices keeps tile names
                # unique (without it 1,11 and 11,1 would both give "_111").
                tile_title = '{}_{}_{}'.format(ImageTitle, position_w, position_h)
                filename = '{}.{}'.format(tile_title, ext)
                path_name = ProcessedImgPath + filename
                size_list = [WindowWidth, WindowHeight, depth]
                new_tree = new_voc_xml(ImageFolder, filename, path_name, size_list)

                for object_node in root.findall('object'):
                    name = object_node.find('name').text
                    if SelectClass is None or name == SelectClass:
                        xmin, ymin, xmax, ymax = get_bndbox(object_node)
                        new_tree = is_update_tree(new_tree, name, xmin, ymin, xmax, ymax, a0, b0, a1, b1)

                # Only windows that contain at least one object are saved.
                object_count = len(new_tree.findall('object'))
                if not object_count:
                    continue
                print('object counts: {}'.format(object_count))

                pretty_xml(new_tree, '\t', '\n')
                xml_file = ProcessedAnnoPath + tile_title + '.xml'
                ET.ElementTree(new_tree).write(xml_file)

                # crop() excludes the right/lower edge, hence the "+ 1": the
                # tile is exactly WindowWidth x WindowHeight, matching <size>.
                cut_area = ReadImage.crop((a0, b0, a1 + 1, b1 + 1))
                cut_area.save(path_name)
                print('Succeed to save xml and img: ' + filename)
注意事项
没有判断截图窗口和原图的大小关系,如果数据集图片大小不一致需要自行注意。