1、统计分布目的
(1)为了对数据集中所要识别对象是大、中还是小目标有整体把握;
(2)便于对模型进行针对性的修改。
2、统计分布方式
长宽各以5个像素为步长分别进行统计(图片已缩放为300*300),共计60份(300/5=60);面积按占比5%统计,共计20份(100/5=20)。
3、代码如下
import xml.etree.ElementTree as ET
import cv2
import os
import sys
# 缩图,将图片缩为300*300
def resize(image_path, image_save, size=(300, 300)):
    """Resize every readable image in ``image_path`` and save it to ``image_save``.

    Each output file keeps the name of its source file. Directory entries
    that OpenCV cannot decode (sub-directories, non-image files, corrupt
    images) are skipped with a message on stderr instead of aborting the run.

    Args:
        image_path: Directory containing the source images.
        image_save: Directory the resized images are written to. It is
            created if it does not exist yet.
        size: Target ``(width, height)`` handed to ``cv2.resize``.
            Defaults to ``(300, 300)``.
    """
    # cv2.imwrite does not create missing directories, so make sure the
    # destination exists before writing anything into it.
    if not os.path.isdir(image_save):
        os.makedirs(image_save)

    for file_name in os.listdir(image_path):
        src_path = os.path.join(image_path, file_name)
        dst_path = os.path.join(image_save, file_name)

        # cv2.imread reports failure by returning None rather than raising.
        pixels = cv2.imread(src_path)
        if pixels is None:
            sys.stderr.write(
                "resize: skipping unreadable image %s\n" % src_path
            )
            continue

        resized = cv2.resize(pixels, size)
        if not cv2.imwrite(dst_path, resized):
            sys.stderr.write("resize: failed to write %s\n" % dst_path)
# 修改xml,使其与缩减后的图片相对应
def xml_update(xml_path, xml_save):
xml_lists = os.listdir(xml_path)
for xml in xml_lists:
path = os.path.join(xml_path, xml)
xml_save_path = os.path.join(xml_save, xml)
# print(xml_path1)
# 缩xml
tree = ET.parse(path)
root = tree.getroot()
# 在xml里找到描述原图的size