代码实现功能如下:
- 筛选小目标, 只有一张图片中的某一标签对应的gt都是小目标才会被筛选出来
- 过滤高清图片, 只有 h>1324 or w>1324才会被当作高清图片 (可以自己设定)
- 过滤半高清图片, 在不满足上一条(高清图片)条件的基础上, 当 h*w > 1024*1024 才会被过滤出来
import os
from PIL import Image
import numpy as np
import shutil
from tqdm import tqdm
"""
筛选是小目标的图片
小目标的定义: 目标的宽、高与图片宽、高的比值都小于0.1
"""
def filter_small_object(labels_path, small_path, target_class='0', ratio_thresh=0.1):
    """Copy image/label pairs whose objects of one class are all small.

    Each label file is expected to hold one object per line in the form
    ``class x y w h`` where ``w`` and ``h`` are already ratios of the image
    size. An image is selected only when it contains at least one object of
    ``target_class`` and every such object has both ``w`` and ``h`` below
    ``ratio_thresh``.

    Args:
        labels_path: Directory containing the ``.txt`` label files. The image
            directory is derived by replacing ``'labels'`` with ``'images'``
            in this path.
        small_path: Output directory; ``images`` and ``labels`` sub-directories
            are created inside it when missing.
        target_class: Class id (first column of a label line) to inspect.
        ratio_thresh: Exclusive upper bound for the width/height ratios.

    Raises:
        FileNotFoundError: If the ``.jpg`` image matching a selected label
            file does not exist.
    """
    # NOTE(review): str.replace swaps every 'labels' occurrence in the path,
    # not only the last directory component -- confirm the dataset layout.
    images_path = labels_path.replace('labels', 'images')
    small_images = os.path.join(small_path, 'images')
    small_labels = os.path.join(small_path, 'labels')
    # makedirs also creates missing parents and tolerates existing folders.
    os.makedirs(small_images, exist_ok=True)
    os.makedirs(small_labels, exist_ok=True)

    wanted = str(target_class)
    for label_name in tqdm(os.listdir(labels_path), desc='遍历labels'):
        stem, ext = os.path.splitext(label_name)
        if ext.lower() != '.txt':
            # Skip stray files (caches, hidden files) that are not labels.
            continue
        # Only swap the extension; a 'txt' inside the stem must stay intact.
        image_name = stem + '.jpg'

        with open(os.path.join(labels_path, label_name), 'r') as f:
            # split() without arguments tolerates tabs and repeated spaces
            # and yields an empty list for blank lines.
            rows = [line.split() for line in f]
        rows = [row for row in rows if row and row[0] == wanted]
        if not rows:
            continue

        all_small = all(
            float(row[3]) < ratio_thresh and float(row[4]) < ratio_thresh
            for row in rows
        )
        if all_small:
            shutil.copy(os.path.join(images_path, image_name), small_images)
            shutil.copy(os.path.join(labels_path, label_name), small_labels)
'''
过滤后的小目标图片中已经存在高清图片移动到新的文件夹中
高清图片: width > 1324 or height > 1324, 移动到big_path文件夹中
半高清图片: width*height > 1024*1024 and (not (width > 1324 or height > 1324)), 移动到med_image_path文件夹中
'''
def filter_big_image(small_path, big_path, med_image_path,
                     big_side=1324, med_area=1024 * 1024):
    """Move large images (and their labels) out of the small-object set.

    Images under ``small_path/images`` are classified by pixel size:

    * height or width greater than ``big_side``: moved to ``big_path``;
    * otherwise, ``width * height`` greater than ``med_area``: moved to
      ``med_image_path``;
    * everything else stays where it is.

    The matching label file from ``small_path/labels`` is moved to the
    ``labels`` sub-directory of the same destination.

    Args:
        small_path: Directory with ``images`` and ``labels`` sub-directories.
        big_path: Destination root for the large images.
        med_image_path: Destination root for the medium-sized images.
        big_side: Exclusive side-length limit (pixels) for a large image.
        med_area: Exclusive area limit (pixels) for a medium image.
    """
    small_img_path = os.path.join(small_path, 'images')
    small_label_path = os.path.join(small_path, 'labels')
    big_img_path = os.path.join(big_path, 'images')
    big_label_path = os.path.join(big_path, 'labels')
    med_img_path = os.path.join(med_image_path, 'images')
    med_label_path = os.path.join(med_image_path, 'labels')
    for folder in (big_img_path, big_label_path, med_img_path, med_label_path):
        os.makedirs(folder, exist_ok=True)

    for img_name in tqdm(os.listdir(small_img_path), desc='遍历small_obj'):
        img_file = os.path.join(small_img_path, img_name)
        # Read the size from the header only and close the file before it is
        # moved; decoding the full pixel array is unnecessary here.
        with Image.open(img_file) as img:
            w, h = img.size

        if h > big_side or w > big_side:
            dst_img, dst_label = big_img_path, big_label_path
        elif w * h > med_area:
            dst_img, dst_label = med_img_path, med_label_path
        else:
            continue

        # Swap only the extension; a 'jpg' inside the stem must stay intact.
        label_name = os.path.splitext(img_name)[0] + '.txt'
        shutil.move(img_file, dst_img)
        shutil.move(os.path.join(small_label_path, label_name), dst_label)
"""
查看是不是还存在高清图片或者半高清图片
"""
def show_big_img(small_path, area_thresh=1024 * 1024):
    """Print the names of images that are still larger than ``area_thresh``.

    Args:
        small_path: Directory whose ``images`` sub-directory is scanned.
        area_thresh: Exclusive ``width * height`` limit in pixels; images
            above it are reported.
    """
    img_dir = os.path.join(small_path, 'images')
    for img_name in tqdm(os.listdir(img_dir), desc='遍历small_obj'):
        # Only the header is needed for the size; the context manager closes
        # the file handle right away.
        with Image.open(os.path.join(img_dir, img_name)) as img:
            w, h = img.size
        if h * w > area_thresh:
            print(img_name)
def main(labels_path, small_path, big_image_path, med_image_path):
    """Run the whole filtering pipeline.

    First the small-object images are collected into ``small_path``, then the
    large and medium-sized images are moved out to ``big_image_path`` and
    ``med_image_path``, and finally any remaining oversized images are listed.
    """
    # Stage 1: collect the images that only contain small objects.
    filter_small_object(labels_path=labels_path, small_path=small_path)
    print('筛选完成')

    # Stage 2: separate the large / medium-sized images from that selection.
    filter_big_image(
        small_path=small_path,
        big_path=big_image_path,
        med_image_path=med_image_path,
    )
    print('筛选后的小目标图片中已经存在高清图片移动到新的文件夹中完成')

    # Stage 3: report anything that is still above the size limit.
    show_big_img(small_path=small_path)
if __name__ == '__main__':
    # Dataset locations used when the file is executed as a script.
    labels_path = './yolov5/train/labels'
    small_path = './yolov5/train/small_obj'
    big_image_path = './yolov5/train/small_obj/big_img_ture'
    med_image_path = './yolov5/train/small_obj/med_img_ture'

    main(labels_path, small_path, big_image_path, med_image_path)