yolov5数据集小目标筛选

yuanjun0416

已于 2024-01-23 13:48:05 修改

阅读量877

点赞数 8

分类专栏：目标检测数据集处理 文章标签：YOLO python

于 2023-12-22 20:02:21 首次发布

本文链接：https://blog.youkuaiyun.com/shilichangtin/article/details/135160078

版权

目标检测数据集处理专栏收录该内容

4 篇文章

订阅专栏

该文章详细描述了如何使用Python脚本在Yolov5框架下，根据目标的宽高比和图片尺寸，筛选出小目标图片并将其分为高清和半高清图片，存储在不同的文件夹中。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

代码实现功能如下：

1. 筛选小目标：只有当一张图片中某一标签对应的所有 gt 都是小目标时，该图片才会被筛选出来
2. 过滤高清图片：只有 h > 1324 or w > 1324 才会被当作高清图片（阈值可以自己设定）
3. 过滤半高清图片：在不满足第 2 条的基础上，当 h*w > 1024*1024 时才会被过滤出来

import os
from PIL import Image
import numpy as np
import shutil
from tqdm import tqdm
def filter_small_object(labels_path, small_path, target_class='0', max_ratio=0.1):
    """Copy images whose ``target_class`` boxes are all small objects.

    A box counts as small when both its width and its height, expressed as
    a fraction of the image size, are below ``max_ratio``. An image is
    selected only if *every* box of ``target_class`` in its label file is
    small; label files without any ``target_class`` box are skipped.

    Selected images and their label files are copied (not moved) into
    ``<small_path>/images`` and ``<small_path>/labels``.

    Args:
        labels_path: Directory holding the YOLOv5 ``.txt`` label files. The
            image directory is derived by replacing ``'labels'`` with
            ``'images'`` in this path, and images are expected to be
            ``.jpg`` files with the same stem as the label file.
        small_path: Output root directory; created if missing.
        target_class: Class id (as written in the label file) to inspect.
        max_ratio: Exclusive upper bound on relative width and height for
            a box to be considered small.

    Raises:
        FileNotFoundError: If a selected label has no matching ``.jpg``.
    """
    # YOLOv5 layout: the images tree mirrors the labels tree.
    images_path = labels_path.replace('labels', 'images')
    small_images_dir = os.path.join(small_path, 'images')
    small_labels_dir = os.path.join(small_path, 'labels')
    # makedirs also creates missing parents, unlike mkdir.
    os.makedirs(small_images_dir, exist_ok=True)
    os.makedirs(small_labels_dir, exist_ok=True)

    # Only real label files; ignore caches, sub-directories, etc.
    label_files = [name for name in os.listdir(labels_path) if name.endswith('.txt')]

    for label_name in tqdm(label_files, desc='遍历labels'):
        # Swap the extension only; str.replace would also rewrite a 'txt'
        # that happens to appear inside the file stem.
        image_name = os.path.splitext(label_name)[0] + '.jpg'

        with open(os.path.join(labels_path, label_name), 'r') as f:
            # split() tolerates tabs / repeated spaces and yields [] for
            # blank lines.
            rows = [line.split() for line in f]

        # Keep only the boxes that belong to the class of interest.
        boxes = [row for row in rows if row and row[0] == target_class]
        if not boxes:
            continue

        # Label row format: class x_center y_center width height, where the
        # four geometry values are relative to the full image size.
        all_small = all(
            float(row[3]) < max_ratio and float(row[4]) < max_ratio
            for row in boxes
        )
        if all_small:
            shutil.copy(os.path.join(images_path, image_name), small_images_dir)
            shutil.copy(os.path.join(labels_path, label_name), small_labels_dir)


def filter_big_image(small_path, big_path, med_image_path,
                     big_side=1324, med_area=1024 * 1024):
    """Move high-resolution images out of the small-object set.

    Every file in ``<small_path>/images`` is classified by pixel size:

    * HD: ``width > big_side or height > big_side``; moved to
      ``<big_path>/images`` and its label to ``<big_path>/labels``.
    * Semi-HD: not HD, but ``width * height > med_area``; moved to
      ``<med_image_path>/images`` and ``<med_image_path>/labels``.
    * Anything else is left where it is.

    Args:
        small_path: Root of the small-object set (with ``images`` and
            ``labels`` sub-directories).
        big_path: Destination root for HD images; created if missing.
        med_image_path: Destination root for semi-HD images; created if
            missing.
        big_side: Side length in pixels above which an image is HD.
        med_area: Pixel count above which a non-HD image is semi-HD.
    """
    small_img_path = os.path.join(small_path, 'images')
    small_label_path = os.path.join(small_path, 'labels')
    big_img_path = os.path.join(big_path, 'images')
    big_label_path = os.path.join(big_path, 'labels')
    med_img_path = os.path.join(med_image_path, 'images')
    med_label_path = os.path.join(med_image_path, 'labels')
    for directory in (big_img_path, big_label_path, med_img_path, med_label_path):
        os.makedirs(directory, exist_ok=True)

    for img_name in tqdm(os.listdir(small_img_path), desc='遍历small_obj'):
        img_file = os.path.join(small_img_path, img_name)
        # Image.size is (width, height) and is read from the header, so the
        # pixel data need not be decoded. The context manager closes the
        # file handle before the file is moved.
        with Image.open(img_file) as image:
            w, h = image.size

        if h > big_side or w > big_side:
            dst_img_dir, dst_label_dir = big_img_path, big_label_path
        elif w * h > med_area:
            dst_img_dir, dst_label_dir = med_img_path, med_label_path
        else:
            continue

        # Swap the extension only; str.replace('jpg', 'txt') would also
        # rewrite a 'jpg' that appears inside the file stem.
        label_name = os.path.splitext(img_name)[0] + '.txt'
        shutil.move(img_file, dst_img_dir)
        shutil.move(os.path.join(small_label_path, label_name), dst_label_dir)
            

def show_big_img(small_path, big_side=1324, med_area=1024 * 1024):
    """Print the names of HD or semi-HD images still in the small set.

    An image in ``<small_path>/images`` is reported when either side
    exceeds ``big_side`` or its pixel count exceeds ``med_area``.

    Args:
        small_path: Root of the small-object set (with an ``images``
            sub-directory).
        big_side: Side length in pixels above which an image is HD.
        med_area: Pixel count above which an image is semi-HD.
    """
    images_dir = os.path.join(small_path, 'images')
    for img_name in tqdm(os.listdir(images_dir), desc='遍历small_obj'):
        # Image.size is (width, height); no need to decode the pixels.
        with Image.open(os.path.join(images_dir, img_name)) as image:
            w, h = image.size
        # Check the side-length rule as well as the area rule: a long,
        # thin image can be HD while having a small pixel count.
        if h > big_side or w > big_side or h * w > med_area:
            print(img_name)

def main(labels_path, small_path, big_image_path, med_image_path):
    """Run the full pipeline: select, split by resolution, then verify.

    Args:
        labels_path: Directory holding the YOLOv5 label files.
        small_path: Output root for the selected small-object images.
        big_image_path: Output root for HD images taken out of the set.
        med_image_path: Output root for semi-HD images taken out of the set.
    """
    # Stage 1: collect the images whose target boxes are all small.
    filter_small_object(labels_path=labels_path, small_path=small_path)
    print('筛选完成')

    # Stage 2: move HD / semi-HD images into their own folders.
    filter_big_image(
        small_path=small_path,
        big_path=big_image_path,
        med_image_path=med_image_path,
    )
    print('筛选后的小目标图片中已经存在高清图片移动到新的文件夹中完成')

    # Stage 3: list anything large that is still left behind.
    show_big_img(small_path=small_path)
    
if __name__ == '__main__':
    main(
        # Location of the YOLOv5 label files.
        labels_path='./yolov5/train/labels',
        # Where the selected small-object images and labels are stored.
        small_path='./yolov5/train/small_obj',
        # Where HD images found in the small-object set are moved to.
        big_image_path='./yolov5/train/small_obj/big_img_ture',
        # Where semi-HD images found in the small-object set are moved to.
        med_image_path='./yolov5/train/small_obj/med_img_ture',
    )