# 1、数据类别数:将不同类别的图片放入不同的文件夹下。
# 2、生成标签列表。
import warnings
warnings.filterwarnings('ignore')
import imghdr # 判断图片格式用的
import random
# import seaborn as sns
from time import time
import paddle.fluid as fluid
import paddle
import numpy as np
from PIL import Image
import os
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
# Image directories: one folder per class. The index of a folder in
# data_path is the integer label written for the images it contains.
data_1 = 'F:/dataSet/IMG/1'
data_2 = 'F:/dataSet/IMG/2'
data_3 = 'F:/dataSet/IMG/3'
data_path = [data_1, data_2, data_3]

# Number of entries written to train_data.list / test_data.list.
train_img_num = 0
test_img_num = 0


def _is_color_jpeg(img_path):
    """Return True when *img_path* is a JPEG that decodes to a 3-D array.

    Args:
        img_path: Path of the candidate file.

    Returns:
        True if imghdr identifies the file as JPEG and the decoded pixel
        array has a channel axis (``ndim == 3``); False otherwise.

    Raises:
        OSError: If the file cannot be read or decoded (this includes
            directories and truncated/corrupt images).
        ValueError: If the image data cannot be converted to an array.
    """
    # imghdr reports JPEG files as 'jpeg'; it never returns 'jpg'.
    if imghdr.what(img_path) != 'jpeg':
        return False
    # The context manager releases the file handle held by Image.open.
    with Image.open(img_path) as img:
        img_arr = np.array(img)
    # Grayscale images decode to a 2-D (height, width) array; only arrays
    # with a third (channel) axis are treated as colour images.
    return img_arr.ndim == 3


# Each output line has the form "<image path>\t<label>\n".
with open('train_data.list', 'w') as f_train, \
        open('test_data.list', 'w') as f_test:
    for label, path in enumerate(data_path):
        # Sort the listing so the train/test split below does not depend
        # on the arbitrary order in which os.listdir returns entries.
        for i, img_name in enumerate(sorted(os.listdir(path))):
            img_path = os.path.join(path, img_name)
            try:
                keep = _is_color_jpeg(img_path)
            except (OSError, ValueError) as err:
                # Unreadable entries are skipped on purpose, but reported
                # instead of being silently swallowed.
                print(f'跳过无法读取的文件: {img_path} ({err})')
                continue
            if not keep:
                continue

            line = f'{img_path}\t{label}\n'
            # Every entry whose index is a multiple of 10 goes to the test
            # list; all other entries go to the training list.
            if i % 10 == 0:
                f_test.write(line)
                test_img_num += 1
            else:
                f_train.write(line)
                train_img_num += 1

print('图像列表已生成。')
print(f'训练图片{train_img_num}张,测试图片{test_img_num}张。')