goal:将40000张图片按照9:1的比例分割为训练集与测试集。
代码大致如下。
原始数据位于文件夹 C:\Users\sun\Desktop\data devide_color,
该文件夹下有10个分类目录。
import random
import shutil
import cv2
import glob
import os
# Quick demonstration: random.shuffle reorders the elements of a list in place.
x = [*range(1, 7)]
random.shuffle(x)
print(x)
# Source root: its entries are listed with os.listdir and each one is treated
# as a directory whose files get split into a training and a test subset.
up_path = r"C:\Users\sun\Desktop\data devide_color"
# Holds the same location as up_path; not referenced anywhere in the visible code.
up_newpath = r"C:\Users\sun\Desktop\data devide_color"
# Destination directory for the files selected as training data.
train_path = r"C:\Users\sun\Desktop\b_classification\classification-pytorch-main\color_datasets\train\Anomaly"
# Destination directory for the files selected as test data.
test_path = r"C:\Users\sun\Desktop\b_classification\classification-pytorch-main\color_datasets\test\Anomaly"
# Split the files of every class directory under up_path into a training
# subset and a test subset (9:1) and copy them to train_path / test_path.
train_ratio = 0.9  # fraction of each class directory that goes to the training set

# shutil.copy only copies *into* a directory that already exists; if the path
# is missing it is treated as a file name, so every image would overwrite the
# same file (or the copy would fail). Create both destinations up front.
os.makedirs(train_path, exist_ok=True)
os.makedirs(test_path, exist_ok=True)

# NOTE(review): every class directory is copied into the same two destination
# folders, so files with identical names coming from different classes
# overwrite each other -- confirm this is intended.
for class_name in os.listdir(up_path):
    class_dir = os.path.join(up_path, class_name)
    # Stray files next to the class directories cannot be listed; skip them
    # instead of crashing with NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue

    pictures = os.listdir(class_dir)
    # os.listdir returns entries in an arbitrary but typically stable order;
    # shuffle so the 9:1 split is a random sample, not "the first 90% of names".
    random.shuffle(pictures)

    # Truncate towards zero so the test subset receives the remainder.
    split_index = int(len(pictures) * train_ratio)

    # Training subset.
    for picture_name in pictures[:split_index]:
        shutil.copy(os.path.join(class_dir, picture_name), train_path)

    # Test subset.
    for picture_name in pictures[split_index:]:
        shutil.copy(os.path.join(class_dir, picture_name), test_path)