Cars196
import scipy.io
import os
import shutil
# Load the Cars196 annotation table and the class-name list from cars_annos.mat.
mat = scipy.io.loadmat('cars/cars_annos.mat')
annotations, class_names = mat['annotations'][0], mat['class_names'][0]

# Output roots for the two splits.
train_dir = 'cars/train'
test_dir = 'cars/test'
for split_root in (train_dir, test_dir):
    os.makedirs(split_root, exist_ok=True)

# Per-split image counters, reported once all images have been copied.
train_num = test_num = 0

for anno in annotations:
    # Field 0 holds the relative image path (e.g. car_ims/011204.jpg);
    # only the file name is kept.
    img_path = anno[0][0].rsplit('/', 1)[-1]
    class_id = int(anno[5][0][0])  # field 5: class id (used 1-based below)
    is_test = int(anno[6][0][0])   # field 6: split flag, 0 = train, 1 = test

    # Class folder: zero-padded id + name with spaces and slashes replaced.
    class_name = class_names[class_id - 1][0].replace(' ', '_').replace('/', '_')
    new_class_name = f"{class_id:03d}_{class_name}"

    if is_test:
        split_root = test_dir
        test_num += 1
    else:
        split_root = train_dir
        train_num += 1

    # Copy the image into <split>/<class folder>/.
    src_img_path = os.path.join('cars/car_ims', img_path)
    dst_dir = os.path.join(split_root, new_class_name)
    os.makedirs(dst_dir, exist_ok=True)
    # e.g. Copying cars/car_ims/016185.jpg to cars/test/196_smart_fortwo_Convertible_2012
    print(f"Copying {src_img_path} to {dst_dir}")
    shutil.copy(src_img_path, dst_dir)

print(f"Train set: {train_num}, Test set: {test_num}")
print("Dataset split completed!")
PETs
import shutil
import os
# Annotation lists that define the train/val and test splits.
train_val_file = "./annotations/trainval.txt"
test_file = "./annotations/test.txt"
# Source image folder and the two output roots.
processed_image_dir = "images"
train_dir = "train"
test_dir = "test"
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


def _copy_split(lines, split_dir):
    """Copy every image listed in *lines* into ``split_dir/<category>/``.

    The first whitespace-separated field of each line is an image id of the
    form ``<category>_<index>``; the category is everything before the last
    underscore.  Blank lines are skipped, and images missing from
    ``processed_image_dir`` are reported and skipped.
    """
    for line in lines:
        fields = line.split()
        if not fields:
            # A blank (e.g. trailing) line would otherwise raise IndexError.
            continue
        image_name = fields[0] + ".jpg"
        category = image_name.rpartition('_')[0]
        category_dir = os.path.join(split_dir, category)
        os.makedirs(category_dir, exist_ok=True)
        src_path = os.path.join(processed_image_dir, image_name)
        dst_path = os.path.join(category_dir, image_name)
        if os.path.exists(src_path):
            shutil.copy(src_path, dst_path)
        else:
            print(f"Image {src_path} not found")


# Read the image ids of each split.
with open(train_val_file, 'r') as f:
    train_list = f.readlines()
with open(test_file, 'r') as f:
    test_list = f.readlines()

# Both splits go through the same routine; only the destination differs.
_copy_split(train_list, train_dir)
_copy_split(test_list, test_dir)
Flowers102
随机分为train(0.8),test(0.2),并以class name命名。
import scipy.io as sio
import os
# class_names = [
# "pink primrose", "hard-leaved pocket orchid", "canterbury bells", "sweet pea",
# "english marigold", "tiger lily", "moon orchid", "bird of paradise", "monkshood",
# "globe thistle", "snapdragon", "colt's foot", "king protea", "spear thistle",
# "yellow iris", "globe-flower", "purple coneflower", "peruvian lily",
# "balloon flower", "giant white arum lily", "fire lily", "pincushion flower",
# "fritillary", "red ginger", "grape hyacinth", "corn poppy", "prince of wales feathers",
# "stemless gentian", "artichoke", "sweet william", "carnation", "garden phlox",
# "love in the mist", "mexican aster", "alpine sea holly", "ruby-lipped cattleya",
# "cape flower", "great masterwort", "siam tulip", "lenten rose", "barbeton daisy",
# "daffodil", "sword lily", "poinsettia", "bolero deep blue", "wallflower",
# "marigold", "buttercup", "oxeye daisy", "common dandelion", "petunia", "wild pansy",
# "primula", "sunflower", "pelargonium", "bishop of llandaff", "gaura", "geranium",
# "orange dahlia", "pink-yellow dahlia?", "cautleya spicata", "japanese anemone",
# "black-eyed susan", "silverbush", "californian poppy", "osteospermum", "spring crocus",
# "bearded iris", "windflower", "tree poppy", "gazania", "azalea", "water lily",
# "rose", "thorn apple", "morning glory", "passion flower", "lotus", "toad lily",
# "anthurium", "frangipani", "clematis", "hibiscus", "columbine", "desert-rose",
# "tree mallow", "magnolia", "cyclamen", "watercress", "canna lily", "hippeastrum",
# "bee balm", "pink quill", "foxglove", "bougainvillea", "camellia", "mallow",
# "mexican petunia", "bromelia", "blanket flower", "trumpet creeper", "blackberry lily"]
# Lookup table from Flowers102 label to class name.
# Keys are the label numbers written as strings ('1' .. '102') because the
# labels are converted with str() before they are used as keys further down.
label2name = {'21': 'fire lily',
'3': 'canterbury bells',
'45': 'bolero deep blue',
'1': 'pink primrose',
'34': 'mexican aster',
'27': 'prince of wales feathers',
'7': 'moon orchid',
'16': 'globe-flower',
'25': 'grape hyacinth',
'26': 'corn poppy',
'79': 'toad lily',
'39': 'siam tulip',
'24': 'red ginger',
'67': 'spring crocus',
'35': 'alpine sea holly',
'32': 'garden phlox',
'10': 'globe thistle',
'6': 'tiger lily',
'93': 'ball moss',
'33': 'love in the mist',
'9': 'monkshood',
'102': 'blackberry lily',
'14': 'spear thistle',
'19': 'balloon flower',
'100': 'blanket flower',
'13': 'king protea',
'49': 'oxeye daisy',
'15': 'yellow iris',
'61': 'cautleya spicata',
'31': 'carnation',
'64': 'silverbush',
'68': 'bearded iris',
'63': 'black-eyed susan',
'69': 'windflower',
'62': 'japanese anemone',
'20': 'giant white arum lily',
'38': 'great masterwort',
'4': 'sweet pea',
'86': 'tree mallow',
'101': 'trumpet creeper',
'42': 'daffodil',
'22': 'pincushion flower',
'2': 'hard-leaved pocket orchid',
'54': 'sunflower',
'66': 'osteospermum',
'70': 'tree poppy',
'85': 'desert-rose',
'99': 'bromelia',
'87': 'magnolia',
'5': 'english marigold',
'92': 'bee balm',
'28': 'stemless gentian',
'97': 'mallow',
'57': 'gaura',
'40': 'lenten rose',
'47': 'marigold',
'59': 'orange dahlia',
'48': 'buttercup',
'55': 'pelargonium',
'36': 'ruby-lipped cattleya',
'91': 'hippeastrum',
'29': 'artichoke',
'71': 'gazania',
'90': 'canna lily',
'18': 'peruvian lily',
'98': 'mexican petunia',
'8': 'bird of paradise',
'30': 'sweet william',
'17': 'purple coneflower',
'52': 'wild pansy',
'84': 'columbine',
'12': "colt's foot",
'11': 'snapdragon',
'96': 'camellia',
'23': 'fritillary',
'50': 'common dandelion',
'44': 'poinsettia',
'53': 'primula',
'72': 'azalea',
'65': 'californian poppy',
'80': 'anthurium',
'76': 'morning glory',
'37': 'cape flower',
'56': 'bishop of llandaff',
'60': 'pink-yellow dahlia',
'82': 'clematis',
'58': 'geranium',
'75': 'thorn apple',
'41': 'barbeton daisy',
'95': 'bougainvillea',
'43': 'sword lily',
'83': 'hibiscus',
'78': 'lotus lotus',
'88': 'cyclamen',
'94': 'foxglove',
'81': 'frangipani',
'74': 'rose',
'89': 'watercress',
'73': 'water lily',
'46': 'wallflower',
'77': 'passion flower',
'51': 'petunia'}
# print(len(label2name))
# Location of the Flowers102 label file.
mat_file_path = '/grp01/cs_hszhao/cs002u03/dataset/Flower102/imagelabels.mat'

# Load the .mat file and dump its raw contents for inspection.
mat_data = sio.loadmat(mat_file_path)
print(mat_data)

# The labels live under the 'labels' key; the first row is taken as the flat
# label vector (MATLAB data is usually stored as a 2-D array).
image_labels = mat_data.get('labels')[0]

# Quick sanity report on the label vector.
print("前 10 个图像标签:", image_labels[:10])
print(f"标签总数: {len(image_labels)}")
print(f"最小标签值: {min(image_labels)}")
print(f"最大标签值: {max(image_labels)}")
print("type: ", type(image_labels[0]))  # observed: <class 'numpy.uint8'>

# File name -> label (as a string); the n-th label belongs to image_<n>.jpg,
# with n starting at 1 and zero-padded to five digits.
image_label_dict = {}
for position, label in enumerate(image_labels, start=1):
    image_label_dict[f"image_{position:05d}.jpg"] = str(label)

# Label -> list of file names, e.g. ['image_06734.jpg', 'image_06735.jpg', ...].
# The keys are created from set(image_labels) first, so the key order of this
# dict follows the set's iteration order.
label_image_dict = {str(label): [] for label in set(image_labels)}
for img_name, label in image_label_dict.items():
    label_image_dict[label].append(img_name)
# randomly split into train(0.8) and test(0.2) and save to file named by category name
import random
import shutil
import os
# Fixed seed so the random 80/20 split is repeatable.
random.seed(0)

root = "/grp01/cs_hszhao/cs002u03/dataset/Flower102/jpg"
train_root = "/grp01/cs_hszhao/cs002u03/dataset/Flower102/train"
test_root = "/grp01/cs_hszhao/cs002u03/dataset/Flower102/test"
for split_root in (train_root, test_root):
    os.makedirs(split_root, exist_ok=True)

for label, images in label_image_dict.items():
    # Shuffle in place, then cut: first 80% -> train, remainder -> test.
    random.shuffle(images)
    train_num = int(len(images) * 0.8)

    # Folder name: "<label>_<class name with underscores>".
    # NOTE(review): zfill(2) leaves labels '100'..'102' one character wider
    # than the rest, so folder names do not sort in numeric label order.
    # Left unchanged here to keep the existing directory layout.
    class_folder = f"{label.zfill(2)}_{label2name[label].replace(' ', '_')}"
    train_root_perC = os.path.join(train_root, class_folder)
    test_root_perC = os.path.join(test_root, class_folder)
    os.makedirs(train_root_perC, exist_ok=True)
    os.makedirs(test_root_perC, exist_ok=True)

    # Train images are copied first, then test images.
    for dest_dir, subset in (
        (train_root_perC, images[:train_num]),
        (test_root_perC, images[train_num:]),
    ):
        for img in subset:
            shutil.copy(os.path.join(root, img), os.path.join(dest_dir, img))
#######################################################
#######################################################
Caltech101
去除google background,留下101个类别。
import os
import shutil
import random
# Fixed seed so the per-class shuffle below is repeatable.
random.seed(0)

src_root = "/grp01/cs_hszhao/cs002u03/dataset/caltech-101/101_ObjectCategories"
dst_root = "/grp01/cs_hszhao/cs002u03/dataset/caltech-101/organize"
train_root = os.path.join(dst_root, "train")
test_root = os.path.join(dst_root, "test")
os.makedirs(train_root, exist_ok=True)
os.makedirs(test_root, exist_ok=True)

# Images per class that go to the training split; the rest become test images.
train_length = 30

# os.listdir() returns entries in arbitrary order.  Because every class
# consumes random numbers from the same seeded generator, the class order must
# be fixed too, otherwise the split differs between machines.  Non-directory
# entries are skipped, and the Google background class is dropped so that only
# the 101 object categories remain.
class_names = sorted(
    name
    for name in os.listdir(src_root)
    if name != "BACKGROUND_Google" and os.path.isdir(os.path.join(src_root, name))
)

for class_name in class_names:
    class_src = os.path.join(src_root, class_name)
    class_train = os.path.join(train_root, class_name)
    class_test = os.path.join(test_root, class_name)

    # Sort before shuffling so the shuffle starts from a deterministic order.
    img_names = sorted(os.listdir(class_src))
    random.shuffle(img_names)

    os.makedirs(class_train, exist_ok=True)
    os.makedirs(class_test, exist_ok=True)

    for img_name in img_names[:train_length]:
        shutil.copy(os.path.join(class_src, img_name), os.path.join(class_train, img_name))
    for img_name in img_names[train_length:]:
        shutil.copy(os.path.join(class_src, img_name), os.path.join(class_test, img_name))
CUB
import os
import shutil
# Dataset locations.
data_dir = "CUB_200_2011"
images_dir = os.path.join(data_dir, "images")
train_dir = os.path.join(data_dir, "train")
test_dir = os.path.join(data_dir, "test")

# Create the output roots.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


def _read_id_table(path):
    """Read a ``<image_id> <value>`` text file into a dict keyed by image id.

    Blank lines are ignored.  Insertion order follows the file, so iterating
    the result visits entries in file order.
    """
    table = {}
    with open(path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            table[fields[0]] = fields[1]
    return table


# image id -> relative image path, and image id -> train flag ("1" = train).
images = _read_id_table(os.path.join(data_dir, "images.txt"))
train_test_split = _read_id_table(os.path.join(data_dir, "train_test_split.txt"))

# Pair the two files by image id rather than by line position, so a missing or
# reordered line raises KeyError instead of silently mislabelling images.
for image_id, img in images.items():
    is_train = int(train_test_split[image_id])
    source = os.path.join(images_dir, img)
    target_folder = train_dir if is_train == 1 else test_dir
    # The first path component of the image path is the category folder.
    category_folder = os.path.join(target_folder, img.split("/")[0])
    os.makedirs(category_folder, exist_ok=True)
    # Copy the image, keeping its <category>/<file> relative path.
    shutil.copy(source, os.path.join(target_folder, img))

print("数据集整理完成!")
Cifar100
import os
import pickle
import numpy as np
from PIL import Image
# Extracted CIFAR-100 folder and the three pickle files read from it.
cifar_dir = 'cifar-100-python'
train_file, test_file, meta_file = (
    os.path.join(cifar_dir, part) for part in ('train', 'test', 'meta')
)

# Where the per-class image folders are written.
output_dir = 'cifar-100-python/data'
train_output_dir = os.path.join(output_dir, 'train')
test_output_dir = os.path.join(output_dir, 'test')

# Create the output folders if they do not exist yet.
for split_output_dir in (train_output_dir, test_output_dir):
    os.makedirs(split_output_dir, exist_ok=True)

# Load the metadata and decode the fine-grained class names from bytes to str.
with open(meta_file, 'rb') as f:
    meta = pickle.load(f, encoding='bytes')
fine_label_names = [raw_name.decode('utf-8') for raw_name in meta[b'fine_label_names']]
# Helper: write one image array to disk.
def save_image(image_array, filename):
    """Convert *image_array* to a PIL image and save it as *filename*."""
    Image.fromarray(image_array).save(filename)
# Helper: unpack one CIFAR-100 pickle file into per-class image folders.
def decode_and_save(data_file, output_dir):
    """Write every image stored in *data_file* to ``output_dir/<class name>/``.

    *data_file* is a pickle whose dict provides ``b'data'`` (one flat pixel
    row per image), ``b'fine_labels'`` and ``b'filenames'``.  Class names are
    looked up in the module-level ``fine_label_names`` list and each image is
    written with ``save_image`` under its original file name.
    """
    with open(data_file, 'rb') as f:
        data_dict = pickle.load(f, encoding='bytes')

    images = data_dict[b'data']
    labels = data_dict[b'fine_labels']
    filenames = data_dict[b'filenames']

    for i, flat_pixels in enumerate(images):
        # Each row is reshaped to (3, 32, 32) and then reordered to 32x32x3.
        img_array = np.reshape(flat_pixels, (3, 32, 32)).transpose(1, 2, 0)
        class_name = fine_label_names[labels[i]]
        filename = filenames[i].decode('utf-8')

        # One folder per class, created on demand.
        class_dir = os.path.join(output_dir, class_name)
        os.makedirs(class_dir, exist_ok=True)
        save_image(img_array, os.path.join(class_dir, filename))
# Export the training split, then the test split.
for banner, source_file, target_dir in (
    ("开始处理训练集...", train_file, train_output_dir),
    ("开始处理测试集...", test_file, test_output_dir),
):
    print(banner)
    decode_and_save(source_file, target_dir)
print("CIFAR-100 数据集整理完成!")
IN200-S
import os
import shutil
import random
# Fixed seed so the class and image selection is repeatable.
random.seed(0)

# Source ImageNet folders (train and val) and the destination subset folders.
source_dir = '/grp01/cs_hszhao/cs002u03/dataset/ImageNet-1k/train'
source_val_dir = '/grp01/cs_hszhao/cs002u03/dataset/ImageNet-1k/val_1k'
train_dest_dir = '/grp01/cs_hszhao/cs002u03/dataset/IN200_S/train'
val_dest_dir = '/grp01/cs_hszhao/cs002u03/dataset/IN200_S/val'
num_images_per_class = 100
num_classes = 200

os.makedirs(train_dest_dir, exist_ok=True)

# All classes (sub-folder names).  os.listdir() returns entries in arbitrary
# order, so the list is sorted before sampling; otherwise the seeded sample
# would depend on the filesystem.
all_classes = sorted(
    d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))
)
# Randomly pick the classes that make up the subset.
selected_classes = random.sample(all_classes, num_classes)

for class_name in selected_classes:
    # Candidate training images of this class, sorted for the same reason.
    class_dir = os.path.join(source_dir, class_name)
    all_images = sorted(
        f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))
    )
    # random.sample raises ValueError if the class has fewer images than requested.
    selected_images = random.sample(all_images, num_images_per_class)

    # Destination folders for this class.
    train_class_dir = os.path.join(train_dest_dir, class_name)
    val_class_dir = os.path.join(val_dest_dir, class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(val_class_dir, exist_ok=True)

    # Copy (not move) the sampled training images.
    for image_name in selected_images:
        shutil.copy(os.path.join(class_dir, image_name),
                    os.path.join(train_class_dir, image_name))

    # Copy every validation image of the class.
    source_val_class_dir = os.path.join(source_val_dir, class_name)
    for image_name in os.listdir(source_val_class_dir):
        shutil.copy(os.path.join(source_val_class_dir, image_name),
                    os.path.join(val_class_dir, image_name))

    print(f'Copied {num_images_per_class} images for class {class_name} to {train_class_dir}')

# Report the destination folder (the message previously named the source folder).
print(f'Successfully copied {num_classes} classes with {num_images_per_class} images each to {train_dest_dir}.')
IN100-S
生成train_100.json
# Class ids (directory names) of the 100 selected ImageNet classes.
wnids_IN100 = ['n01498041', 'n01514859', 'n01582220', 'n01608432', 'n01616318',
               'n01443537', 'n01776313', 'n01806567', 'n01833805', 'n01882714',
               'n01910747', 'n01944390', 'n01985128', 'n02007558', 'n02071294',
               'n02085620', 'n02114855', 'n02123045', 'n02128385', 'n02129165',
               'n02129604', 'n02165456', 'n02190166', 'n02219486', 'n02226429',
               'n02279972', 'n02317335', 'n02326432', 'n02342885', 'n02363005',
               'n02391049', 'n02395406', 'n02403003', 'n02422699', 'n02442845',
               'n02444819', 'n02480855', 'n02510455', 'n02640242', 'n02672831',
               'n02687172', 'n02701002', 'n02730930', 'n02769748', 'n02782093',
               'n02787622', 'n02793495', 'n02799071', 'n02802426', 'n02814860',
               'n02840245', 'n02906734', 'n02948072', 'n02980441', 'n02999410',
               'n03014705', 'n03028079', 'n03032252', 'n03125729', 'n03160309',
               'n03179701', 'n03220513', 'n03249569', 'n03291819', 'n03384352',
               'n03388043', 'n03450230', 'n03481172', 'n03594734', 'n03594945',
               'n03627232', 'n03642806', 'n03649909', 'n03661043', 'n03676483',
               'n03724870', 'n03733281', 'n03759954', 'n03761084', 'n03773504',
               'n03804744', 'n03916031', 'n03938244', 'n04004767', 'n04026417',
               'n04090263', 'n04133789', 'n04153751', 'n04296562', 'n04330267',
               'n04371774', 'n04404412', 'n04465501', 'n04485082', 'n04507155',
               'n04536866', 'n04579432', 'n04606251', 'n07714990', 'n07745940']
# Short class word for each id above, in the same order.
IN100_words = ['stingray', 'hen', 'magpie', 'kite', 'vulture',
               'goldfish', 'tick', 'quail', 'hummingbird', 'koala',
               'jellyfish', 'snail', 'crawfish', 'flamingo', 'orca',
               'chihuahua', 'coyote', 'tabby', 'leopard', 'lion',
               'tiger', 'ladybug', 'fly', 'ant', 'grasshopper',
               'monarch', 'starfish', 'hare', 'hamster', 'beaver',
               'zebra', 'pig', 'ox', 'impala', 'mink',
               'otter', 'gorilla', 'panda', 'sturgeon', 'accordion',
               'carrier', 'ambulance', 'apron', 'backpack', 'balloon',
               'banjo', 'barn', 'baseball', 'basketball', 'beacon',
               'binder', 'broom', 'candle', 'castle', 'chain',
               'chest', 'church', 'cinema', 'cradle', 'dam',
               'desk', 'dome', 'drum', 'envelope', 'forklift',
               'fountain', 'gown', 'hammer', 'jean', 'jeep',
               'knot', 'laptop', 'mower', 'library', 'lipstick',
               'mask', 'maze', 'microphone', 'microwave', 'missile',
               'nail', 'perfume', 'pillow', 'printer', 'purse',
               'rifle', 'sandal', 'screw', 'stage', 'stove',
               'swing', 'television', 'tractor', 'tripod', 'umbrella',
               'violin', 'whistle', 'wreck', 'broccoli', 'strawberry',
               ]

# Sort the (id, word) pairs by id and unzip them again; both names become
# tuples that are aligned and ordered by id.
wnids_IN100, IN100_words = zip(*sorted(zip(wnids_IN100, IN100_words)))

# (label index, id, word) per class.  The pairs are already sorted at this
# point, so no second sort is needed.
categories = [
    (index, wnid, word)
    for index, (wnid, word) in enumerate(zip(wnids_IN100, IN100_words))
]
print(categories[:5])
# generate train_100.json
import os
import json

root_dir = "/mnt/petrelfs/share/imagenet/images/train"
# Upper bound on the number of images listed per class.
images_per_class = 200

data = []
for index, wnid, word in categories:
    category_dir = os.path.join(root_dir, wnid)
    # os.listdir() returns names in arbitrary order; sort first so the images
    # that end up in the JSON do not depend on the filesystem.
    images = sorted(os.listdir(category_dir))[:images_per_class]
    # One record per image: path relative to root_dir, label index, class word.
    data.extend(
        {
            "image_file": os.path.join(wnid, image),
            "label": index,
            "word": word,
        }
        for image in images
    )

with open("/mnt/petrelfs/yangshuo/IP-Adapter-main/data/ImageNet-1K/train_100.json", "w") as f:
    json.dump(data, f)
生成train文件夹,将train_100.json中列出的图片复制到train文件夹中(使用shutil.copy,原图保留)
import json
import shutil
import os
# Folder the image paths in the JSON are relative to, and the output folder.
root = "/grp01/cs_hszhao/cs002u03/dataset/IN100/train"
dst_root = '/grp01/cs_hszhao/cs002u03/dataset/IN100_sub/train/'

# Read the image list with a context manager so the file handle is closed.
with open('/grp01/cs_hszhao/cs002u03/dataset/IN100_sub/train_100.json') as f:
    json_file = json.load(f)
# print(json_file[:5])

for item in json_file:
    # 'image_file' is "<category>/<file name>"; the category names the sub-folder.
    category = item['image_file'].split('/')[0]
    image_path = os.path.join(root, item['image_file'])
    category_dir = os.path.join(dst_root, category)
    os.makedirs(category_dir, exist_ok=True)
    # Copy (the source image is kept) into the category folder.
    shutil.copy(image_path, category_dir)
print('done')
生成val文件夹,并将所选类别的验证集图片复制到val文件夹中
import os
import shutil
# Class ids to export; each one names a sub-directory under the validation
# root that is copied by the loop that follows this list.
wnids = ['n01498041', 'n01514859', 'n01582220', 'n01608432', 'n01616318',
'n01443537', 'n01776313', 'n01806567', 'n01833805', 'n01882714',
'n01910747', 'n01944390', 'n01985128', 'n02007558', 'n02071294',
'n02085620', 'n02114855', 'n02123045', 'n02128385', 'n02129165',
'n02129604', 'n02165456', 'n02190166', 'n02219486', 'n02226429',
'n02279972', 'n02317335', 'n02326432', 'n02342885', 'n02363005',
'n02391049', 'n02395406', 'n02403003', 'n02422699', 'n02442845',
'n02444819', 'n02480855', 'n02510455', 'n02640242', 'n02672831',
'n02687172', 'n02701002', 'n02730930', 'n02769748', 'n02782093',
'n02787622', 'n02793495', 'n02799071', 'n02802426', 'n02814860',
'n02840245', 'n02906734', 'n02948072', 'n02980441', 'n02999410',
'n03014705', 'n03028079', 'n03032252', 'n03125729', 'n03160309',
'n03179701', 'n03220513', 'n03249569', 'n03291819', 'n03384352',
'n03388043', 'n03450230', 'n03481172', 'n03594734', 'n03594945',
'n03627232', 'n03642806', 'n03649909', 'n03661043', 'n03676483',
'n03724870', 'n03733281', 'n03759954', 'n03761084', 'n03773504',
'n03804744', 'n03916031', 'n03938244', 'n04004767', 'n04026417',
'n04090263', 'n04133789', 'n04153751', 'n04296562', 'n04330267',
'n04371774', 'n04404412', 'n04465501', 'n04485082', 'n04507155',
'n04536866', 'n04579432', 'n04606251', 'n07714990', 'n07745940']
# Validation images are read from root/<wnid>/ and copied to output_path/<wnid>/.
root = "/grp01/cs_hszhao/cs002u03/dataset/ImageNet-1k/val_1k"
output_path = "/grp01/cs_hszhao/cs002u03/dataset/IN100/val"

for wnid in wnids:
    class_src = os.path.join(root, wnid)
    output_dir = os.path.join(output_path, wnid)
    # Creates the class folder when it is missing; an existing folder is left alone.
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(class_src):
        shutil.copy(os.path.join(class_src, filename), os.path.join(output_dir, filename))
MedMNIST
import os
import numpy as np
from PIL import Image
# 1. Load the npz archive and 2. pull out the train / val / test arrays.
#    The context manager closes the archive once the arrays are in memory.
#    Alternative datasets:
# data = np.load('breastmnist_224.npz')
# data = np.load("organsmnist_224.npz")
with np.load("pathmnist_224.npz") as data:
    train_images, train_labels = data['train_images'], data['train_labels']
    val_images, val_labels = data['val_images'], data['val_labels']
    test_images, test_labels = data['test_images'], data['test_labels']

# 3. Root folder for the exported images (keep in sync with the archive above).
# output_dir = "MedMnist/breastmnist_224"
output_dir = 'MedMnist/pathmnist_224'
# output_dir = 'MedMnist/organsmnist_224'
os.makedirs(output_dir, exist_ok=True)

# 4. Splits to export.  The train split is currently switched off.
data_splits = {
    # 'train': (train_images, train_labels),
    'val': (val_images, val_labels),
    'test': (test_images, test_labels)
}

# 5. Save every image under <output_dir>/<split>/<label>/<index>.png.
for split_name, (images, labels) in data_splits.items():
    # Root folder of this split (e.g. val/, test/).
    split_dir = os.path.join(output_dir, split_name)
    os.makedirs(split_dir, exist_ok=True)

    for idx, (image, label) in enumerate(zip(images, labels)):
        # NOTE(review): each label is presumably a size-1 array (labels stored
        # as (N, 1)) - confirm.  int() on such an array is deprecated since
        # NumPy 1.25, so the scalar is extracted explicitly; .item() raises
        # ValueError if the label holds more than one value.
        label_value = int(np.asarray(label).item())

        # Sub-folder named after the label (e.g. val/0/, val/1/).
        label_dir = os.path.join(split_dir, str(label_value))
        os.makedirs(label_dir, exist_ok=True)

        # File name is the zero-padded index within the split (e.g. val/0/00001.png).
        image_filename = os.path.join(label_dir, f"{idx:05d}.png")

        # if len(image.shape) == 2:
        #     image = np.expand_dims(image, axis=-1)
        # Cast to uint8 before handing the array to PIL.
        if image.dtype != np.uint8:
            image = image.astype(np.uint8)

        pil_image = Image.fromarray(image)
        pil_image.save(image_filename)

    print(f"{split_name} images saved to {split_dir}")

print("All images have been saved successfully!")

被折叠的 条评论
为什么被折叠?



