torchvision 数据加载和可视化:ImageFolder、make_grid

本文介绍使用PyTorch框架下的torchvision工具包进行图像处理的方法。内容涵盖图像转换、数据集加载、图像分类及数据可视化等关键步骤,并提供实际代码示例。

torchvision 是与 PyTorch 框架配套的、相当好用的工具包,它封装了最流行的数据集(torchvision.datasets)、模型(torchvision.models)、常用于 CV 的图像转换组件(torchvision.transforms)以及其它工具:

有时间一定要通读一遍官方文档 TORCHVISION,内容不多,简明易懂,有助于上手。


以 notebook 的方式实践 torchvision
# 导入必要的包
import torch
import torchvision
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import models, datasets, transforms

%matplotlib inline  # IPython 魔法命令:使 matplotlib 图像内嵌显示,无需调用 plt.show()(旧写法 %pylab inline 已被弃用)
一、torchvision.transforms

transforms.Compose([ ... ]) 定义常用的图像转换流程,以字典的方式保存方便调用:

# Pick transform components and parameters according to the dataset's image size.
data_transforms = {


    # Transform pipeline for the training split.
    # NOTE(review): this excerpt is truncated — the 'train' Compose list and the
    # dict itself are never closed in the visible source; a 'val' entry is likely
    # missing as well.
    'train': transforms.Compose([
        transforms.Resize(230),  # scale so the SHORTER edge becomes 230, keeping aspect ratio
                                 # (legacy spelling: transforms.Scale(230), since removed)
        transforms.CenterCrop(224),  # center-crop to a 224×224 image
        transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip; flip probability p=0.5 (the default)
        transforms.ToTensor(), 
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import json
import os
import torch.optim as optim
from model import MobileNetV2
import numpy as np
from torch.optim.lr_scheduler import CosineAnnealingLR


def _build_transforms():
    """Return the train/val preprocessing pipelines (ImageNet mean/std)."""
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    return {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(10),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.ToTensor(),
            normalize,
        ]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]),
    }


def _train_one_epoch(net, loader, device, optimizer, loss_function):
    """Run one training epoch.

    Returns (average loss per batch, accuracy in percent).
    """
    net.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for step, (images, labels) in enumerate(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        # Gradient clipping keeps the fine-tuning updates stable.
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if step % 10 == 0:  # throttle the progress-bar output
            rate = (step + 1) / len(loader)
            done = "*" * int(rate * 30)
            todo = "." * int((1 - rate) * 30)
            print("\rtrain: {:3.0f}%[{}->{}] loss: {:.4f}".format(
                rate * 100, done, todo, loss.item()), end="")
    return running_loss / len(loader), 100 * correct / total


def _evaluate(net, loader, device):
    """Return (correct, total) prediction counts over `loader` in eval mode."""
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total


def main():
    """Fine-tune MobileNetV2 on a 3-class ImageFolder dataset.

    Freezes the backbone at first, trains the classifier head, progressively
    unfreezes the last backbone blocks, and early-stops on validation accuracy.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    data_transform = _build_transforms()

    image_path = "./data_set/"
    train_dataset = datasets.ImageFolder(root=image_path + "train",
                                         transform=data_transform["train"])
    train_num = len(train_dataset)
    validate_dataset = datasets.ImageFolder(root=image_path + "val",
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    print(f"Train samples: {train_num}, Val samples: {val_num}")
    print(f"Class distribution - Train: {train_dataset.class_to_idx}")

    # Per-class sample counts (diagnostic output only).
    class_counts = [len([x for x in train_dataset.samples if x[1] == i])
                    for i in range(len(train_dataset.classes))]
    print(f"Class counts: {class_counts}")

    batch_size = 16  # small batch size to limit memory pressure
    # num_workers=0 avoids DataLoader multiprocessing issues on Windows.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=0)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=0)

    net = MobileNetV2(num_classes=3)

    # Load pretrained weights, keeping only tensors whose name AND shape match
    # the target network (so a different classifier head is skipped cleanly).
    model_weight_path = "./mobilenet_v2-b0353104.pth"
    if os.path.exists(model_weight_path):
        pre_weights = torch.load(model_weight_path, map_location=device)
        net_state = net.state_dict()  # FIX: fetch once instead of per key
        pre_dict = {k: v for k, v in pre_weights.items()
                    if k in net_state and net_state[k].shape == v.shape}
        net.load_state_dict(pre_dict, strict=False)
        print("Loaded pretrained weights")
    else:
        print("Pretrained weights not found, training from scratch")

    # Freeze the feature extractor; train only the classifier head at first.
    for param in net.features.parameters():
        param.requires_grad = False
    for param in net.classifier.parameters():
        param.requires_grad = True

    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    # Discriminative learning rates: tiny LR for the backbone (once unfrozen),
    # a larger LR for the freshly initialized classifier head.
    optimizer = optim.Adam([
        {'params': net.features.parameters(), 'lr': 0.00001},
        {'params': net.classifier.parameters(), 'lr': 0.0001},
    ], weight_decay=0.001)
    scheduler = CosineAnnealingLR(optimizer, T_max=30, eta_min=1e-6)

    # Early-stopping state.
    best_acc = 0.0
    patience = 8
    patience_counter = 0
    save_path = './bestmodel.pth'

    # FIX: write the index -> class-name mapping once, before training.
    # The original rewrote class_indices.json on every epoch.
    cla_dict = {v: k for k, v in train_dataset.class_to_idx.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(cla_dict, indent=4))

    print("Starting training...")
    for epoch in range(30):
        avg_loss, train_acc = _train_one_epoch(net, train_loader, device,
                                               optimizer, loss_function)
        print(f"\nEpoch {epoch+1}: Train Loss: {avg_loss:.4f}, "
              f"Train Acc: {train_acc:.2f}%")

        correct_val, total_val = _evaluate(net, validate_loader, device)
        val_accurate = correct_val / total_val
        print(f"Val Accuracy: {val_accurate:.4f} ({correct_val}/{total_val})")

        scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']
        print(f"Current LR: {current_lr:.8f}")

        # Keep the best checkpoint; count epochs without improvement.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
            patience_counter = 0
            print(f"✅ New best model saved with accuracy: {best_acc:.4f}")
        else:
            patience_counter += 1
            print(f"⏳ No improvement for {patience_counter} epochs")

        # Progressive unfreezing: after 10 epochs, release the last three
        # backbone blocks so they can be fine-tuned at the small backbone LR.
        if epoch == 10:
            print("Unfreezing last 3 feature layers...")
            for block in list(net.features.children())[-3:]:
                for p in block.parameters():
                    p.requires_grad = True

        if patience_counter >= patience:
            print(f"🛑 Early stopping at epoch {epoch+1}")
            break
        print('-' * 50)

    print('🎉 Finished Training')
    print(f'🏆 Best validation accuracy: {best_acc:.4f}')


if __name__ == '__main__':
    # Required on Windows when the script is frozen into an executable.
    torch.multiprocessing.freeze_support()
    main()

# (reader comment from the blog) It would be more intuitive to also write code
# that displays sample images.
最新发布
10-02
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from sklearn.utils import shuffle
from torch.utils.data import random_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns


class VGG16(nn.Module):
    """Plain VGG-16 (configuration D) for 224x224 RGB input."""

    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        # Five conv stages; each MaxPool halves spatial size: 224 -> 7.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten (N, 512, 7, 7) -> (N, 25088)
        x = self.classifier(x)
        return x


# Hyper-parameters
num_classes = 10
batch_size = 16
num_epochs = 100
lr = 0.0001

# Data augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Load the dataset. NOTE: ImageFolder requires one sub-directory per class
# under the root (this missing layout caused the FileNotFoundError quoted at
# the bottom). Raw string avoids accidental backslash escapes on Windows.
dataset = ImageFolder(r'D:\实习\dataset1', transform=transform)
torch.manual_seed(42)

# Reproducible 70/20/10 train/val/test split.
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_set, val_set, test_set = random_split(
    dataset, [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

# Model, loss, optimizer, device
model = VGG16(num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Accuracy / loss history collected for plotting.
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

for epoch in range(num_epochs):
    # ---- training ----
    # FIX: the original never toggled train/eval mode, so Dropout stayed
    # active during validation and testing.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    epoch_loss = running_loss / len(train_set)
    epoch_accuracy = 100 * correct / total
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, "
          f"Train Accuracy: {epoch_accuracy:.2f}%")

    # ---- validation ----
    model.eval()  # FIX: disable Dropout for evaluation
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            total_loss += criterion(outputs, labels).item()
    val_accuracy = 100 * correct / total
    val_loss = total_loss / len(val_loader)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

# ---- test ----
model.eval()
y_pred = []
y_true = []
test_loss = 0.0
correct = 0  # FIX: the original carried the last validation epoch's
total = 0    # correct/total into the test metrics, corrupting test accuracy
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        y_pred.extend(predicted.tolist())
        y_true.extend(labels.tolist())
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # FIX: accumulate a float, not a tensor.
        test_loss += criterion(outputs, labels).item()
test_accuracy = 100 * correct / total
test_loss = test_loss / len(test_loader)
print(f'Test Loss: {test_loss},Test Accuracy: {test_accuracy:.2f}%')

# Confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=range(num_classes), yticklabels=range(num_classes))
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig('E:/Confusion_Matrix.png')

# Additional classification metrics
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred, average='macro')}")
print(f"F1 Score: {f1_score(y_true, y_pred, average='macro')}")
print(f"Precision: {precision_score(y_true, y_pred, average='macro')}")

# Accuracy curves for train/validation
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

# Loss curves for train/validation
plt.subplot(1, 2, 2)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
# FIX: the original path literal contained '\t' (interpreted as a tab); a raw
# string keeps the Windows backslashes intact.
plt.savefig(r'D:\实习\train_vali\training_validation_loss_accuracy.png')

# ---------------------------------------------------------------------------
# Reported error when running this script:
#   FileNotFoundError: Couldn't find any class folder in D:\实习\dataset1.
# Raised inside torchvision.datasets.ImageFolder.find_classes: the dataset
# root must contain one sub-directory per class (e.g. dataset1/cat,
# dataset1/dog); images placed directly in the root are not discovered.
# ---------------------------------------------------------------------------
06-23
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Skr.B

WUHOOO~

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值