Bilibili: follow-along notes for 唐国强's 轻松学pytorch hands-on course
Cloud-drive link to the materials shared by the uploader:
https://pan.baidu.com/s/1TO2m6itlr_KLVXayE3gW8A?pwd=tmg1 (extraction code: tmg1)
Reference blog post: "CV 图像分类 - GTSRB, fine-tune resnet50" (优快云博客)
Both the uploader and the blog author work in JupyterLab, while I use PyCharm, so their code tends to throw errors when run directly. Below is my PyCharm version of the code after getting it to run:
import time
import matplotlib
import numpy as np
import pandas as pd
# matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import cv2
import seaborn as sns
import shutil
import os
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torchvision.utils as vutils
from torchvision import utils
from tqdm import tqdm
from pylab import rcParams
from matplotlib import rc
from matplotlib.ticker import MaxNLocator
from pathlib import Path
from glob import glob
from PIL import Image
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from torch.optim import lr_scheduler
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models
from torch import nn, optim

# Seaborn style and palette
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#93D30C", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

# Default matplotlib figure size
matplotlib.rcParams['figure.figsize'] = (12, 8)

# Random seeds for reproducibility
RANDOM_SEED = 666
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Train_Path = 'D:/PythonProject/GTSRB/GTSRB/Final_Training/Images/*'
Test_Path = 'D:/PythonProject/GTSRB/GTSRB/Final_Test/Images/*'

# Training-set class folders
train_folders = sorted(glob(Train_Path))
# Test-set files
test_files = sorted(glob(Test_Path))

print(len(train_folders))  # 43
print(len(test_files))     # 12631


# Load an image from a path
def load_image(img_path, resize=True):
    img = cv2.imread(img_path)  # read as-is (OpenCV returns BGR, no RGB conversion here)
    if img is None:
        raise ValueError(f"Image not found at path: {img_path}")
    if resize:
        img = cv2.resize(img, (64, 64))  # resize the image
    return img


# Display a single image
def show_img(img_path):
    img = load_image(img_path)
    # Convert BGR to RGB so the colors display correctly
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.show()  # make sure the figure is shown


# Display a batch of images as a single grid
def show_imgs_grid(img_paths):
    """
    img_paths : a list of image paths
    """
    images = [load_image(path) for path in img_paths]  # load the batch of images
    print("images length : ", len(images))
    images = torch.as_tensor(np.array(images))  # list -> tensor
    print("images shape : ", images.shape)
    images = images.permute(0, 3, 1, 2)  # NHWC -> NCHW
    print("images shape after permute : ", images.shape)
    grid_imgs = torchvision.utils.make_grid(images, nrow=8)  # stitch the images into one grid
    plt.figure(figsize=(24, 12))
    print("grid_imgs shape : ", grid_imgs.shape)
    plt.imshow(grid_imgs.permute(1, 2, 0))  # CHW -> HWC for matplotlib
    plt.axis('off')
    plt.show()


# Randomly pick one image path from each of the 43 class folders
sample_images = [np.random.choice(glob(f'{file_name}/*.ppm')) for file_name in train_folders]
# show_img(sample_images[0])       # show the first image
# show_imgs_grid(sample_images)    # show the whole batch

class_names = ['STOP', '禁止通行', '直行', '环岛行驶']
# Folder indices of these classes: 00014, 00017, 00035, 00040
class_indices = [14, 17, 35, 40]

# Create new directories and split the original training data into train / val / test (70% / 20% / 10%)
DATA_DIR = Path('New_Data_4_classes')
DATASETS = ['train', 'val', 'test']
for dt in DATASETS:
    for cls in class_names:
        # exist_ok=True: no FileExistsError if the target directory already exists
        (DATA_DIR / dt / cls).mkdir(parents=True, exist_ok=True)

# Copy images from the original dataset into the target folders
for i, cls_index in enumerate(class_indices):
    image_paths = np.array(glob(f'{train_folders[int(cls_index)]}/*.ppm'))  # all image paths of this class
    class_name = class_names[i]
    print(f'{class_name} : {len(image_paths)}')
    np.random.shuffle(image_paths)  # shuffle the image paths
    # Split the dataset: train 70%, val 20%, test 10% (essentially an index split)
    ds_split = np.split(
        image_paths,
        indices_or_sections=[int(0.7 * len(image_paths)), int(0.9 * len(image_paths))]
    )
    # Pair each split with its dataset name
    dataset = zip(DATASETS, ds_split)
    for dt, img_pathes in dataset:
        print(f'{dt}, {len(img_pathes)}')
        for path in img_pathes:
            # Copy the image
            shutil.copy(path, f'{DATA_DIR}/{dt}/{class_name}/')
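# Quick sanity check (my addition, not part of the course code): np.split with
# indices_or_sections=[int(0.7 * n), int(0.9 * n)] cuts one array into three consecutive pieces of
# roughly 70% / 20% / 10%, which is exactly what the copy loop above relies on.
demo = np.arange(10)
demo_split = np.split(demo, indices_or_sections=[int(0.7 * len(demo)), int(0.9 * len(demo))])
print([len(part) for part in demo_split])  # [7, 2, 1]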
# Data augmentation
mean_nums = [0.485, 0.456, 0.406]
std_nums = [0.229, 0.224, 0.225]

transform = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=256),   # random crop + resize
        transforms.RandomRotation(degrees=15),    # random rotation
        transforms.RandomHorizontalFlip(),        # random horizontal flip
        transforms.ToTensor(),                    # convert to tensor
        # note: unlike 'val'/'test', this pipeline does not apply Normalize
    ]),
    'val': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean_nums, std_nums)
    ]),
    'test': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean_nums, std_nums)
    ])
}

# Datasets (ImageFolder assumes one sub-folder per class, with the folder name as the class name)
Image_datasets = {d: ImageFolder(f'{DATA_DIR}/{d}', transform[d]) for d in DATASETS}

# Batched data loading
data_loaders = {d: DataLoader(Image_datasets[d], batch_size=8, shuffle=True) for d in DATASETS}

# Sizes of the train / val / test datasets
dataset_size = {d: len(Image_datasets[d]) for d in DATASETS}

# Class names as seen by the train dataset
class_names = Image_datasets['train'].classes

# Visualize a batch of augmented images (note: the Chinese class names need a Chinese font)
from matplotlib.font_manager import FontProperties


def imshow(inp, title=None):
    my_font = FontProperties(fname='SimHei.ttf', size=12)
    inp = inp.numpy().transpose((1, 2, 0))  # CHW -> HWC
    mean = np.array([mean_nums])
    std = np.array([std_nums])
    inp = std * inp + mean      # undo the normalization
    inp = np.clip(inp, 0, 1)    # clamp pixel values to [0, 1]
    plt.imshow(inp)
    if title is not None:
        plt.title(title, fontproperties=my_font)
    plt.axis('off')


# Fetch one batch of data
inputs, labels = next(iter(data_loaders['train']))
inputs = inputs.to('cpu')
out = torchvision.utils.make_grid(inputs).to('cpu')
imshow(out, title=[class_names[x.item()] for x in labels.to('cpu')])
plt.show()

# Transfer learning
pretrained_model_path = 'D:/PythonProject/GTSRB/resnet50-19c8e357.pth'


def create_model(n_classes, pretrained_path=None):
    model = models.resnet50(pretrained=False)  # build the architecture without downloading weights
    # Freeze the backbone parameters
    for param in model.parameters():
        param.requires_grad = False
    # If a local weight file is given, load the pretrained weights from it
    if pretrained_path:
        # map_location='cpu' ensures the weights load whether or not a GPU is available
        state_dict = torch.load(pretrained_path, map_location=torch.device('cpu'))
        model.load_state_dict(state_dict)
    # Replace the fully connected layer with a new head for our classes (the new layer stays trainable)
    n_features = model.fc.in_features
    model.fc = nn.Linear(n_features, n_classes)
    # Move the model to the appropriate device (CPU or GPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return model.to(device)


# Build the classifier, loading the locally downloaded ResNet-50 weights
clf_model = create_model(len(class_names), pretrained_path=pretrained_model_path)
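# Optional check (my addition, not part of the course code): with the backbone frozen, only the new
# fc head should be trainable. For resnet50 (fc.in_features = 2048) and 4 classes that is
# 2048 * 4 weights + 4 biases.
trainable = [name for name, p in clf_model.named_parameters() if p.requires_grad]
print(trainable)  # expected: ['fc.weight', 'fc.bias']
print(sum(p.numel() for p in clf_model.parameters() if p.requires_grad))  # expected: 8196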
# Training function
def train(model, data_loader, criterion, optimizer, device, scheduler, n_examples):
    model.train()
    train_loss = []
    correct_pred = 0  # number of correctly classified images
    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()                       # reset gradients
        outputs = model(inputs)                     # forward pass
        loss = criterion(outputs, labels)           # compute the loss
        _, preds = torch.max(outputs, dim=1)        # index of the highest score
        correct_pred += torch.sum(preds == labels)  # accumulate correct predictions
        train_loss.append(loss.item())              # accumulate the loss
        loss.backward()                             # backpropagation
        optimizer.step()                            # update the parameters
        scheduler.step()  # note: stepped per batch here; StepLR(step_size=7) is more commonly stepped once per epoch
    # Return the mean loss and the accuracy
    return np.mean(train_loss), correct_pred.double() / n_examples


# Validation function
def evaluation(model, data_loader, criterion, device, n_examples):
    model.eval()
    eval_loss = []
    correct_pred = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)                     # forward pass
            loss = criterion(outputs, labels)           # loss
            _, preds = torch.max(outputs, dim=1)        # index of the highest score
            correct_pred += torch.sum(preds == labels)  # accumulate correct predictions
            eval_loss.append(loss.item())               # accumulate the loss
    return np.mean(eval_loss), correct_pred.double() / n_examples


# Run the full training loop
def train_model(model, data_loader, dataset_size, device, n_epochs=2):
    optimizer = optim.Adam(model.parameters(), lr=0.001)                # optimizer
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # learning-rate schedule
    criterion = nn.CrossEntropyLoss().to(device)                        # loss function

    best_accuracy = 0.0
    history = defaultdict(list)  # dict whose default value is a list, for the training history

    for epoch in range(n_epochs):
        print(f'\n-- Epoch : {epoch + 1} / {n_epochs}')

        train_loss, train_accuracy = train(model, data_loader['train'], criterion, optimizer,
                                           device, scheduler, dataset_size['train'])
        print(f'Train Loss : {train_loss}, Train accuracy : {train_accuracy}')

        val_loss, val_accuracy = evaluation(model, data_loader['val'], criterion,
                                            device, dataset_size['val'])
        print(f'Val loss : {val_loss}, val accuracy : {val_accuracy}')

        # Store plain scalar values in the history (convert tensors where needed)
        train_accuracy_scalar = train_accuracy.item()
        train_loss_scalar = train_loss.item() if isinstance(train_loss, torch.Tensor) else train_loss
        val_accuracy_scalar = val_accuracy.item()
        val_loss_scalar = val_loss.item() if isinstance(val_loss, torch.Tensor) else val_loss
        history['train_acc'].append(train_accuracy_scalar)
        history['train_loss'].append(train_loss_scalar)
        history['val_acc'].append(val_accuracy_scalar)
        history['val_loss'].append(val_loss_scalar)

        if val_accuracy > best_accuracy:
            # Save the best model so far
            torch.save(model.state_dict(), 'best_model_state_2.pkl')
            best_accuracy = val_accuracy

    print(f'==== Best Accuracy : {best_accuracy}')
    # Reload the best checkpoint
    model.load_state_dict(torch.load("best_model_state_2.pkl"))
    return model, history


start_time = time.time()
best_model, history = train_model(clf_model, data_loaders, dataset_size, device)
end_time = time.time()
elapsed_time = end_time - start_time  # training time in seconds


# Plot the loss / accuracy curves
def plot_training_history(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    ax1.plot(history['train_loss'], label='train loss')
    ax1.plot(history['val_loss'], label='val loss')
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend()
    ax1.set_ylabel('Loss')
    ax1.set_xlabel('Epoch')

    ax2.plot(history['train_acc'], label='train acc')
    ax2.plot(history['val_acc'], label='val acc')
    ax2.set_ylim([-0.05, 1.05])
    ax2.legend()
    ax2.set_ylabel('Accuracy')
    ax2.set_xlabel('Epoch')

    fig.suptitle('Training History')
    plt.show()


plot_training_history(history)
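# A minimal single-image inference sketch (my addition; 'some_test_image.ppm' is a placeholder path,
# not a file from the course material). It reuses the 'test' transform and the best_model returned above.
def predict_single_image(img_path, model, transform, class_names):
    image = Image.open(img_path).convert('RGB')        # torchvision transforms expect a PIL RGB image
    tensor = transform(image).unsqueeze(0).to(device)  # add the batch dimension
    model.eval()
    with torch.no_grad():
        pred = model(tensor).argmax(dim=1).item()
    return class_names[pred]

# Example usage (uncomment and point to a real image):
# print(predict_single_image('some_test_image.ppm', best_model, transform['test'], class_names))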
# Visualize predictions on the test set
def show_predictions(model, class_names, n_imgs=6):
    model.eval()
    images_handled = 0
    plt.figure()
    with torch.no_grad():
        my_font = FontProperties(fname='SimHei.ttf', size=12)
        for i, (inputs, labels) in enumerate(data_loaders['test']):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, dim=1)
            for j in range(inputs.shape[0]):
                images_handled += 1
                ax = plt.subplot(2, n_imgs // 2, images_handled)
                ax.set_title(f'predicted : {class_names[preds[j]]}', fontproperties=my_font)
                imshow(inputs.cpu().data[j])
                ax.axis('off')
                if images_handled == n_imgs:
                    plt.show()  # show the figure before returning (needed outside Jupyter)
                    return
    plt.show()


show_predictions(best_model, class_names, n_imgs=8)


# Per-class evaluation of the results
def get_predictions(model, data_loaders):
    model.eval()
    predictions = []  # predicted labels
    real_values = []  # ground-truth labels
    with torch.no_grad():
        for inputs, labels in data_loaders:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)               # forward pass
            _, preds = torch.max(outputs, dim=1)  # index of the highest score
            # Collect predictions and ground truth
            predictions.extend(preds)
            real_values.extend(labels)
    # print(predictions)
    # print(real_values)
    # Convert to CPU tensors
    predictions = torch.as_tensor(predictions).cpu()
    real_values = torch.as_tensor(real_values).cpu()
    return predictions, real_values


y_pred, y_test = get_predictions(best_model, data_loaders['test'])
print(classification_report(y_test, y_pred, target_names=class_names))
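# Follow-up (my addition, not in the course code): confusion_matrix and seaborn are imported above
# but never used, so the predictions can also be summarized as a heatmap. The Chinese class names on
# the axes may need a Chinese font (e.g. SimHei) configured for matplotlib to render properly.
cm = confusion_matrix(y_test, y_pred)
df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
plt.figure(figsize=(8, 6))
sns.heatmap(df_cm, annot=True, fmt='d', cmap='Blues')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()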