Traffic Sign Recognition

This post walks through a traffic sign recognition project covering single-label, multi-class classification, with model training and evaluation in Python. The dataset contains over 50,000 samples across 43 classes; we focus on recognizing four of them. The post covers dataset splitting, data loading, model definition (two variants), the training and validation loop, and finally model evaluation, including a confusion matrix and single-image prediction. It closes with possible reasons, and fixes, for validation accuracy exceeding training accuracy.

1. Background

The task is single-label, multi-class classification: over 50,000 samples in 43 classes, plus an annotation file (file name, width, height, and bounding-box coordinates). The classes are imbalanced, and the images vary in size.

Data source: German Traffic Sign Benchmarks

After downloading and unpacking the training and test archives, the training images sit in one numbered folder per class.

Here we only recognize four classes: "STOP", "禁止通行" (no entry), "直行" (go straight), and "环岛行驶" (roundabout). Their class folders under the training directory are 00014, 00017, 00035, and 00040 respectively.

2. Dataset Splitting

1. Split the Final_Training directory 7:2:1 into train, val, and test folders, each containing the four class subdirectories "STOP", "禁止通行", "直行", and "环岛行驶".


import shutil
from pathlib import Path
from glob import glob
import numpy as np

def split_train_val_test_dataset(data_dir, data_sets, class_names, class_indices, train_folder):
    # 1. Create the target directories
    for dt in data_sets:
        for cls in class_names:
            # With exist_ok=True, no FileExistsError is raised if the directory already exists
            (data_dir/dt/cls).mkdir(parents=True, exist_ok=True)

    # 2. Split the original dataset and copy the images into the target folders
    for i, cls_index in enumerate(class_indices):
        img_paths = np.array(glob(f'{train_folder[cls_index]}/*.ppm'))
        class_name = class_names[i]  # label
        print(f'{class_name}: {len(img_paths)}')
        np.random.shuffle(img_paths)  # shuffle the image paths
        # Split img_paths by index; indices_or_sections defines the cut points (at 70% and 90%)
        ds_split = np.split(
            img_paths,
            indices_or_sections=[int(0.7*len(img_paths)), int(0.9*len(img_paths))]
        )
        dataset = zip(data_sets, ds_split)  # pair each subset name with its paths

        for dt, split_paths in dataset:
            print(f'\t{dt}, {len(split_paths)}')
            for path in split_paths:
                shutil.copy(path, f'{data_dir}/{dt}/{class_name}/')

2. Invocation

def main():
    TRAIN_PATH = '../data/GTSRB_Final_Training_Images/GTSRB/Final_Training/Images/*'
    TEST_PATH = '../data/GTSRB_Final_Test_Images/GTSRB/Final_Test/Images/*'

    CLASS_NAMES = ['STOP', '禁止通行', '直行', '环岛行驶']
    CLASS_INDICES = [14, 17, 35, 40]  # folder numbers of the four classes: 00014, 00017, 00035, 00040
    # Target directory: split the training data into train:val:test = 7:2:1
    DATA_DIR = Path('../data/New_Data_4_classes')
    DATASETS = ['train', 'val', 'test']

    train_folder = sorted(glob(TRAIN_PATH))
    test_folder = sorted(glob(TEST_PATH))  # the official test set is not used for this split
    split_train_val_test_dataset(DATA_DIR, DATASETS, CLASS_NAMES, CLASS_INDICES, train_folder)
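The split relies on np.random.shuffle, so fixing NumPy's seed beforehand makes it reproducible (a detail the original code omits; the seed value is arbitrary):

import numpy as np

np.random.seed(42)  # any fixed value makes the shuffle, and thus the split, repeatable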

Image counts per directory:

STOP: 780
    train, 546
    val, 156
    test, 78
禁止通行: 1110
    train, 777
    val, 222
    test, 111
直行: 1200
    train, 840
    val, 240
    test, 120
环岛行驶: 360
    train, 251
    val, 73
    test, 36
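The counts above can be verified after copying; a minimal check, assuming the directory layout built earlier:

from glob import glob

for dt in ['train', 'val', 'test']:
    for cls in ['STOP', '禁止通行', '直行', '环岛行驶']:
        n = len(glob(f'../data/New_Data_4_classes/{dt}/{cls}/*.ppm'))
        print(f'{dt}/{cls}: {n}')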

3. Data Loading

1. Transforms

from torchvision import transforms

def get_transform():
    # ImageNet channel means/stds, required by the pretrained ResNet
    mean_nums = [0.485, 0.456, 0.406]
    std_nums = [0.229, 0.224, 0.225]

    transform = {
        # Training: random crops, rotations, and flips for augmentation.
        # Note the 256-pixel training crop vs. the 224-pixel val/test crop;
        # ResNet-50's adaptive pooling accepts both sizes.
        'train': transforms.Compose([
            transforms.RandomResizedCrop(size=256),
            transforms.RandomRotation(degrees=15),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean_nums, std_nums)
        ]),
        # Validation/test: deterministic resize and center crop
        'val': transforms.Compose([
            transforms.Resize(size=256),
            transforms.CenterCrop(size=224),
            transforms.ToTensor(),
            transforms.Normalize(mean_nums, std_nums)
        ]),
        'test': transforms.Compose([
            transforms.Resize(size=256),
            transforms.CenterCrop(size=224),
            transforms.ToTensor(),
            transforms.Normalize(mean_nums, std_nums)
        ])
    }
    return transform
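Normalized tensors do not display well directly; a small helper (not in the original post) undoes the normalization so an augmented sample can be inspected:

import matplotlib.pyplot as plt
import numpy as np

def imshow_tensor(img_tensor):
    # Undo Normalize(mean, std) and reorder CHW -> HWC for matplotlib
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    img = img_tensor.numpy().transpose((1, 2, 0))
    img = np.clip(std * img + mean, 0, 1)
    plt.imshow(img)
    plt.axis('off')
    plt.show()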

2. Batched loading

from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

def load_data(data_dir, data_sets, batch_size=8):
    transform = get_transform()
    image_datasets = {
        d: ImageFolder(f'{data_dir}/{d}', transform[d]) for d in data_sets
    }
    data_loaders = {
        d: DataLoader(image_datasets[d], batch_size=batch_size, shuffle=True, pin_memory=True) for d in data_sets
    }
    data_size = {
        d: len(image_datasets[d]) for d in data_sets
    }
    print(data_size)
    # {'train': 3154, 'val': 1238, 'test': 657}
    class_names = image_datasets['train'].classes  # ImageFolder sorts the class folders
    print(class_names)
    # ['STOP', '环岛行驶', '直行', '禁止通行']
    return data_loaders, class_names

3. Invocation

from pathlib import Path
def main():
    DATA_DIR = Path('../data/New_Data_4_classes')
    DATASETS = ['train', 'val', 'test']
    data_loaders, class_names = load_data(DATA_DIR, DATASETS)
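A quick sanity check on one batch, assuming the loaders above; the shapes follow from batch_size=8 and the 256-pixel training crop:

images, labels = next(iter(data_loaders['train']))
print(images.shape, labels.shape)
# torch.Size([8, 3, 256, 256]) torch.Size([8])
print([class_names[i] for i in labels])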

4. Model Definition

4.1 Model 1

from torch import nn
from torchvision import models

def create_model(n_classes):
    # 1. Load the pretrained model
    model = models.resnet50(pretrained=True)
    # 2. Replace the fully connected layer: one output per class
    model.fc = nn.Linear(model.fc.in_features, n_classes)
    return model

Print the model:

model = create_model(4)
print(model)

The last layer:

(fc): Linear(in_features=2048, out_features=4, bias=True)

4.2 Model 2

def create_model_2(n_classes):
    # 1. Load the pretrained model and freeze the backbone
    model = models.resnet50(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    # 2. Replace the fully connected layer with a deeper head
    model.fc = nn.Sequential(
        nn.Flatten(),
        nn.BatchNorm1d(2048),
        nn.Dropout(0.5),
        nn.Linear(2048, 512),
        nn.ReLU(),

        nn.BatchNorm1d(512),
        nn.Dropout(0.5),
        nn.Linear(512, n_classes),
        nn.LogSoftmax(dim=1)  # outputs log-probabilities, so pair with NLLLoss rather than CrossEntropyLoss
    )
    return model
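Because model 2 ends in LogSoftmax and freezes the backbone, its training setup differs slightly from model 1's; a minimal sketch (model2 is an illustrative name, not from the original post):

import torch.nn as nn
import torch.optim as optim

model2 = create_model_2(4)
criterion = nn.NLLLoss()  # expects log-probabilities, matching the LogSoftmax head
# The backbone is frozen, so only the new head's parameters need optimizing
optimizer = optim.Adam(model2.fc.parameters(), lr=0.001)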

5. Model Training

5.1 Training one epoch

def train_epoch(model, train_loader, criterion, optimizer, scheduler):
    model.train()
    train_loss = 0.0
    for batch_id, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)  # criterion returns the batch mean, so re-weight by batch size
    scheduler.step()  # update the learning rate
    train_loss /= len(train_loader.dataset)
    return train_loss

5.2 Validating one epoch

def val_epoch(model, val_loader, criterion):
    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            val_loss += criterion(output, target).item() * data.size(0)  # batch mean -> batch sum
            _, pred = torch.max(output, dim=1)
            val_acc += torch.sum(pred == target).item()
    val_loss /= len(val_loader.dataset)
    val_acc /= len(val_loader.dataset)
    return val_loss, val_acc

5.3 Main training loop

def train(model, data_loader, criterion, optimizer, scheduler, epochs, model_path):
    print('{0:>20} | {1:>20} | {2:>20} | {3:>20} |'.format('Epoch', 'Training loss', 'Val loss', 'Val acc'))
    best_loss = np.inf
    for epoch in range(epochs):
        train_loss = train_epoch(model, data_loader['train'], criterion, optimizer, scheduler)
        val_loss, val_acc = val_epoch(model, data_loader['val'], criterion)
        print('{0:>20} | {1:>20} | {2:>20} | {3:>20.2f} |'.format(epoch, train_loss, val_loss, val_acc))

        # Save a checkpoint whenever the validation loss improves
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), model_path)
    # Returns the last-epoch model; the caller reloads the best checkpoint from model_path
    return model

5.4 Invocation

from pathlib import Path
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

def main():
    EPOCHS = 10
    BATCH_SIZE = 8
    DATA_DIR = Path('../data/New_Data_4_classes')
    DATASETS = ['train', 'val', 'test']
    MODEL_PATH = '../model/best_model.pkl'
    data_loaders, class_names = load_data(DATA_DIR, DATASETS, BATCH_SIZE)
    model = create_model(len(class_names))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # decay the LR by 10x every 7 epochs

    model = train(model, data_loaders, criterion, optimizer, scheduler, EPOCHS, MODEL_PATH)
    model.load_state_dict(torch.load(MODEL_PATH))  # reload the best checkpoint
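The code above runs on the CPU; a hedged sketch of the usual device handling (not shown in the original) moves the model and each batch to the GPU when one is available:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# and inside train_epoch / val_epoch, before the forward pass:
# data, target = data.to(device), target.to(device)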

6. Model Evaluation

6.1 Classification report

from sklearn.metrics import confusion_matrix, classification_report

# Collect predictions and true labels, then report per-class metrics
def get_predictions(model, data_loader, class_names):
    model.eval()
    y_pred_list = []
    y_test_list = []
    with torch.no_grad():
        for batch_id, (data, target) in enumerate(data_loader['test']):
            output = model(data)
            _, pred = torch.max(output, dim=1)
            y_pred_list.extend(pred)
            y_test_list.extend(target)
    y_pred_list = torch.as_tensor(y_pred_list).cpu()
    y_test_list = torch.as_tensor(y_test_list).cpu()
    # Note: classification_report takes y_true first, then y_pred
    print(classification_report(y_test_list, y_pred_list, target_names=class_names))
    return y_pred_list, y_test_list

6.2 Confusion matrix plot

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def plot_cm(y_test, y_pred, class_names):
    cm = confusion_matrix(y_test, y_pred)
    df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
    hmap = sns.heatmap(df_cm, annot=True)
    hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=0, ha='right')
    plt.ylabel('True Label')
    plt.xlabel('Pred Label')
    plt.show()
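Putting the two together, assuming the trained model and data_loaders from section 5:

y_pred, y_test = get_predictions(model, data_loaders, class_names)
plot_cm(y_test, y_pred, class_names)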

6.3 Single-image prediction

from PIL import Image
import torch.nn.functional as F

# Predict one image and return the per-class probabilities
def predict_proba(model, img_path):
    image = Image.open(img_path).convert('RGB')
    transform = get_transform()
    image = transform['test'](image).unsqueeze(0)  # add a batch dimension
    model.eval()
    with torch.no_grad():
        pred = model(image)
    prob = F.softmax(pred, dim=1)
    prob = prob.cpu().numpy().flatten()
    return prob

img_path = '00018.ppm'
pred = predict_proba(model, img_path)
# [0.526681   0.04194336 0.2741005  0.15727507]
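The probabilities follow ImageFolder's sorted class order, so mapping them back to names makes the result readable; a small usage sketch assuming class_names from section 3:

prob = predict_proba(model, img_path)
for name, p in zip(class_names, prob):
    print(f'{name}: {p:.4f}')
print('Predicted:', class_names[prob.argmax()])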

7. E&E

1. Why is the validation accuracy higher than the training accuracy?

Reason 1: the dataset is small, so the training and validation splits are not identically distributed; the training set ends up with higher internal variance than the validation set, and therefore higher error.

Solutions:

1. Collect more data;

2. Re-split the dataset so the class distribution is even across subsets, e.g. with a stratified split (see the sketch below).
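A minimal sketch of a stratified 7:2:1 split with scikit-learn; img_paths and labels are illustrative names for the full lists of image paths and class labels:

from sklearn.model_selection import train_test_split

# 70% train, 30% held out, with per-class proportions preserved
train_paths, rest_paths, train_y, rest_y = train_test_split(
    img_paths, labels, test_size=0.3, stratify=labels, random_state=42)
# Split the held-out 30% into val (20% overall) and test (10% overall)
val_paths, test_paths, val_y, test_y = train_test_split(
    rest_paths, rest_y, test_size=1/3, stratify=rest_y, random_state=42)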

Reason 2: dropout. It is only active during training; at validation time dropout is disabled and the full network is used, so validation accuracy can come out better than the training accuracy measured on the thinned networks.
