1. Background
The task involves single- and multi-class classification: 50,000+ samples in total, 43 classes, and an annotation file (filename, width, height, and ROI coordinates). Class sizes are unbalanced and image dimensions vary.
Data source: German Traffic Sign Benchmarks

After downloading and extracting, the training and test data live under GTSRB/Final_Training/Images and GTSRB/Final_Test/Images respectively (see the paths used in the code below).

Only four classes are recognized here: 'STOP', '禁止通行' (no entry), '直行' (go straight), and '环岛行驶' (roundabout), corresponding to class folders 00014, 00017, 00035, and 00040 under the training directory.
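For reference, the annotation files can be inspected with pandas. This is a minimal sketch: the GTSRB training release ships one semicolon-separated GT-<class>.csv per class folder, and the exact path and column names here are assumptions based on the standard distribution.

import pandas as pd

# Assumed location of one per-class annotation file in the standard GTSRB layout
csv_path = '../data/GTSRB_Final_Training_Images/GTSRB/Final_Training/Images/00014/GT-00014.csv'
annotations = pd.read_csv(csv_path, sep=';')  # GTSRB annotations are semicolon-separated
print(annotations.head())  # filename, width, height, ROI coordinates, class id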
2. Dataset Splitting
1. Split the Final_Training directory 7:2:1 into train, validation, and test folders; each folder gets four subdirectories, one per class ('STOP', '禁止通行', '直行', '环岛行驶').

import shutil
from pathlib import Path
from glob import glob
import numpy as np

def split_train_val_test_dataset(data_dir, data_sets, class_names, class_indices, train_folder):
    # 1. Create the target directories
    for dt in data_sets:
        for cls in class_names:
            # With exist_ok=True, no FileExistsError is raised if the directory already exists
            (data_dir/dt/cls).mkdir(parents=True, exist_ok=True)
    # 2. Split the original dataset and copy the images to the target folders
    for i, cls_index in enumerate(class_indices):
        img_paths = np.array(glob(f'{train_folder[int(cls_index)]}/*.ppm'))
        class_name = class_names[i]  # label
        print(f'{class_name}: {len(img_paths)}')
        np.random.shuffle(img_paths)  # shuffle the image paths
        # Split img_paths by index; indices_or_sections defines the split points (at 0.7 and 0.9)
        ds_split = np.split(
            img_paths,
            indices_or_sections=[int(0.7*len(img_paths)), int(0.9*len(img_paths))]
        )
        dataset = zip(data_sets, ds_split)  # pair each subset name with its paths
        for dt, img_paths in dataset:
            print(f'\t{dt}, {len(img_paths)}')
            for path in img_paths:
                shutil.copy(path, f'{data_dir}/{dt}/{class_name}/')
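Note that np.random.shuffle makes the split non-deterministic across runs. If a reproducible split is needed, seed NumPy's generator before calling the function, e.g.:

np.random.seed(42)  # any fixed seed makes the 7:2:1 split reproducible
split_train_val_test_dataset(data_dir, data_sets, class_names, class_indices, train_folder)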
2. Invocation
def main():
    TRAIN_PATH = '../data/GTSRB_Final_Training_Images/GTSRB/Final_Training/Images/*'
    TEST_PATH = '../data/GTSRB_Final_Test_Images/GTSRB/Final_Test/Images/*'
    CLASS_NAMES = ['STOP', '禁止通行', '直行', '环岛行驶']
    CLASS_INDICES = [14, 17, 35, 40]  # folder indices of the classes: 00014, 00017, 00035, 00040
    # Create the new directories and split the training data into train:val:test = 7:2:1
    DATA_DIR = Path('../data/New_Data_4_classes')
    DATASETS = ['train', 'val', 'test']
    train_folder = sorted(glob(TRAIN_PATH))
    test_folder = sorted(glob(TEST_PATH))
    split_train_val_test_dataset(DATA_DIR, DATASETS, CLASS_NAMES, CLASS_INDICES, train_folder)
Image counts per directory:
STOP: 780
train, 546
val, 156
test, 78
禁止通行: 1110
train, 777
val, 222
test, 111
直行: 1200
train, 840
val, 240
test, 120
环岛行驶: 360
train, 251
val, 73
test, 36
3. Data Loading
1. Transforms
from torchvision import transforms

def get_transform():
    # ImageNet channel statistics, matching the pretrained ResNet-50 backbone
    mean_nums = [0.485, 0.456, 0.406]
    std_nums = [0.229, 0.224, 0.225]
    transform = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(size=256),
            transforms.RandomRotation(degrees=15),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean_nums, std_nums)
        ]),
        'val': transforms.Compose([
            transforms.Resize(size=256),
            transforms.CenterCrop(size=224),
            transforms.ToTensor(),
            transforms.Normalize(mean_nums, std_nums)
        ]),
        'test': transforms.Compose([
            transforms.Resize(size=256),
            transforms.CenterCrop(size=224),
            transforms.ToTensor(),
            transforms.Normalize(mean_nums, std_nums)
        ])
    }
    return transform
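To eyeball what the augmented training images actually look like, it helps to undo the normalization before plotting. A small helper sketch (not part of the original post):

import matplotlib.pyplot as plt
import numpy as np

def imshow_tensor(img_tensor, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    # Undo Normalize(mean, std) and convert CHW -> HWC for matplotlib
    img = img_tensor.numpy().transpose((1, 2, 0))
    img = np.clip(img * np.array(std) + np.array(mean), 0, 1)
    plt.imshow(img)
    plt.axis('off')
    plt.show()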
2. Batched Loading
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

def load_data(data_dir, data_sets, batch_size=8):
    transform = get_transform()
    image_datasets = {
        d: ImageFolder(f'{data_dir}/{d}', transform[d]) for d in data_sets
    }
    data_loaders = {
        d: DataLoader(image_datasets[d], batch_size=batch_size, shuffle=True, pin_memory=True) for d in data_sets
    }
    data_size = {
        d: len(image_datasets[d]) for d in data_sets
    }
    print(data_size)
    # {'train': 3154, 'val': 1238, 'test': 657}
    class_names = image_datasets['train'].classes
    print(class_names)
    # ['STOP', '环岛行驶', '直行', '禁止通行']
    return data_loaders, class_names
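Note that the train transform crops to 256×256 while val/test center-crop to 224×224; torchvision's ResNet-50 accepts both because it uses adaptive average pooling before the classifier head. A quick sanity check on one batch (a sketch, assuming the loaders above):

images, labels = next(iter(data_loaders['train']))
print(images.shape)  # torch.Size([8, 3, 256, 256]) with batch_size=8
print([class_names[i] for i in labels])  # human-readable labels for the batch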
3. Invocation
from pathlib import Path

def main():
    DATA_DIR = Path('../data/New_Data_4_classes')
    DATASETS = ['train', 'val', 'test']
    data_loaders, class_names = load_data(DATA_DIR, DATASETS)
4. Model Definition
4.1 Model 1
from torch import nn
from torchvision import models

def create_model(n_classes):
    # 1. Load a pretrained ResNet-50
    model = models.resnet50(pretrained=True)
    # 2. Replace the fully connected layer: one output per class
    model.fc = nn.Linear(model.fc.in_features, n_classes)
    return model
Print the model:
model = create_model(4)
print(model)
The last layer:
(fc): Linear(in_features=2048, out_features=4, bias=True)
4.2 Model 2
def create_model_2(n_classes):
    # 1. Load a pretrained ResNet-50 and freeze the backbone
    model = models.resnet50(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    # 2. Replace the fully connected layer with a deeper head.
    # Note: the LogSoftmax output pairs with nn.NLLLoss during training
    # (nn.CrossEntropyLoss already applies log-softmax internally).
    model.fc = nn.Sequential(
        nn.Flatten(),
        nn.BatchNorm1d(2048),
        nn.Dropout(0.5),
        nn.Linear(2048, 512),
        nn.ReLU(),
        nn.BatchNorm1d(512),
        nn.Dropout(0.5),
        nn.Linear(512, n_classes),
        nn.LogSoftmax(dim=1)
    )
    return model
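The practical difference between the two models is what gets trained: model 1 fine-tunes every ResNet-50 weight, while model 2 freezes the backbone and only trains the new head. A quick count makes that concrete (a sketch; the figures in the comments are approximate):

def count_trainable(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_trainable(create_model(4)))    # full network, on the order of 23.5M parameters
print(count_trainable(create_model_2(4)))  # new head only, roughly 1M parameters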
5. Model Training
5.1 Training One Epoch
def train_epoch(model, train_loader, criterion, optimizer, scheduler):
    model.train()
    train_loss = 0.0
    for batch_id, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)  # accumulate the summed per-sample loss
    scheduler.step()  # update the learning rate once per epoch
    train_loss /= len(train_loader.dataset)
    return train_loss
5.2 Validating One Epoch
import torch

def val_epoch(model, val_loader, criterion):
    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            val_loss += criterion(output, target).item() * data.size(0)  # summed per-sample loss
            _, pred = torch.max(output, dim=1)
            val_acc += torch.sum(pred == target).item()
    val_loss /= len(val_loader.dataset)
    val_acc /= len(val_loader.dataset)
    return val_loss, val_acc
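Both loops run on the CPU as written. To train on a GPU, the model and every batch have to be moved to the same device; a minimal sketch of the usual pattern (not in the original code):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# inside the batch loops, before the forward pass:
# data, target = data.to(device), target.to(device)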
5.3 Main Training Loop
def train(model, data_loader, criterion, optimizer, scheduler, epochs, model_path):
    print('{0:>20} | {1:>20} | {2:>20} | {3:>20} |'.format('Epoch', 'Training loss', 'Val loss', 'Val acc'))
    best_loss = np.inf
    for epoch in range(epochs):
        train_loss = train_epoch(model, data_loader['train'], criterion, optimizer, scheduler)
        val_loss, val_acc = val_epoch(model, data_loader['val'], criterion)
        print('{0:>20} | {1:>20} | {2:>20} | {3:>20.2f} |'.format(epoch, train_loss, val_loss, val_acc))
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), model_path)
    return model
5.4 Invocation
import torch
from torch import nn, optim
from torch.optim import lr_scheduler
from pathlib import Path

def main():
    EPOCHS = 10
    BATCH_SIZE = 8
    DATA_DIR = Path('../data/New_Data_4_classes')
    DATASETS = ['train', 'val', 'test']
    MODEL_PATH = '../model/best_model.pkl'
    data_loaders, class_names = load_data(DATA_DIR, DATASETS, BATCH_SIZE)
    model = create_model(len(class_names))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # decay the LR by 10x every 7 epochs
    model = train(model, data_loaders, criterion, optimizer, scheduler, EPOCHS, MODEL_PATH)
    model.load_state_dict(torch.load(MODEL_PATH))  # reload the best checkpoint
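If model 2 (create_model_2) is trained instead, two adjustments are needed: its LogSoftmax output pairs with nn.NLLLoss (nn.CrossEntropyLoss applies log-softmax internally, so stacking them would apply it twice), and only the unfrozen head parameters need to be handed to the optimizer. A sketch:

model = create_model_2(len(class_names))
criterion = nn.NLLLoss()  # matches the LogSoftmax output of model 2
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)  # the backbone is frozen; train the head only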
6. Model Evaluation
6.1 Classification Report
from sklearn.metrics import confusion_matrix, classification_report

# Collect predictions and ground-truth labels on the test set, then report per-class metrics
def get_predictions(model, data_loader, class_names):
    model.eval()
    y_pred_list = []
    y_test_list = []
    with torch.no_grad():
        for batch_id, (data, target) in enumerate(data_loader['test']):
            output = model(data)
            _, pred = torch.max(output, dim=1)
            y_pred_list.extend(pred)
            y_test_list.extend(target)
    y_pred_list = torch.as_tensor(y_pred_list).cpu()
    y_test_list = torch.as_tensor(y_test_list).cpu()
    # classification_report expects (y_true, y_pred) in that order
    print(classification_report(y_test_list, y_pred_list, target_names=class_names))
    return y_pred_list, y_test_list
6.2 Confusion Matrix
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def plot_cm(y_test, y_pred, class_names):
    cm = confusion_matrix(y_test, y_pred)
    df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
    hmap = sns.heatmap(df_cm, annot=True)
    hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=0, ha='right')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()
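Putting the two together, assuming the trained model and data_loaders from the previous sections:

y_pred, y_test = get_predictions(model, data_loaders, class_names)
plot_cm(y_test, y_pred, class_names)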
6.3 Single-Image Prediction
from PIL import Image
import torch.nn.functional as F

# Predict a single image and return the per-class probabilities
def predict_proba(model, img_path):
    model.eval()  # make sure dropout/batchnorm run in inference mode
    image = Image.open(img_path)
    transform = get_transform()
    image = transform['test'](image).unsqueeze(0)  # add a batch dimension
    pred = model(image)
    prob = F.softmax(pred, dim=1)
    prob = prob.detach().cpu().numpy().flatten()
    return prob

img_path = '00018.ppm'
pred = predict_proba(model, img_path)
# [0.526681 0.04194336 0.2741005 0.15727507]
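The probabilities follow ImageFolder's class order (['STOP', '环岛行驶', '直行', '禁止通行'] here), so the predicted label and its confidence can be read off with argmax, assuming the class_names list from load_data:

pred_idx = int(np.argmax(pred))
print(class_names[pred_idx], pred[pred_idx])  # STOP 0.526681 for the example output above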
7. E&E
1. Why is validation accuracy higher than training accuracy?
Reason 1: The dataset is small, so the random split can leave the training and validation sets with different distributions; if the training split ends up with higher internal variance than the validation split, its error will also be higher.
Fixes:
1. Collect more data;
2. Re-split the dataset so the two distributions match more closely.
Reason 2: Dropout. It is only active in training mode, where it randomly disables units and depresses training accuracy; evaluation uses the full network, so validation accuracy can legitimately come out higher (see the sketch below).
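The dropout effect is easy to verify directly; a minimal sketch:

import torch
from torch import nn

drop = nn.Dropout(0.5)
x = torch.ones(1, 8)
drop.train()   # training mode: ~half the entries are zeroed, survivors scaled by 2
print(drop(x))
drop.eval()    # eval mode: dropout is the identity
print(drop(x))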