1. 数据集简介
CIFAR10数据集共有6W张彩色图像,图像大小是32*32*3的,共计10个类,每类6K张图片。
其中训练集5W张,构成了5个训练批,每一批1W张,但一个训练批中的各类图像并不一定数量相同,总的来看训练集,每一类都有5K张;测试集1W张单独构成一批,其来自10个分类,每类随机取1K张。

2. 数据加载
2.1 数据集下载
只下载一次, 批量迭代读取
def load_data(batch_size):
    """Download CIFAR10 (cached after the first run) and build batch iterators.

    Args:
        batch_size: number of images per batch.

    Returns:
        (train_loader, test_loader) tuple of DataLoader objects.
    """
    # 1. Build the data transformer (see get_transform)
    transform = get_transform()
    # 2. Download / load the datasets
    train_set = datasets.CIFAR10(root='../data/', train=True, download=True, transform=transform)
    test_set = datasets.CIFAR10(root='../data/', train=False, download=True, transform=transform)
    # 3. Build the iterators.
    # FIX: pin_memory and worker processes only pay off when feeding a GPU,
    # so enable them solely on the CUDA path (the original also pinned on CPU).
    loader_kwargs = {'num_workers': 8, 'pin_memory': True} if torch.cuda.is_available() else {}
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, **loader_kwargs)
    # FIX: evaluation is order-independent — no need to shuffle the test set.
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, **loader_kwargs)
    return train_loader, test_loader
2.2 数据增强
定义数据转换器
def get_transform():
    """Return the transform pipeline applied to every CIFAR10 image.

    Converts PIL images to tensors and maps each RGB channel from [0, 1]
    to [-1, 1] via Normalize(mean=0.5, std=0.5).
    """
    # FIX: removed an unused second pipeline ("transform2") that was defined
    # but never returned. It was also latently buggy: it normalized a
    # 3-channel image with single-channel stats, and placed ToTensor before
    # the PIL-based flip/rotation transforms.
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5),
                             (0.5, 0.5, 0.5)),  # per-channel map to [-1, 1]
    ])
2.3 分割验证集
数据加载,从训练集中分割出验证集(占比30%), 根据索引进行采样
from torch.utils.data.sampler import SubsetRandomSampler

# Split a validation set (30%) out of the training set by index sampling.
val_ratio = 0.3
train_size = len(train_set)
val_size = int(np.floor(train_size * val_ratio))  # number of validation samples
indices = list(range(train_size))                 # all training-set indices
np.random.shuffle(indices)
train_indices, val_indices = indices[val_size:], indices[:val_size]
# Build samplers over the two disjoint index sets
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)    # FIX: was misspelled 'val_sanple'
train_loader = DataLoader(train_set, sampler=train_sampler, batch_size=batch_size, pin_memory=True)
val_loader = DataLoader(train_set, sampler=val_sampler, batch_size=batch_size, pin_memory=True)
print(len(train_loader.dataset))
# 50000 — both loaders wrap the FULL train_set
# NOTE: with a sampler attached, use .sampler to get the effective length!
print(len(train_loader.sampler))
# 35000
print(len(val_loader.dataset))
# 50000
print(len(val_loader.sampler))
# 15000
3. 模型构建
定义网络模型,注意模型网络结构设计,很重要!!!决定最终模型效果。
其中:feature map特征图尺寸的计算公式:
((原图片尺寸 - 卷积核尺寸) / 步长) + 1
# 这里原图片尺寸为32
class Net(nn.Module):
    """LeNet-style CNN for CIFAR10: two conv+pool stages, three FC layers.

    Feature-map size per stage follows ((input - kernel) / stride) + 1,
    with 32x32 input images.
    """

    def __init__(self):
        super(Net, self).__init__()
        # conv1: 32*32*3 --> 28*28*6, since (32-5)/1+1 = 28
        self.conv1 = nn.Conv2d(3, 6, 5)   # in:3, out:6, kernel:5
        # pooling (reused for both stages): halves spatial dims, e.g. 28 -> 14
        self.pool = nn.MaxPool2d(2, 2)    # kernel:2, stride:2
        # conv2: 14*14*6 --> 10*10*16, since (14-5)/1+1 = 10
        # (FIX: the original comment wrongly said 10*10*6; out_channels is 16)
        self.conv2 = nn.Conv2d(6, 16, 5)  # in:6, out:16, kernel:5
        # fc1: flattened 5*5*16 --> 120
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # fc2: 120 --> 84
        self.fc2 = nn.Linear(120, 84)
        # fc3: 84 --> 10 class logits
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # 32*32*3 --> 28*28*6 --> 14*14*6
        x = self.pool(F.relu(self.conv1(x)))
        # 14*14*6 --> 10*10*16 --> 5*5*16
        x = self.pool(F.relu(self.conv2(x)))
        # flatten per sample; x.size(0) keeps the batch dim explicit
        x = x.view(x.size(0), 5 * 5 * 16)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # raw logits; CrossEntropyLoss applies softmax
        return x
4. 模型训练
定义模型训练流程并保存模型
def train(model, train_loader, criterion, optimizer, epochs, model_path):
    """Train *model* for *epochs* passes over *train_loader*, then save it.

    Args:
        model: the network to optimize (set to train mode here).
        train_loader: DataLoader yielding (images, labels) batches.
        criterion: loss function, e.g. nn.CrossEntropyLoss.
        optimizer: optimizer over model.parameters().
        epochs: number of full passes over the data.
        model_path: file path for torch.save(model.state_dict(), ...).
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for batch_id, (images, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # FIX: loss.item() is already the per-batch *mean* loss, so average
        # over the number of batches, not over the number of samples
        # (dividing by len(dataset) under-reported the loss by ~batch_size).
        avg_loss = running_loss / len(train_loader)
        print("Epoch: {0}, Loss: {1: .4f}".format(epoch + 1, avg_loss))
    # Persist the trained weights
    torch.save(model.state_dict(), model_path)
5. 模型评估
5.1 测试集评估
评估模型在测试集上的准确率
def test_eval(model, test_loader):
model.eval()
# 累计预测正确的样本数
correct = 0.0
with torch.no_grad():
for batch_id, (data, target) in enumerate(test_loader):
output = model(data)
_, pred = torch.max(output, dim=1)
correct += (pred == target).sum().item()
correct /= len(test_loader.dataset)
print('在测试集(1W张图片)上的准确率: {:.4f}%'.format(correct*100))
输出
在测试集(1W张图片)上的准确率: 62.9300%
5.2 评估各个类别预测的准确率
def all_class_acc(model, test_loader, batch_size, target_names):
    """Compute and print per-class accuracy over the test set.

    Args:
        model: trained classifier producing 10-class logits.
        test_loader: DataLoader over (images, labels) batches.
        batch_size: kept for backward compatibility; the actual batch
            length is used instead (see FIX below).
        target_names: list of 10 human-readable class names.

    Returns:
        list[float]: accuracy per class, index-aligned with target_names
        (backward-compatible addition; the original was print-only).
    """
    model.eval()
    class_correct = [0.0] * 10  # correct predictions per class
    total = [0.0] * 10          # sample count per class
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, preds = torch.max(outputs.data, dim=1)
            hits = (preds == labels)
            # FIX: iterate over the *actual* batch length — the final batch
            # may be smaller than batch_size (the original range(batch_size)
            # raised IndexError there, and its .squeeze() broke batches of 1).
            for label, hit in zip(labels.tolist(), hits.tolist()):
                total[label] += 1
                class_correct[label] += hit  # bool adds as 0/1
    accuracies = []
    for i in range(10):
        # Guard against a class absent from the loader (avoid ZeroDivisionError)
        acc = class_correct[i] / total[i] if total[i] else 0.0
        accuracies.append(acc)
        print('类别: {0}, 正确率: {1}'.format(target_names[i], acc))
    return accuracies
输出:
类别: plane, 正确率: 0.679
类别: car, 正确率: 0.752
类别: bird, 正确率: 0.462
类别: cat, 正确率: 0.45
类别: deer, 正确率: 0.518
类别: dog, 正确率: 0.524
类别: frog, 正确率: 0.705
类别: horse, 正确率: 0.767
类别: ship, 正确率: 0.782
类别: truck, 正确率: 0.654
6. 模型调用Pipeline
def main():
    """End-to-end pipeline: load data, train, save, reload, evaluate."""
    # 1. Hyperparameters
    EPOCHS = 5
    BATCH_SIZE = 8
    MODEL_PATH = '../model/cifar_net.model'
    LABELS = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    # 2. Model, loss and optimizer
    model = Net()
    criterion = nn.CrossEntropyLoss()  # cross-entropy for multi-class classification
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    print(optimizer.defaults)
    # {'lr': 0.001, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}
    # 3. Data
    train_loader, test_loader = load_data(BATCH_SIZE)
    # 4. Train and save.
    # FIX: train() requires model_path; the original call omitted it,
    # which raised TypeError at runtime.
    train(model, train_loader, criterion, optimizer, EPOCHS, MODEL_PATH)
    # 5. Reload the saved weights into a fresh instance
    model = Net()
    model.load_state_dict(torch.load(MODEL_PATH))
    # 6. Overall test-set evaluation
    test_eval(model, test_loader)
    # 7. Per-class evaluation
    all_class_acc(model, test_loader, BATCH_SIZE, LABELS)
7. 在线预测
7.1 app.py
1. 搭建Flask环境
from flask import Flask, request, jsonify
from service_streamer import ThreadedStreamer
from predict import predict_image, predict_image_by_batch
# Work around untrusted-SSL-certificate errors when fetching over HTTPS
# (e.g. the pretrained-model download in predict.py)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# Flask application object; routes are registered below via @app.route
app = Flask(__name__)

# NOTE(review): in the assembled app.py this guard must sit at the *bottom*
# of the file, after all route definitions — app.run() blocks, so routes
# defined after it would never be registered.
if __name__ == '__main__':
    app.run(debug=True)
2. 单条预测
# FIX: the Flask keyword is 'methods', not 'method' (the original raised
# TypeError when the route was registered).
@app.route('/predict', methods=['POST'])
def predict():
    """Single-image prediction endpoint; expects a multipart 'image' file."""
    # FIX: key typo — the original checked 'iamge' and so always took the
    # error branch even for valid uploads.
    if 'image' not in request.files:
        # FIX: jsonify needs a dict; the original passed a set literal
        # ({'Error', ...}), which is not JSON-serializable.
        return jsonify({'Error': 'Image not found!'}), 400
    if request.method == 'POST':
        image = request.files['image'].read()
        name = predict_image(image)
        return jsonify({'Name': name})
3. 批量预测
# Streamer batches concurrent requests together (up to 8) before calling
# predict_image_by_batch, improving GPU utilization under load.
streamer = ThreadedStreamer(predict_image_by_batch, batch_size=8)


# FIX: 'methods', not 'method' (TypeError on registration otherwise)
@app.route('/batch_predict', methods=['POST'])
def batch_predict():
    """Batched prediction endpoint: single upload, served via the streamer."""
    if request.method == 'POST':
        image = request.files['image'].read()
        # FIX: removed a leftover debug print that ran the model on
        # 4 duplicated copies of every request.
        name = streamer.predict([image])[0]
        # FIX: jsonify takes a dict; the original passed a set literal.
        return jsonify({'Name': name})
7.2 predict.py
1. 加载模型
import io
import json
import torch
from torchvision import models, transforms
from PIL import Image
# 读取保存ImageNet类别名称的json文件
# Load the JSON file mapping prediction indices to ImageNet class names.
# NOTE(review): predict_image indexes this with an int (pred.item()) — that
# works if the JSON is an array; if it is an object, keys load as strings
# and the lookup would need str(pred.item()). Verify the file format.
with open('idx_class.json') as f:
    idx_class = json.load(f)
# Load a pretrained DenseNet-161 (downloads weights on first run)
model = models.densenet161(pretrained=True)
# Inference-only service: switch off dropout/batch-norm training behavior
model.eval()
2. 图片转换
def image_transformer(image_data):
    """Decode raw image bytes into a normalized (1, 3, 224, 224) tensor.

    Args:
        image_data: raw encoded image bytes (e.g. from an HTTP upload).

    Returns:
        torch.Tensor with a leading batch dimension of 1, preprocessed
        the standard ImageNet way (resize 256, center-crop 224, normalize).
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # FIX: use the canonical ImageNet statistics. The original had typos:
        # mean (0.485, 0.456, 0.486) -> third value should be 0.406, and
        # std (0.227, 0.226, 0.225) -> should be (0.229, 0.224, 0.225).
        # (This standardizes channels; it does NOT map to [-1, 1] as the old
        # comment claimed.)
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])
    # convert('RGB') guards against grayscale/RGBA uploads, which would
    # otherwise break the 3-channel Normalize
    image = Image.open(io.BytesIO(image_data)).convert('RGB')
    return transform(image).unsqueeze(0)  # add batch dimension
3. 图片预测函数
def predict_image(image_data):
    """Predict the ImageNet class name for one raw image byte string."""
    img = image_transformer(image_data)
    # FIX: wrap inference in no_grad — the original built an autograd graph
    # on every request, wasting memory and time.
    with torch.no_grad():
        output = model(img)
    _, pred = output.max(dim=1)
    # NOTE(review): assumes idx_class supports integer indexing (a list, or a
    # dict with int keys) — JSON objects load with string keys; verify.
    return idx_class[pred.item()]
4. 批量图片预测函数
def predict_image_by_batch(image_data_batch):
    """Predict ImageNet class names for a list of raw image byte strings.

    Args:
        image_data_batch: iterable of encoded image byte strings.

    Returns:
        list of class names, one per input image, in order.
    """
    img_list = [image_transformer(img) for img in image_data_batch]
    # Concatenate the single-image tensors into one (N, 3, 224, 224) batch
    tensor = torch.cat(img_list)
    # FIX: inference under no_grad — the original tracked gradients needlessly
    with torch.no_grad():
        outputs = model(tensor)
    _, preds = outputs.max(dim=1)
    return [idx_class[i] for i in preds.tolist()]
本文介绍了使用深度学习对CIFAR10数据集进行图像识别的过程,包括数据下载、数据增强、模型构建、训练、验证集划分、模型评估以及在线预测的实现。在测试集上达到了62.93%的准确率,并详细展示了各分类的预测准确率。