1、原理
2、model.py
import torch.nn as nn
import torch
class AlexNet(nn.Module):
    """AlexNet-style CNN (halved channel counts) with BatchNorm after each conv.

    Expects input of shape [N, 3, 224, 224] and returns raw class logits of
    shape [N, num_classes].
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # Convolutional feature extractor. The layer order must stay stable:
        # state_dict keys are derived from these Sequential indices.
        self.features = nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),   # [3,224,224] -> [48,55,55]
            nn.BatchNorm2d(48),
            # inplace=True trades a little recomputation for lower memory use.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [48,27,27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),            # -> [128,27,27]
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [128,13,13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),           # -> [192,13,13]
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),           # -> [192,13,13]
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),           # -> [128,13,13]
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [128,6,6]
        )
        # Classifier head: Dropout(p=0.5) between the fully connected layers.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        # Optional explicit init; when skipped, PyTorch's default layer
        # initialization (Kaiming-style for conv/linear) applies.
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Forward pass: features -> flatten (keep batch dim) -> classifier."""
        feats = self.features(x)
        # Flatten everything except dim 0 (the batch dimension).
        flat = torch.flatten(feats, start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Kaiming-init conv weights, N(0, 0.01) linear weights, zero biases."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
3、train.py
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from model import AlexNet
import os
import json
import time
# Use the first GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

data_transform = {
    # Training preprocessing: random 224x224 crop + random horizontal flip.
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
    # Validation preprocessing; Resize needs the (h, w) tuple, not a bare 224.
    "val": transforms.Compose([transforms.Resize((224, 224)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

# Dataset root: two directories above the current working directory.
# Using os.path.join avoids the doubled slashes of string concatenation.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
image_path = os.path.join(data_root, "data_set", "gabage_data")

train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                     transform=data_transform["train"])
train_num = len(train_dataset)

# class_to_idx maps class-name -> index; invert it so a predicted index can be
# translated back to its class name, then persist the mapping as JSON.
flower_list = train_dataset.class_to_idx
cla_dict = dict((val, key) for key, val in flower_list.items())
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=0)
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                        transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=batch_size, shuffle=True,
                                              num_workers=0)

# Peek at one validation batch.
# FIX: iterator.next() no longer exists (Python 3 / current PyTorch); use the
# builtin next() instead.
test_data_iter = iter(validate_loader)
test_image, test_label = next(test_data_iter)

net = AlexNet(num_classes=12, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
# Adam over all trainable parameters of the network.
optimizer = optim.Adam(net.parameters(), lr=0.0002)

save_path = './AlexNet02.pth'
best_acc = 0.0  # best validation accuracy so far; only that checkpoint is kept
for epoch in range(200):
    # net.train() enables Dropout for training; net.eval() below disables it.
    net.train()
    running_loss = 0.0
    t1 = time.perf_counter()
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Progress bar: fraction of this epoch completed so far.
        rate = (step + 1) / len(train_loader)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
    print()
    # Wall-clock time of the training loop for this epoch.
    print(time.perf_counter() - t1)

    # Validation; no_grad() turns off gradient tracking while evaluating.
    net.eval()
    acc = 0.0  # number of correctly classified validation samples
    with torch.no_grad():
        for data_test in validate_loader:
            test_images, test_labels = data_test
            outputs = net(test_images.to(device))
            # Index of the max logit along the class dimension = prediction.
            predict_y = torch.max(outputs, dim=1)[1]
            acc += (predict_y == test_labels.to(device)).sum().item()
        accurate_test = acc / val_num
        # Save only when validation accuracy improves on the best so far.
        if accurate_test > best_acc:
            best_acc = accurate_test
            torch.save(net.state_dict(), save_path)
        # FIX: average the loss over the number of batches; the original
        # divided by the last loop index (step), undercounting by one.
        print('[epoch %d] train_loss: %.3f test_accuracy: %.3f' %
              (epoch + 1, running_loss / len(train_loader), acc / val_num))

print('Finished Training')
# Reached roughly 76% validation accuracy.
4、predict.py
import torch
from model import AlexNet
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json
# Preprocessing must match the validation transform used during training.
data_transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Load and preprocess the image, then add the batch dimension -> [1, C, H, W].
img = Image.open("./02.jpg")
plt.imshow(img)
img = data_transform(img)
img = torch.unsqueeze(img, dim=0)

# Read the index -> class-name mapping written by train.py.
# FIX: use a context manager so the file handle is always closed.
try:
    with open('./class_indices.json', 'r') as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# NOTE(review): train.py builds AlexNet(num_classes=12) and saves to
# './AlexNet02.pth'; the values below (5 classes, './AlexNet.pth') only load a
# matching 5-class checkpoint — confirm which weights are intended here.
model = AlexNet(num_classes=5)
model_weight_path = "./AlexNet.pth"
# FIX: map_location='cpu' lets GPU-trained weights load on a CPU-only machine
# (inference below runs on CPU anyway — the model is never moved to a device).
model.load_state_dict(torch.load(model_weight_path, map_location='cpu'))
model.eval()
with torch.no_grad():
    # Drop the batch dimension, softmax over classes, take the argmax index.
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).numpy()
# JSON keys are strings, so the integer index must be stringified for lookup.
print(class_indict[str(predict_cla)], predict[predict_cla].item())
plt.show()