2 AlexNet Classification

1. Principle

https://blog.youkuaiyun.com/zyqdragon/article/details/72353420?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522159799974219725219950752%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=159799974219725219950752&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~top_click~default-2-72353420.pc_ecpm_v3_pc_rank_v3&utm_term=AlexNet&spm=1018.2118.3001.4187
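In brief, AlexNet (Krizhevsky et al., 2012) stacks five convolutional layers (three of them followed by max pooling) and three fully connected layers, and popularized ReLU activations and dropout for deep networks. The implementation below follows a common single-branch tutorial variant that uses half the channel counts of the original two-GPU model (48/128/192/192/128 instead of 96/256/384/384/256).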

2. model.py

import torch.nn as nn
import torch


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()

        # nn.Sequential packs a series of layers into a single new module
        self.features = nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.BatchNorm2d(48),
            # inplace=True reduces memory use by letting ReLU overwrite its input tensor
            # in place, which makes it possible to fit larger models in memory
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]

            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        )

        # The last three fully connected layers form the classifier; nn.Dropout is usually
        # placed between fully connected layers; p=0.5 is the probability of dropping a unit
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )

        # Initialize the weights only when the model is constructed with init_weights=True.
        # Manual initialization is optional: PyTorch already applies a Kaiming-style
        # default initialization to Conv2d and Linear layers.
        if init_weights:
            self._initialize_weights()

    # Forward pass: x is a batch of input images
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)  # flatten from dim 1; dim 0 is the batch dimension and is left untouched
        x = self.classifier(x)
        return x

    def _initialize_weights(self):

        # self.modules() returns an iterator over every module in the network
        # (all layer structures: conv layers, fully connected layers, ...)
        for m in self.modules():

            # Check which class each module belongs to; here, whether m is an nn.Conv2d
            if isinstance(m, nn.Conv2d):

                # For conv layers, apply Kaiming initialization to the weights
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

                # If the bias is not None, initialize it to zero
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            # For fully connected layers, initialize the weights from a normal
            # distribution (mean 0, std 0.01) and the biases to zero
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
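
As a quick sanity check (not part of the original post), the sketch below feeds a dummy batch through the model and verifies that the layer arithmetic in the comments above produces the expected output shape:

import torch
from model import AlexNet  # assumes the code above is saved as model.py

if __name__ == "__main__":
    net = AlexNet(num_classes=12, init_weights=True)
    dummy = torch.randn(2, 3, 224, 224)  # a batch of 2 RGB images, 224x224
    out = net(dummy)
    print(out.shape)  # expected: torch.Size([2, 12])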

3. train.py

import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from model import AlexNet
import os
import json
import time

# Use the first GPU if one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

data_transform = {
    # Preprocessing for the training set: random resized crop to 224x224
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),  # random horizontal flip
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
    # Preprocessing for the validation set; the size must be (224, 224):
    # Resize(224) would only scale the shorter side, keeping the aspect ratio
    "val": transforms.Compose([transforms.Resize((224, 224)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

# Get the dataset root directory: os.getcwd() returns the current working
# directory, and "../.." goes up two levels from there
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
image_path = data_root + "/data_set/gabage_data/"  # garbage data set path

# datasets.ImageFolder loads the dataset; data_transform applies the preprocessing
train_dataset = datasets.ImageFolder(root=image_path + "/train",
                                     transform=data_transform["train"])
train_num = len(train_dataset)  # number of training images

# class_to_idx maps each class name to its index,
# e.g. {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}
flower_list = train_dataset.class_to_idx

# Invert the dict so that a predicted index can be mapped back to its class name
cla_dict = dict((val, key) for key, val in flower_list.items())

# write dict into json file; the json file maps index -> class name
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)
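
# The resulting class_indices.json has the form below (the class names here are
# hypothetical examples; the real ones come from your dataset's folder names):
# {
#     "0": "daisy",
#     "1": "dandelion",
#     ...
# }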


batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=0)

validate_dataset = datasets.ImageFolder(root=image_path + "/val",
                                        transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=batch_size, shuffle=True,
                                              num_workers=0)

test_data_iter = iter(validate_loader)
test_image, test_label = next(test_data_iter)  # iterator.next() was removed in Python 3; use next(iterator)

# def imshow(img):
#     img = img / 2 + 0.5  # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))


net = AlexNet(num_classes=12, init_weights=True)

# move the network to the chosen device
net.to(device)
loss_function = nn.CrossEntropyLoss()

# params = list(net.parameters()); the optimizer updates all trainable parameters
optimizer = optim.Adam(net.parameters(), lr=0.0002)

# path for saving the model weights
save_path = './AlexNet02.pth'
# only keep the model with the highest validation accuracy
best_acc = 0.0
for epoch in range(200):

    # train; dropout should only be active during training:
    # net.train() enables dropout, net.eval() disables it for validation
    net.train()
    # accumulated training loss over the epoch
    running_loss = 0.0
    t1 = time.perf_counter()
    for step, data in enumerate(train_loader, start=0):

        # split the batch into images and labels
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))  # move the training images to the device

        # compute the loss between predictions and ground truth; the labels are moved to the device as well
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()

        # print training progress:
        # rate = current step / steps per epoch
        rate = (step + 1) / len(train_loader)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
    print()
    # elapsed training time for this epoch
    print(time.perf_counter()-t1)

    # validate
    net.eval()
    acc = 0.0  # accumulate accurate number / epoch

    # torch.no_grad() disables gradient tracking; no loss gradients are computed during validation
    with torch.no_grad():
        for data_test in validate_loader:
            test_images, test_labels = data_test    # split each validation batch into images and labels
            outputs = net(test_images.to(device))
            predict_y = torch.max(outputs, dim=1)[1]

            # compare predictions with the ground-truth labels (1 if correct, 0 otherwise)
            # and sum to count the correctly classified samples
            acc += (predict_y == test_labels.to(device)).sum().item()
        accurate_test = acc / val_num

        # if the current accuracy beats the best so far, save the weights
        if accurate_test > best_acc:
            best_acc = accurate_test
            torch.save(net.state_dict(), save_path)
        print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
              (epoch + 1, running_loss / (step + 1), acc / val_num))
        # after training completes, the best-performing weights remain on disk

print('Finished Training')

# reaches roughly 76% validation accuracy

4. predict.py

import torch
from model import AlexNet
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json

data_transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# load image
img = Image.open("./02.jpg")
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# add a batch dimension: [C, H, W] -> [N, C, H, W]
img = torch.unsqueeze(img, dim=0)

# read class_indict
try:
    json_file = open('./class_indices.json', 'r')
    class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model; num_classes must match the checkpoint being loaded
# (training above used num_classes=12 and saved './AlexNet02.pth')
model = AlexNet(num_classes=12)
# load model weights
model_weight_path = "./AlexNet02.pth"
model.load_state_dict(torch.load(model_weight_path))
model.eval()
with torch.no_grad():
    # predict class
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].item())
plt.show()
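
As an optional extension (not in the original script), torch.topk can report the top-3 classes instead of only the most likely one; this sketch reuses the predict and class_indict variables from the script above:

# show the top-3 predictions with their probabilities
values, indices = torch.topk(predict, k=3)
for v, i in zip(values, indices):
    print(class_indict[str(i.item())], v.item())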

 
