Hands-on: Migrating PyTorch Training and Inference to Ascend

1. Goals

  • Migrate the training and inference of a simple CNN model from GPU to NPU
  • Learn the basic usage of torch_npu and MindIE Torch

2. Prerequisites

2.1 Install the CANN environment

Refer to the official guide: link

2.2 Install MindIE

Refer to the official guide: link

2.3 Install torch_npu

Refer to the official guide: link

2.4 Initialize environment variables

# Initialize the environment variables according to where the packages above were installed

# Initialize the CANN environment variables
source $CANN_INSTALL_PATH/set_env.sh

# Initialize the MindIE environment variables
source $MINDIE_INSTALL_PATH/set_env.sh
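After sourcing the environment variables, it is worth confirming that the NPU devices are visible from Python. A minimal sanity check, assuming torch and torch_npu are already installed in the current Python environment:

import torch
import torch_npu  # Ascend adapter for PyTorch

# Both calls should succeed if CANN and torch_npu are set up correctly
print("NPU available:", torch_npu.npu.is_available())
print("NPU device count:", torch_npu.npu.device_count())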

3. Training migration

3.1 Migration analysis

Migration analysis flow:
[Figure: migration analysis flow]

Refer to the official guide: link

3.2 Adapting the CNN model

  • Prepare the sample CNN network model code
    The sample code cnn.py is as follows:
import torch 
import torch.nn as nn
from torch import optim
import torchvision as tv
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage


device = torch.device('cuda:0')


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 3, bias=False)     
        self.conv2 = nn.Conv2d(6, 16, 3, bias=False)
        self.fc1 = nn.Linear(16*6*6, 1000)
        self.fc2 = nn.Linear(1000, 100)
        self.fc3 = nn.Linear(100, 10) 
        self.max_pool2d = nn.MaxPool2d(2, 2)
        self.flatten = nn.Flatten()   
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.max_pool2d(x)  
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)        
        return x


def main():
    # Define the data preprocessing
    transform = transforms.Compose([
            transforms.ToTensor(),  # convert to Tensor
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 
                                ])

    trainset = tv.datasets.CIFAR10(
                        root='DataSet/',
                        train=True, 
                        download=True,    # set to True if the dataset has not been downloaded manually
                        transform=transform)

    trainloader = torch.utils.data.DataLoader(
                        trainset, 
                        batch_size=64,
                        shuffle=True, 
                        num_workers=2)

    # Test set
    testset = tv.datasets.CIFAR10(
                        'DataSet/',
                        train=False, 
                        download=True,   # set to True if the dataset has not been downloaded manually
                        transform=transform)

    testloader = torch.utils.data.DataLoader(
                        testset,
                        batch_size=64, 
                        shuffle=False,
                        num_workers=2)

    classes = ('plane', 'car', 'bird', 'cat',
            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


    data, label = trainset[0] 

    net = Net().to(device)
    print(net)


    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)


    print("--------开始训练--------")
    epochs = 2
    for epoch in range(epochs):  
        running_loss = 0.0    
        for i, data in enumerate(trainloader, 0):
            
            inputs, labels = data
            inputs = inputs.to(device) 
            labels = labels.to(device) 
            
            optimizer.zero_grad()
            
            outputs = net(inputs)
            
            loss = criterion(outputs, labels)
            
            loss.backward()   
            
            optimizer.step()
            
            # Print log info
            running_loss += loss.item()
            if i % 100 == 99: # print the training status every 100 batches
                print('[%d, %5d] loss: %.3f'% (epoch+1, i+1, running_loss / 100))
                running_loss = 0.0

    # Save the model weights
    torch.save(net.state_dict(),'cnn.pth')      
    print('------Training finished------')


    print("-----开始推理测试--------")
    correct = 0
    total = 0

    with torch.no_grad():
        for data in testloader:
            images, labels = data    
            outputs = net(images.to(device)).to("cpu")
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()

    accuracy = 100 * correct / total
    print(f'Accuracy on the {total} test images: {accuracy:.2f}%')


if __name__ == '__main__':
    main()
  • Use the manual migration approach to adapt cnn.py; 2 places need to be modified, as shown in the sketch below.
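A minimal sketch of the two changes, assuming the standard torch_npu manual-migration steps (import the Ascend adapter and switch the device string from CUDA to NPU); everything else in cnn.py stays unchanged:

# cnn.py after migration -- only the modified lines are shown
import torch
import torch.nn as nn
import torch_npu                    # change 1: import the Ascend adapter right after torch

device = torch.device('npu:0')      # change 2: 'cuda:0' -> 'npu:0'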

3.3 Run the training

# Launch the training
python cnn.py

4. Inference migration

Inference uses MindIE Torch to run the PyTorch model on the NPU: export the trained weights to TorchScript, compile the TorchScript model with mindietorch, and then run inference with the compiled module.

Refer to the official guide: link

4.1 Convert the model weights to TorchScript format

  • The conversion script export.py is as follows:
import sys
import os
import argparse

import torch
from cnn import Net as CNN
from cnn import device


def parse_args():
    parser = argparse.ArgumentParser(description='Export torchscript model file')
    # model_path: path to the .pth weight file produced by training
    parser.add_argument('--model_path',help='pth file path', type=str,
                        default='./xxx.pth'
                        )
    # ts_save_path: file name of the exported TorchScript model
    parser.add_argument('--ts_save_path', help='torch script model save path', type=str,
                        default='cnn.torchscript')
    
    args = parser.parse_args()
    return args

def check_args(args):
    if not os.path.exists(args.model_path):
        raise FileNotFoundError(f'model file {args.model_path} not exists')
    
def convert_ts_model(model_path, ts_save_path):
    # load model
    print("model path: ", model_path)
    model = CNN()
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model.eval()

    # Trace the model; the example batch size matches the training batch size.
    # Note: if tracing reports a dtype mismatch between the FP16 input and the FP32
    # weights, also cast the model with model.half() before tracing.
    input_data = torch.ones(64, 3, 32, 32).to(dtype=torch.float16).to(device)
    ts_model = torch.jit.trace(model, input_data).to("cpu")
    ts_model.save(ts_save_path)
    print(f'torch script model saved to {ts_save_path}')


if __name__ == '__main__':
    print('Start to convert torch script model')
    opts = parse_args()
    check_args(opts)

    # load & convert model
    convert_ts_model(opts.model_path, opts.ts_save_path)
    print("Finish Converting model")

  • Run the conversion
    python export.py --model_path=./cnn.pth

cnn.pth is the weight file produced by the training step above.
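As a quick sanity check (a sketch, assuming the export above succeeded and cnn.torchscript sits in the current directory), the exported file can be loaded back and its graph inspected before compilation:

import torch

# Load the exported TorchScript module and print its graph structure
ts_model = torch.jit.load("cnn.torchscript")
print(ts_model.graph)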

4.2 Compile the model with MindIE Torch

  • The compilation script compile.py is as follows:
import os
import argparse
import torch

import mindietorch # import the MindIE Torch SDK


def aie_compile(traced_model, args):
    input_shape_min = (1, 3, args.img_size, args.img_size)
    input_shape_max = (args.max_batch_size, 3, args.img_size, args.img_size)

    traced_model.eval()
    print("mindietorch compile start !")
    # 1. Specify the device ID to run on
    mindietorch.set_device(0)
    # 2. Build the input specification through the interface provided by mindietorch
    compile_inputs = [
        mindietorch.Input(min_shape=input_shape_min,
                          max_shape=input_shape_max,
                          dtype=torch.float16,
                          format=mindietorch.TensorFormat.NCHW)
    ]
    # 3. Compile the model through the interface provided by mindietorch
    compiled_model = mindietorch.compile(
        traced_model,
        inputs = compile_inputs,
        precision_policy = mindietorch.PrecisionPolicy.FP16,
        soc_version = "Ascendxxx",  # replace with the actual Ascend SoC model
        optimization_level = 0
    )
    print("mindietorch compile done !")
    print("compiled model is ", compiled_model.graph)

    compiled_model.save(args.pt_dir)
    print("torch aie compiled model saved. ")


if __name__ == '__main__':   
    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--ts_model",
                        default="./cnn.torchscript",
                        type=str,
                        help="The original torch pt file from pretraining")   
    parser.add_argument("--save_dir",
                        default="./",
                        type=str,
                        help="The path of the directory that stores the compiled model")   
    parser.add_argument('--max_batch_size',
                        default=128,
                        type=int,
                        help="max batch size")
    parser.add_argument('--img_size', 
                        type=int, 
                        default=32, 
                        help='image size')
    args = parser.parse_args()

    traced_model = torch.jit.load(args.ts_model)
    traced_model.eval()
    base_name = os.path.basename(args.ts_model).split('.')[0] + "_dynamic_aie.pt"
    args.pt_dir = os.path.join(args.save_dir, base_name)
    
    aie_compile(traced_model, args)

  • Run the compilation
    python compile.py --ts_model=./cnn.torchscript

cnn.torchscript is the model file produced by the previous step.
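Before running the full evaluation, a quick smoke test can confirm the compiled module accepts an FP16 NCHW input within the configured dynamic-shape range. A sketch, assuming the compiled file cnn_dynamic_aie.pt produced above and NPU device 0:

import torch
import mindietorch

mindietorch.set_device(0)

# Load the MindIE-compiled TorchScript module
compiled = torch.jit.load("cnn_dynamic_aie.pt")
compiled.eval()

# Random FP16 input inside the compiled range (batch 1..128, 3x32x32)
dummy = torch.randn(1, 3, 32, 32, dtype=torch.float16).to("npu:0")
with torch.no_grad():
    out = compiled(dummy).to("cpu")
print("output shape:", out.shape)  # expected: torch.Size([1, 10])

mindietorch.finalize()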

4.3 Run model inference with MindIE Torch

  • The inference script infer.py is as follows:
import argparse
import torch
import torchvision as tv
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

import mindietorch

def parse_args():
    parser = argparse.ArgumentParser(description='Evaluation.')
    parser.add_argument('--data_path', type=str, default='./DataSet/',
                        help='Evaluation dataset path')
    parser.add_argument('--ts_model_path', type=str, default='./cnn_dynamic_aie.pt',
                        help='Original TorchScript model path')
    parser.add_argument('--batch_size', type=int, default=64, help='Batch size')
    return parser.parse_args()


def infer(args):
    mindietorch.set_device(0)
    aie_model = torch.jit.load(args.ts_model_path)
    aie_model.eval()

    # Define the data preprocessing
    transform = transforms.Compose([
            transforms.ToTensor(),  # convert to Tensor
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 
                                ])

    testset = tv.datasets.CIFAR10(
                        args.data_path,
                        train=False, 
                        download=True, 
                        transform=transform)
    testloader = torch.utils.data.DataLoader(
                        testset,
                        batch_size=args.batch_size, 
                        shuffle=False,
                        num_workers=2)

    correct = 0
    total = 0

    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(dtype=torch.float16).to("npu:0")       
            outputs = aie_model(images).to("cpu")
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()

    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy:.2f}%')
    # Release mindietorch resources
    mindietorch.finalize()


if __name__ == '__main__':
    infer(parse_args())

  • Run the inference
    python infer.py
    Expected result:
    [Figure: accuracy printed on the CIFAR-10 test set]