文章目录
1.目标
- 通过一个简单的CNN模型完成从GPU到NPU的训练和推理的迁移
- 掌握torch_npu、MindIE Torch的基本使用
2.前置准备
2.1 CANN环境安装
参考官方指导:链接
2.2 MindIE安装
参考官方指导:链接
2.3 torch_npu安装
参考官方指导:链接
2.4 初始化环境变量
# 根据上面步骤软件的安装位置来初始化环境变量
# cann环境变量初始化
source $CANN_INSTALL_PATH/set_env.sh
# MindIE环境变量初始化
source $MINDIE_INSTALL_PATH/set_env.sh
3.训练迁移
3.1 迁移分析
迁移分析流程:
参考官方指导:链接
3.2 CNN模型适配
- 准备cnn网络模型样例代码
提供cnn.py样例代码如下:
import torch
import torch.nn as nn
from torch import optim
import torchvision as tv
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
# Device that the model, loss and every batch are moved to in this sample:
# the first CUDA GPU.
device = torch.device('cuda:0')
class Net(nn.Module):
    """Small image classifier: two conv stages followed by three linear layers.

    Both convolutions use 3x3 kernels without bias and are each followed by
    ReLU and 2x2 max pooling. For 3x32x32 inputs the flattened feature size
    is ``16 * 6 * 6``, which is what ``fc1`` expects; ``fc3`` produces 10
    values per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(3, 6, 3, bias=False)
        self.conv2 = nn.Conv2d(6, 16, 3, bias=False)
        # Classifier head.
        self.fc1 = nn.Linear(16*6*6, 1000)
        self.fc2 = nn.Linear(1000, 100)
        self.fc3 = nn.Linear(100, 10)
        # Parameter-free layers shared by several stages of forward().
        self.max_pool2d = nn.MaxPool2d(2, 2)
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the network on a batch and return the raw fc3 outputs."""
        # Convolutional stages: conv -> ReLU -> max-pool, applied twice.
        for conv in (self.conv1, self.conv2):
            x = self.max_pool2d(self.relu(conv(x)))
        x = self.flatten(x)
        # Hidden fully connected layers, each followed by ReLU.
        for hidden in (self.fc1, self.fc2):
            x = self.relu(hidden(x))
        # The last layer is returned without an activation.
        return self.fc3(x)
def main():
    """Train ``Net`` on CIFAR-10, save its weights and print the test accuracy.

    The CIFAR-10 train and test splits are loaded from ``DataSet/`` (and
    downloaded when requested), the network is trained for two epochs with
    SGD, the resulting ``state_dict`` is written to ``cnn.pth`` and the
    accuracy on the test split is printed.
    """
    # Preprocessing shared by the training and test splits.
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert the image to a Tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    trainset = tv.datasets.CIFAR10(
        root='DataSet/',
        train=True,
        download=True,  # keep True unless the dataset was downloaded manually beforehand
        transform=transform)
    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size=64,
        shuffle=True,
        num_workers=2)

    # Test split.
    testset = tv.datasets.CIFAR10(
        'DataSet/',
        train=False,
        download=True,  # keep True unless the dataset was downloaded manually beforehand
        transform=transform)
    testloader = torch.utils.data.DataLoader(
        testset,
        batch_size=64,
        shuffle=False,
        num_workers=2)

    net = Net().to(device)
    print(net)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

    print("--------开始训练--------")
    epochs = 2
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Report the mean loss of every group of 100 batches.
            running_loss += loss.item()
            if i % 100 == 99:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

    # Persist the trained weights.
    torch.save(net.state_dict(), 'cnn.pth')
    print('------完成训练------------')

    print("-----开始推理测试--------")
    # Switch to inference mode before evaluating.
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            # Predictions are moved back to the CPU, where the labels live.
            outputs = net(images.to(device)).to("cpu")
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            # .item() keeps the counter a plain Python int rather than a tensor.
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'{total}张测试集中的准确率为: {accuracy:.2f}%')


if __name__ == '__main__':
    main()
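- 使用手工迁移方式对cnn.py进行适配,共需修改2处:(1)在 import torch 之后增加 import torch_npu;(2)将 device = torch.device('cuda:0') 改为 device = torch.device('npu:0')。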
3.3 执行训练
# 拉起训练
python cnn.py
4.推理迁移
推理部分使用MindIE Torch来完成PyTorch模型的推理流程。
参考官方指导:链接
4.1 模型权重转成torchscript格式
- 转换脚本export.py如下:
import sys
import os
import argparse
import torch
from cnn import Net as CNN
from cnn import device
def parse_args():
    """Parse the command-line options of the TorchScript export script.

    Returns:
        argparse.Namespace with ``model_path`` (path of the trained ``.pth``
        weights, default ``./xxx.pth``) and ``ts_save_path`` (file name of
        the TorchScript model to write, default ``cnn.torchscript``).
    """
    cli = argparse.ArgumentParser(description='Export torchscript model file')
    # Path of the .pth weights produced by training.
    cli.add_argument(
        '--model_path',
        type=str,
        default='./xxx.pth',
        help='pth file path',
    )
    # File name of the exported TorchScript model.
    cli.add_argument(
        '--ts_save_path',
        type=str,
        default='cnn.torchscript',
        help='torch script model save path',
    )
    return cli.parse_args()
def check_args(args):
    """Validate the parsed options.

    Args:
        args: Namespace whose ``model_path`` must point to an existing path.

    Raises:
        FileNotFoundError: If ``args.model_path`` does not exist.
    """
    weights_path = args.model_path
    if os.path.exists(weights_path):
        return
    raise FileNotFoundError(f'model file {weights_path} not exists')
def convert_ts_model(model_path, ts_save_path):
    """Load trained CNN weights and export the model as a TorchScript file.

    Args:
        model_path: Path of the ``.pth`` state dict produced by training.
        ts_save_path: Path the traced TorchScript model is saved to.
    """
    print("model path: ", model_path)
    model = CNN()
    # Deserialize onto the CPU first so the checkpoint can be read no matter
    # which device it was saved from; the model is moved to ``device`` below.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model = model.to(device)
    model.eval()

    # Example input used for tracing. Per the original note, its batch size
    # has to match the batch size used for training (64).
    # NOTE(review): the example input is float16 while the weights keep the
    # dtype they were saved with -- confirm the target backend accepts this.
    input_data = torch.ones(64, 3, 32, 32).to(dtype=torch.float16).to(device)
    ts_model = torch.jit.trace(model, input_data).to("cpu")
    ts_model.save(ts_save_path)
    print(f'torch script model saved to {ts_save_path}')
if __name__ == '__main__':
    # Script entry point: parse the options, validate them, then export.
    print('Start to convert torch script model')
    cli_args = parse_args()
    check_args(cli_args)
    convert_ts_model(
        model_path=cli_args.model_path,
        ts_save_path=cli_args.ts_save_path,
    )
    print("Finish Converting model")
- 执行转换
python export.py --model_path=./cnn.pth
cnn.pth为上面训练出来的模型权重文件
4.2 使用MindIE Torch完成模型的编译
- 编译脚本compile.py如下:
import os
import argparse
import torch
import mindietorch # 导入mindie torch对应的sdk
def aie_compile(traced_model, args):
    """Compile a TorchScript model with MindIE Torch and save the result.

    Args:
        traced_model: TorchScript module to compile.
        args: Namespace providing ``img_size``, ``max_batch_size`` and
            ``pt_dir`` (the path the compiled model is saved to).
    """
    side = args.img_size
    # Only the batch dimension varies: from 1 up to max_batch_size.
    smallest_shape = (1, 3, side, side)
    largest_shape = (args.max_batch_size, 3, side, side)

    traced_model.eval()
    print("mindietorch compile start !")

    # 1. Select the ID of the device to run on.
    mindietorch.set_device(0)

    # 2. Describe the model input through the mindietorch API.
    dynamic_input = mindietorch.Input(
        min_shape=smallest_shape,
        max_shape=largest_shape,
        dtype=torch.float16,
        format=mindietorch.TensorFormat.NCHW,
    )

    # 3. Run the compilation through the mindietorch API.
    compiled_model = mindietorch.compile(
        traced_model,
        inputs=[dynamic_input],
        precision_policy=mindietorch.PrecisionPolicy.FP16,
        soc_version="Ascendxxx",  # replace with the actual processor model
        optimization_level=0,
    )
    print("mindietorch compile done !")
    print("compiled model is ", compiled_model.graph)

    compiled_model.save(args.pt_dir)
    print("torch aie compiled model saved. ")
if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    # Command-line options; every one of them has a default value.
    cli.add_argument(
        "--ts_model",
        type=str,
        default="./cnn.torchscript",
        help="The original torch pt file from pretraining",
    )
    cli.add_argument(
        "--save_dir",
        type=str,
        default="./",
        help="The path of the directory that stores the compiled model",
    )
    cli.add_argument(
        '--max_batch_size',
        type=int,
        default=128,
        help="max batch size",
    )
    cli.add_argument(
        '--img_size',
        type=int,
        default=32,
        help='image size',
    )
    args = cli.parse_args()

    traced_model = torch.jit.load(args.ts_model)
    traced_model.eval()

    # Output file: "<input file name up to its first dot>_dynamic_aie.pt",
    # placed inside save_dir. aie_compile() reads it from args.pt_dir.
    stem = os.path.basename(args.ts_model).partition('.')[0]
    args.pt_dir = os.path.join(args.save_dir, stem + "_dynamic_aie.pt")
    aie_compile(traced_model, args)
- 执行编译
python compile.py --ts_model=./cnn.torchscript
cnn.torchscript为上一步转换得到的模型文件
4.3 使用MindIE Torch执行模型推理
- 推理脚本infer.py如下:
import argparse
import torch
import torchvision as tv
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import mindietorch
def parse_args():
    """Parse the command-line options of the evaluation script.

    Returns:
        argparse.Namespace with ``data_path`` (default ``./DataSet/``),
        ``ts_model_path`` (default ``./cnn_dynamic_aie.pt``) and
        ``batch_size`` (default 64).
    """
    cli = argparse.ArgumentParser(description='Evaluation.')
    cli.add_argument(
        '--data_path',
        type=str,
        default='./DataSet/',
        help='Evaluation dataset path',
    )
    cli.add_argument(
        '--ts_model_path',
        type=str,
        default='./cnn_dynamic_aie.pt',
        help='Original TorchScript model path',
    )
    cli.add_argument(
        '--batch_size',
        type=int,
        default=64,
        help='Batch size',
    )
    return cli.parse_args()
def infer(args):
    """Evaluate the compiled model on the CIFAR-10 test split and print accuracy.

    Args:
        args: Namespace providing ``ts_model_path`` (compiled model file),
            ``data_path`` (dataset root) and ``batch_size``.
    """
    mindietorch.set_device(0)
    aie_model = torch.jit.load(args.ts_model_path)
    aie_model.eval()

    # Same preprocessing as used for training.
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert the image to a Tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    testset = tv.datasets.CIFAR10(
        args.data_path,
        train=False,
        download=True,
        transform=transform)
    testloader = torch.utils.data.DataLoader(
        testset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=2)

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            # The model was compiled for float16 inputs on the NPU.
            images = images.to(dtype=torch.float16).to("npu:0")
            outputs = aie_model(images).to("cpu")
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            # .item() keeps the counter a plain Python int rather than a tensor.
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy:.2f}%')

    # Release the resources held by mindietorch.
    mindietorch.finalize()


if __name__ == '__main__':
    # Without this entry point `python infer.py` would only define functions.
    infer(parse_args())
- 执行推理
python infer.py
预期结果如下: