TensorRT Model Conversion Example (Part 1)

There are two ways to generate a TensorRT engine:
1. Parse an ONNX file to build the network, then generate the engine file. This approach assumes that every operator in the model is supported by the TensorRT ONNX parser.
2. Build the network directly with the TensorRT API (and plugins). This is more flexible and well suited to custom operators.
The overall workflow is as follows (a minimal skeleton is sketched right after this list):
1. Create the builder (requires a logger).
2. Create the config from the builder.
3. Create the network.
4. Populate the network: option A, parse the ONNX file; option B, build it layer by layer with the API / plugins.
5. Build the engine.
6. Serialize the engine to a file. Settings such as dynamic input shapes must be configured before the engine is built.
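A minimal sketch of this flow with the TensorRT 8.x C++ API (error handling omitted; it deliberately mirrors the full program below, including the deprecated destroy()/buildEngineWithConfig() style used there):

// minimal sketch of the build flow (TensorRT 8.x)
TRTLogger logger;
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);     // 1. builder (needs a logger)
nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();      // 2. config
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U);   // 3. network, explicit batch
// 4. populate the network: either parser->parseFromFile("demo.onnx", 1) or addInput()/addConvolutionNd()/...
// 5. build the engine (profiles, FP16/INT8 flags, etc. must already be set on config)
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
// 6. serialize to disk
nvinfer1::IHostMemory* blob = engine->serialize();
FILE* f = fopen("engine.trt", "wb");
fwrite(blob->data(), 1, blob->size(), f);
fclose(f);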

Notes:
1. Dynamic batch-size inference
When using the ONNX parser you must create an optimization profile; when building the network with the API, the dynamic dimension is set at addInput (see the sketch after this note). An engine built with a dynamic batch size must have setBindingDimensions called before inference, otherwise execution fails at runtime.
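A sketch of the dynamic-batch setup for an API-built network (the plugin branch in the full program below uses a fixed batch of set_max_batch instead); -1 marks the batch dimension as dynamic, and the optimization profile supplies the min/opt/max shapes. The name actual_batch is a placeholder for whatever batch you infer with:

// build time: declare the batch dimension as dynamic (-1) ...
auto input = network->addInput("image", nvinfer1::DataType::kFLOAT,
                               nvinfer1::Dims4(-1, input_channel, inputsize_h, inputsize_w));
// ... and give the builder a min/opt/max range for it
auto profile = builder->createOptimizationProfile();
profile->setDimensions("image", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(1,             input_channel, inputsize_h, inputsize_w));
profile->setDimensions("image", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4(set_max_batch, input_channel, inputsize_h, inputsize_w));
profile->setDimensions("image", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4(set_max_batch, input_channel, inputsize_h, inputsize_w));
config->addOptimizationProfile(profile);

// inference time: the actual batch must be fixed before enqueue
// actual_batch: placeholder, any value within the [kMIN, kMAX] range above
execution_context->setBindingDimensions(engine->getBindingIndex("image"),
                                        nvinfer1::Dims4(actual_batch, input_channel, inputsize_h, inputsize_w));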

#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <NvOnnxParser.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <math.h>
#include <assert.h>
using namespace std;
using namespace nvonnxparser;
using namespace nvinfer1;
// define logger
class TRTLogger : public nvinfer1::ILogger{
public:
    virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override{
        if(severity <= Severity::kVERBOSE){
            printf("%d: %s\n", severity, msg);
        }
    }
};

nvinfer1::Weights make_weights(float* ptr, int n){
    nvinfer1::Weights w;
    w.count = n;     // The number of weights in the array.
    w.type = nvinfer1::DataType::kFLOAT;
    w.values = ptr;
    return w;
}

//#define USE_PLUGIN
//#define INT8_INFER
const int set_max_batch = 4; 
const int inputsize_h = 3;
const int inputsize_w = 3;
const int input_channel = 1;
const int output_channel = 1;


int build_trt(){
    TRTLogger logger;
    nvinfer1::IBuilder *builder = nvinfer1::createInferBuilder(logger); // builder
    nvinfer1::IBuilderConfig *config = builder->createBuilderConfig();  // config
    nvinfer1::INetworkDefinition *network = builder->createNetworkV2(1); // network; flag 1 == kEXPLICIT_BATCH (explicit batch)
#ifdef USE_PLUGIN
    /*  *********** define the network next ***********************
    1. build the network via the API (plugin style):  image -> conv(3*3 + bias) -> sigmoid -> output
    ***************************************************************/
    // define conv weight 
    float layer1_weight_values[] =  {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
    float layer1_bias_values[] = {0};

    // network input: name, dtype, shape
    nvinfer1::ITensor *input = network->addInput("image", nvinfer1::DataType::kFLOAT, 
                                                            nvinfer1::Dims4(set_max_batch, input_channel, inputsize_h, inputsize_w));
    nvinfer1::Weights layer1_conv_weight = make_weights(layer1_weight_values, 3 * 3);
    nvinfer1::Weights layer1_bias_weight = make_weights(layer1_bias_values, 1);
    // add conv layer
    auto layer1 = network->addConvolutionNd(*input, 1, nvinfer1::DimsHW(3, 3), layer1_conv_weight, layer1_bias_weight);
    assert(layer1 != nullptr);
    layer1->setStrideNd(nvinfer1::DimsHW{1,1});
    layer1->setPaddingNd(nvinfer1::DimsHW{1,1});
    // add sigmoid layer, use last layer output as input
    auto layer2 = network->addActivation(*(layer1->getOutput(0)), nvinfer1::ActivationType::kSIGMOID);
    // mark output
    network->markOutput(*(layer2->getOutput(0)));
#else
    // onnxparser to network
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, logger);
    if(!parser->parseFromFile("../demo.onnx", 1)){
        printf("Failed to parser demo.onnx\n");
        exit(1);
    }
    // Dynamic inputs require an optimization profile; if the model has several inputs, every dynamic input must be configured in the profile.
    auto input_tensor=network->getInput(0);
    auto input_dims = input_tensor->getDimensions();
    auto profile = builder->createOptimizationProfile();
    input_dims.d[0] = 1;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims);
    input_dims.d[0] = set_max_batch;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims);
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims);
    config->addOptimizationProfile(profile);
#endif

#ifdef INT8_INFER
    config->setFlag(BuilderFlag::kINT8);   // note: a real INT8 build also needs a calibrator or per-tensor dynamic ranges
#else
    config->setFlag(BuilderFlag::kFP16);
#endif

    config->setMaxWorkspaceSize(1 << 28);  // 256 MiB workspace (deprecated in TRT 8.x in favor of setMemoryPoolLimit, but still works)
    printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f);
    builder->setMaxBatchSize(set_max_batch); // only meaningful for implicit-batch networks; ignored with explicit batch

    //generate engine 
    nvinfer1::ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
    if(engine == nullptr){
        printf("Build engine failed.\n");
        network->destroy();
        config->destroy();
        builder->destroy();
        return -1;
    }
    // serialize to file
    nvinfer1::IHostMemory *model_data = engine->serialize();
    FILE *f = fopen("../engine.trt", "wb");
    fwrite(model_data->data(), 1, model_data->size(), f);
    fclose(f);
     // destroy order
    model_data->destroy();
    engine->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();
    printf("Done.\n");
    return 0;
}

vector<unsigned char> load_file(const string& file){
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};
    in.seekg(0, ios::end);
    size_t length = in.tellg();
    std::vector<uint8_t> data;
    if (length > 0){
        in.seekg(0, ios::beg);
        data.resize(length);
        in.read((char*)&data[0], length);
        //in.read((char*)data.data(), length);
    }
    in.close();
    return data;
}
int infer_trt(const std::string &trt_file){
    TRTLogger logger;
    auto engine_data=load_file(trt_file);
    nvinfer1::IRuntime *runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(engine_data.data(),engine_data.size());
    if(engine == nullptr){
        printf("Deserialize cuda engine failed.\n");
        runtime->destroy();
        return -1;
    }
    nvinfer1::IExecutionContext *execution_context = engine->createExecutionContext();
    // For a dynamic batch size the binding dimensions must be set, otherwise execution fails at runtime
    const int inputIndex = engine->getBindingIndex("image");
    execution_context->setBindingDimensions(inputIndex, nvinfer1::Dims4(set_max_batch, input_channel, inputsize_h, inputsize_w));	
    cudaStream_t stream = nullptr;
    cudaStreamCreate(&stream);
    // ---------- 2. copy input data to GPU ----------------------------
    // buffer sizes must match the binding dimensions set above (set_max_batch * 1 * 3 * 3 elements)
    const int element_count = set_max_batch * input_channel * inputsize_h * inputsize_w;
    std::vector<float> input_data_host(element_count, 1.0f);
    std::vector<float> output_data_host(element_count, 0.0f);
    float* input_data_device = nullptr;
    float* output_data_device = nullptr;
    cudaMalloc(&input_data_device, element_count * sizeof(float));
    cudaMalloc(&output_data_device, element_count * sizeof(float));
    cudaMemcpyAsync(input_data_device, input_data_host.data(), element_count * sizeof(float), cudaMemcpyHostToDevice, stream);
    // an array of device pointers, one per binding, in binding-index order (input, output)
    float* bindings[] = {input_data_device, output_data_device};

    // infer
    bool success = execution_context->enqueueV2((void**)bindings, stream, nullptr);
    if(!success){
        printf("enqueueV2 failed.\n");
    }
    cudaMemcpyAsync(output_data_host.data(), output_data_device, element_count * sizeof(float), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream); // wait for the stream to finish

    for(int i = 0; i < element_count; i++){
        std::cout << output_data_host[i] << " ";
    }
    std::cout << std::endl;
   
    // 4. release resources
    printf("Clean memory\n");
    cudaStreamDestroy(stream);
    cudaFree(input_data_device);
    cudaFree(output_data_device);
    execution_context->destroy();
    engine->destroy();
    runtime->destroy();
    return 0;
}

int main(int argc, char ** argv){
    if(build_trt()!=0){
        return -1;
    }
    //infer_trt(argv[1]);
    infer_trt("../engine.trt");
    return 0;
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.10)
project(trt_api)
add_definitions(-w)
find_package(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Release)
#cuda 
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
include_directories(/home/a/TensorRT-8.5.1.7/include)
link_directories(/home/a/TensorRT-8.5.1.7/lib)
cuda_add_executable(helloapi tensorrt_helloworld.cpp)
target_link_libraries(helloapi nvinfer)
target_link_libraries(helloapi cudart)
target_link_libraries(helloapi nvonnxparser)
add_definitions(-O2)

Exporting demo.onnx


import torch
import torch.nn as nn
import torch.onnx

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 1, 3, padding=1)
        #self.myselu = MYSELU(3)
        self.myselu = nn.Sigmoid()
        self.conv.weight.data.fill_(1)
        self.conv.bias.data.fill_(0)
    def forward(self, x):
        x = self.conv(x)
        x = self.myselu(x)
        return x

model = Model().eval()
input = torch.tensor([
    # batch 0
    [
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
    ],

], dtype=torch.float32).view(1, 1, 3, 3)
print(input.shape)
output = model(input)
print(f"inference output = \n{output}")
torch.onnx.export(
    model,
    (input,),        # args passed to the model; must be a tuple, hence the parentheses
    "demo.onnx",     # output file path
    verbose=True,    # print detailed export info
    input_names=["image"],   # name the input/output nodes for easier inspection and manipulation later
    output_names=["output"],
    opset_version=11,        # which opset the operators are exported with (symbolic_opset11)
    # mark the batch dimension as dynamic (exported as -1 in ONNX);
    # usually only batch is made dynamic, the other dimensions are kept static
    dynamic_axes={
        "image": {0: "batch"},
        "output": {0: "batch"},
    },
    # for custom plugins the ONNX checker may need to be disabled
    # enable_onnx_checker=False
    operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK
)
print("Done.!")