TensorRT 8: Converting ONNX to TRT (C++)

Installing TensorRT

First, make sure CUDA is installed correctly; after installation, verify it with nvcc -V.

Download TensorRT
Download the latest release from https://developer.nvidia.com/nvidia-tensorrt-8x-download and extract it:

tar -xzvf TensorRT-8.4.0.6.Linux.x86_64-gnu.cuda-11.6.cudnn8.3.tar.gz

To save disk space on the root partition, I keep TensorRT under my home directory and add its library path to the environment:

vim ~/.bashrc
# add the following line at the end of the file:
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/3D/TensorRT-8.4.0.6/lib
source ~/.bashrc

Next, we check that TensorRT works by running a sample program. Building the sampleMNIST source generates an executable in the bin directory; switch there and run it directly:

cd ~/3D/TensorRT-8.4.0.6/samples/sampleMNIST
make
cd ../../bin/
./sample_mnist

If the output ends with PASSED, the sample ran successfully.

Python support

Although TensorRT itself is now installed, the Python environment still cannot import tensorrt, so we install the matching .whl:

pip install ../TensorRT-8.4.0.6/python/tensorrt-8.4.0.6-cp37-none-linux_x86_64.whl
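
After installing the wheel, a quick import check confirms the binding works:

python -c "import tensorrt; print(tensorrt.__version__)"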


ONNX Deployment

TensorRT is NVIDIA's library for optimizing and accelerating inference of trained models on NVIDIA GPUs for a specific target platform. It is a C++ library that supports inference only, not training.

To run inference, you first need to create an IExecutionContext object, and to create that you first need an ICudaEngine object (the engine). There are two ways to create an engine:

  • Build the engine from a model file; the engine can then be serialized and saved to disk for direct reuse later;
  • Deserialize a previously saved engine; this is more efficient, since parsing the model and rebuilding the engine is fairly slow (see the sketch below).
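
For the second route, here is a minimal sketch of deserializing a saved engine. It assumes a logger like the g_logger_ defined in the C++ section below and an engine file such as the model.trt produced there; loadEngine is just an illustrative name.

#include "NvInfer.h"
#include <fstream>
#include <string>
#include <vector>

// Minimal sketch: load a serialized engine from disk.
// The IRuntime must outlive the engine, so it is handed back to the caller.
nvinfer1::ICudaEngine* loadEngine(const std::string& engine_file,
                                  nvinfer1::IRuntime*& runtime)
{
  std::ifstream file(engine_file, std::ios::binary);
  if (!file)
    return nullptr;

  // Read the whole serialized engine into a host buffer.
  file.seekg(0, std::ios::end);
  const size_t size = file.tellg();
  file.seekg(0, std::ios::beg);
  std::vector<char> data(size);
  file.read(data.data(), size);

  // Deserializing is much cheaper than parsing the ONNX model again.
  runtime = nvinfer1::createInferRuntime(g_logger_);
  return runtime->deserializeCudaEngine(data.data(), size);
}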

C++

The TensorRT API changes quite a lot between versions, so it is best to consult the API documentation directly.

#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvInferRuntimeCommon.h"
#include <iostream>  // std::cerr
#include <fstream>   // std::ofstream
#include <string>    // std::string

class Logger : public nvinfer1::ILogger
{
public:
  Logger(Severity severity = Severity::kWARNING) : reportableSeverity(severity)
  {
  }

  void log(Severity severity, char const* msg) noexcept override
  {
    // Only report messages at or above the configured severity.
    if (severity > reportableSeverity)
      return;

    switch (severity)
    {
      case Severity::kINTERNAL_ERROR:
        std::cerr << "INTERNAL_ERROR: ";
        break;
      case Severity::kERROR:
        std::cerr << "ERROR: ";
        break;
      case Severity::kWARNING:
        std::cerr << "WARNING: ";
        break;
      case Severity::kINFO:
        std::cerr << "INFO: ";
        break;
      default:
        std::cerr << "UNKNOWN: ";
        break;
    }
    std::cerr << msg << std::endl;
  }

  Severity reportableSeverity;
};

static Logger g_logger_;

void onnxToTRTModel(const std::string &model_file,
                    nvinfer1::IHostMemory *&trt_model_stream)
{
  int verbosity = (int)nvinfer1::ILogger::Severity::kWARNING;

  nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(g_logger_);

  // The ONNX parser requires an explicit-batch network definition.
  nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));

  auto parser = nvonnxparser::createParser(*network, g_logger_);

  if (!parser->parseFromFile(model_file.c_str(), verbosity))
  {
    std::string msg("failed to parse onnx file");
    g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
    exit(EXIT_FAILURE);
  }

  builder->setMaxBatchSize(1);  // deprecated in TRT 8 and ignored for explicit-batch networks

  nvinfer1::IBuilderConfig* iBuilderConfig = builder->createBuilderConfig();

  iBuilderConfig->setMaxWorkspaceSize(1 << 20);  // 1 MiB of scratch space; raise this for larger models

  nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *iBuilderConfig);

  trt_model_stream = engine->serialize();

  std::ofstream p("../model.trt", std::ios::binary);
  if (!p) {
      std::cerr << "could not open plan output file" << std::endl;
      exit(EXIT_FAILURE);
  }
  p.write(reinterpret_cast<const char *>(trt_model_stream->data()), trt_model_stream->size());
  parser->destroy();
  engine->destroy();
  network->destroy();
  builder->destroy();
  iBuilderConfig->destroy();
}

int main() {
    nvinfer1::IHostMemory *trt_model_stream;
    onnxToTRTModel("../../config/pfe.onnx", trt_model_stream);
    trt_model_stream->destroy();  // release the serialized engine copy
    return 0;
}
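
The code above only builds and serializes the engine. To actually run inference, as described at the start of this section, you create an IExecutionContext from the engine and enqueue device buffers on a CUDA stream. The following is a minimal sketch added for completeness, not part of the original post: it assumes one float input binding at index 0 and one float output binding at index 1, so runInference and its size parameters are placeholders to adapt to your own model. It also needs cuda_runtime_api.h.

#include <cuda_runtime_api.h>

// Minimal sketch: run one inference pass on an already-built engine.
// Assumes binding 0 is the input and binding 1 is the output, both float.
void runInference(nvinfer1::ICudaEngine* engine,
                  const float* input, float* output,
                  size_t input_count, size_t output_count)
{
  nvinfer1::IExecutionContext* context = engine->createExecutionContext();

  // One device buffer per binding, in binding-index order.
  void* buffers[2];
  cudaMalloc(&buffers[0], input_count * sizeof(float));
  cudaMalloc(&buffers[1], output_count * sizeof(float));

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // Host-to-device copy, inference, device-to-host copy, all on one stream.
  cudaMemcpyAsync(buffers[0], input, input_count * sizeof(float),
                  cudaMemcpyHostToDevice, stream);
  context->enqueueV2(buffers, stream, nullptr);
  cudaMemcpyAsync(output, buffers[1], output_count * sizeof(float),
                  cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);

  cudaStreamDestroy(stream);
  cudaFree(buffers[0]);
  cudaFree(buffers[1]);
  context->destroy();
}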

CMakeLists.txt

cmake_minimum_required(VERSION 3.10)
project(onnxToTensorRT)
find_package(CUDA REQUIRED)

SET(TENSORRT_PATH /home/xiaohu/3D/TensorRT-8.4.0.6/)
SET(TENSORRT_LIB_PATH /home/xiaohu/3D/TensorRT-8.4.0.6/lib)

include_directories(
   include
   ${TENSORRT_PATH}/include
   ${CUDA_INCLUDE_DIRS}
)

set(SOURCE_FILES
   onnxToTensorRT.cpp
)

file(GLOB TENSORRT_LIBS "${TENSORRT_LIB_PATH}/*.so")
cuda_add_executable(${PROJECT_NAME} ${SOURCE_FILES})
target_link_libraries(
   ${PROJECT_NAME}
   ${TENSORRT_LIBS}
   ${CUDA_LIBRARIES}
)
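
Assuming onnxToTensorRT.cpp sits next to this CMakeLists.txt, a typical out-of-source build and run looks like:

mkdir build && cd build
cmake ..
make
./onnxToTensorRT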

Python


import sys
import argparse
import tensorrt as trt

EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument("--onnx_path", type=str,
                        default='static_sim.onnx')
    parser.add_argument("--trt_path", type=str,
                        default='static_sim.trt')
    args = parser.parse_args()
    onnx_file_path = args.onnx_path
    engine_file_path = args.trt_path
    print('get start')
    TRT_LOGGER = trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        config.max_workspace_size = 2 * (1 << 30)  # 2 GiB of builder workspace
        builder.max_batch_size = 16  # deprecated; has no effect on explicit-batch networks
        config.set_flag(trt.BuilderFlag.FP16)  # build an FP16 engine

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                sys.exit(1)

        print(f"raw shape of {network.get_input(0).name} is: ", network.get_input(0).shape)
        print(f"raw shape of {network.get_input(1).name} is: ", network.get_input(1).shape)
        print(f"raw shape of {network.get_input(2).name} is: ", network.get_input(2).shape)

        print('Completed parsing of ONNX file')
        print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
        engine = builder.build_engine(network, config)
        print("Completed creating Engine")
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
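
Assuming the script is saved as, say, onnx2trt.py (any filename works), run it with the default paths or pass explicit ones:

python onnx2trt.py --onnx_path static_sim.onnx --trt_path static_sim.trt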

trtexec

TensorRT-8.4.3.1/targets/x86_64-linux-gnu/bin/trtexec --onnx=static_sim.onnx --explicitBatch --saveEngine=static_sim.trt --workspace=1024
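
trtexec is the quickest route when no custom build logic is needed; the static_sim.trt it saves is a serialized engine that can be deserialized in exactly the same way as the engines produced by the C++ and Python code above.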

Thanks for reading, and feel free to follow me; your support is my biggest motivation to keep blogging.

Original: https://blog.csdn.net/weixin_42905141/article/details/124452516
Author: 令狐少侠、
Title: TensorRT8——ONNX转trt(C++)

