YOLOv5: Converting ONNX to TensorRT (engine)

I recently implemented engine conversion for ResNet, but only for a simple classification network. There is plenty of material on building a YOLOv5 engine layer by layer with the TensorRT C++ API, yet relatively little on building the engine by parsing an ONNX model, so this post shows how to build an engine from ONNX and run inference with it.

Environment: TensorRT 8.4; any version above 8.0 should work.

1. Converting YOLOv5 to ONNX:

One point I want to stress: with the official export.py I can export an ONNX file successfully, get correct predictions with Python ONNX Runtime, and even convert the model to RKNN and test it, yet building an engine from that same file with TensorRT's ONNX parser fails. If you know the reason, please share it; many thanks!
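For reference, the problematic file came from the official exporter, invoked along these lines (a sketch; the exact flag names vary between yolov5 releases, so check your copy of export.py):

python export.py --weights best.pt --imgsz 640 --batch 1 --include onnx --opset 12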

Method 1:

Export the ONNX model with the fork at https://github.com/linghu8812/yolov5; its output can be parsed by TensorRT's ONNX parser, so the network builds successfully.

The parsing and network-building code:

    const char* onnx_path = "./best.onnx";

    INetworkDefinition* network = builder->createNetworkV2(1U); // important: 1U (explicit batch) works, 0U fails

    IParser* parser = createParser(*network, gLogger);
    parser->parseFromFile(onnx_path, static_cast<int32_t>(ILogger::Severity::kWARNING));
    // any parsing errors are reported here
    for (int32_t i = 0; i < parser->getNbErrors(); ++i) { std::cout << parser->getError(i)->desc() << std::endl; }
    std::cout << "successfully parsed the onnx model" << std::endl;
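A note on that 1U: in TensorRT 8 the argument of createNetworkV2 is a bit mask of NetworkDefinitionCreationFlag values, and 1U happens to be exactly the explicit-batch bit, which the ONNX parser requires; 0U asks for an implicit-batch network and the parse fails. A minimal sketch of the equivalent, self-documenting call:

    // same effect as createNetworkV2(1U): request an explicit-batch network
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    INetworkDefinition* network = builder->createNetworkV2(explicitBatch);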

Method 2 (updated 2022-09-05):

The conversion code from the YOLOv7 repo, https://github.com/WongKinYiu/yolov7/tree/u5 , also works; I have tested it. I also tested converting YOLOv7 itself, and it still runs.


2. Converting ONNX to an engine and running inference in C++

(1) Initial YOLOv5 ONNX-to-engine code. This version is fairly raw: it was saved before any post-processing logic was written.

You can skip this version and go straight to the code in (2).

#include "NvInfer.h"

include

"cuda_runtime_api.h"

include

include

include

include

include

include

include

include

include

include

include

// onnx转换头文件

include "NvOnnxParser.h"

using namespace nvonnxparser;

using namespace std;

#define CHECK(status) \ do\ {\ auto ret = (status);\ if (ret != 0)\ {\ std::cerr << "Cuda failure: " << ret << std::endl;\ abort();\ }\ } while (0)struct alignas(float) Detection { //center_x center_y w h float bbox[4]; float conf; // bbox_conf * cls_conf float class_id;};

// stuff we know about the network and the input/output blobsstatic const int INPUT_H = 640;static const int INPUT_W = 640;static const int OUTPUT_SIZE = 25200*85; //1000 * sizeof(Detection) / sizeof(float) + 1;

const char INPUT_BLOB_NAME = "images";const char OUTPUT_BLOB_NAME = "output";

using namespace nvinfer1;

//static Logger gLogger;

//构建Loggerclass Logger : public ILogger{ void log(Severity severity, const char* msg) noexcept override { // suppress info-level messages if (severity <= Severity::kWARNING) std::cout << msg << std::endl; }} gLogger;

// Creat the engine using only the API and not any parser.ICudaEngine createEngine(unsigned int maxBatchSize, IBuilder builder, IBuilderConfig config){ const char onnx_path = "./best.onnx";

INetworkDefinition</span>* network = builder-&gt;createNetworkV2(<span>1U</span>); <span>//</span><span>&#x6B64;&#x5904;&#x91CD;&#x70B9;1U&#x4E3A;OU&#x5C31;&#x6709;&#x95EE;&#x9898;</span>


IParser
parser = createParser(network, gLogger);
parser
->parseFromFile(onnx_path, static_cast(ILogger::Severity::kWARNING));
//解析有错误将返回
for (int32_t i = 0; i < parser->getNbErrors(); ++i) { std::cout << parser->getError(i)->desc() << std::endl; }
std::cout
<< "successfully parse the onnx model" << std::endl;

</span><span>//</span><span> Build engine</span>
builder-&gt;<span>setMaxBatchSize(maxBatchSize);
config</span>-&gt;setMaxWorkspaceSize(<span>1</span> &lt;&lt; <span>20</span><span>);
</span><span>//</span><span>config-&gt;setFlag(nvinfer1::BuilderFlag::kFP16); </span><span>//</span><span> &#x8BBE;&#x7F6E;&#x7CBE;&#x5EA6;&#x8BA1;&#x7B97;
</span><span>//</span><span>config-&gt;setFlag(nvinfer1::BuilderFlag::kINT8);</span>
ICudaEngine* engine = builder-&gt;buildEngineWithConfig(*network, *<span>config);
std::cout </span>&lt;&lt; <span>&quot;</span><span>successfully  convert onnx to  engine&#xFF01;&#xFF01;&#xFF01; </span><span>&quot;</span> &lt;&lt;<span> std::endl;

</span><span>//</span><span>&#x9500;&#x6BC1;</span>
network-&gt;<span>destroy();
parser</span>-&gt;<span>destroy();

</span><span>return</span><span> engine;

}

void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
{

</span><span>//</span><span> Create builder</span>
IBuilder* builder =<span> createInferBuilder(gLogger);
IBuilderConfig</span>* config = builder-&gt;<span>createBuilderConfig();

</span><span>//</span><span> Create model to populate the network, then set the outputs and create an engine</span>
ICudaEngine* engine =<span> createEngine(maxBatchSize, builder, config);

assert(engine </span>!=<span> nullptr);

</span><span>//</span><span> Serialize the engine</span>
(*modelStream) = engine-&gt;<span>serialize();
</span><span>//</span><span> Close everything down</span>
engine-&gt;<span>destroy();
builder</span>-&gt;<span>destroy();
config</span>-&gt;<span>destroy();

}

void doInference(IExecutionContext& context, float input, float output, int batchSize)
{
const ICudaEngine& engine = context.getEngine();
// Pointers to input and output device buffers to pass to engine.

</span><span>//</span><span> Engine requires exactly IEngine::getNbBindings() number of buffers.</span>
assert(engine.getNbBindings() == <span>2</span><span>);
</span><span>void</span>* buffers[<span>2</span><span>];
</span><span>//</span><span> In order to bind the buffers, we need to know the names of the input and output tensors.

</span><span>//</span><span> Note that indices are guaranteed to be less than IEngine::getNbBindings()</span>
<span>const</span> <span>int</span> inputIndex =<span> engine.getBindingIndex(INPUT_BLOB_NAME);
</span><span>const</span> <span>int</span> outputIndex =<span> engine.getBindingIndex(OUTPUT_BLOB_NAME);
</span><span>//</span><span>const int inputIndex = 0;
</span><span>//</span><span>const int outputIndex = 1;
</span><span>//</span><span> Create GPU buffers on device</span>
cudaMalloc(&amp;buffers[inputIndex], batchSize * <span>3</span> * INPUT_H * INPUT_W * <span>sizeof</span>(<span>float</span><span>));
cudaMalloc(</span>&amp;buffers[outputIndex], batchSize * OUTPUT_SIZE * <span>sizeof</span>(<span>float</span><span>));
</span><span>//</span><span> Create stream</span>

cudaStream_t stream;
CHECK(cudaStreamCreate(
&stream));
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(batchSize, buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize
* OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// Release stream and buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
}

//加工图片变成拥有batch的输入, tensorrt输入需要的格式,为一个维度
void ProcessImage(cv::Mat image, float input_data[]) {
//只处理一张图片,总之结果为一维[batch3INPUT_W*INPUT_H]
//以下代码为投机取巧了

cv::resize(image, image, cv::Size(INPUT_W, INPUT_H),
0, 0, cv::INTER_LINEAR);
std::vector
<:mat> InputImage;

InputImage.push_back(image);

</span><span>int</span> ImgCount =<span> InputImage.size();

</span><span>//</span><span>float input_data[BatchSize * 3 * INPUT_H * INPUT_W];</span>
<span>for</span> (<span>int</span> b = <span>0</span>; b &lt; ImgCount; b++<span>) {
    cv::Mat img </span>=<span> InputImage.at(b);
    </span><span>int</span> w =<span> img.cols;
    </span><span>int</span> h =<span> img.rows;
    </span><span>int</span> i = <span>0</span><span>;
    </span><span>for</span> (<span>int</span> row = <span>0</span>; row &lt; h; ++<span>row) {
        uchar</span>* uc_pixel = img.data + row *<span> img.step;
        </span><span>for</span> (<span>int</span> col = <span>0</span>; col &lt; INPUT_W; ++<span>col) {
            input_data[b </span>* <span>3</span> * INPUT_H * INPUT_W + i] = (<span>float</span>)uc_pixel[<span>2</span>] / <span>255.0</span><span>;
            input_data[b </span>* <span>3</span> * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (<span>float</span>)uc_pixel[<span>1</span>] / <span>255.0</span><span>;
            input_data[b </span>* <span>3</span> * INPUT_H * INPUT_W + i + <span>2</span> * INPUT_H * INPUT_W] = (<span>float</span>)uc_pixel[<span>0</span>] / <span>255.0</span><span>;
            uc_pixel </span>+= <span>3</span><span>;
            </span>++<span>i;
        }
    }

}

}

int get_trtengine() {

IHostMemory</span>*<span> modelStream{ nullptr };
APIToModel(</span><span>1</span>, &amp;<span>modelStream);
assert(modelStream </span>!=<span> nullptr);

std::ofstream p(</span><span>&quot;</span><span>./best.engine</span><span>&quot;</span><span>, std::ios::binary);
</span><span>if</span> (!<span>p)
{
    std::cerr </span>&lt;&lt; <span>&quot;</span><span>could not open plan output file</span><span>&quot;</span> &lt;&lt;<span> std::endl;
    </span><span>return</span> -<span>1</span><span>;
}
p.write(reinterpret_cast</span>&lt;<span>const</span> <span>char</span>*&gt;(modelStream-&gt;data()), modelStream-&gt;<span>size());
modelStream</span>-&gt;<span>destroy();

</span><span>return</span> <span>0</span><span>;

}

int infer() {

</span><span>//</span><span>&#x52A0;&#x8F7D;engine&#x5F15;&#x64CE;</span>
<span>char</span>*<span> trtModelStream{ nullptr };
size_t size{ </span><span>0</span><span> };
std::ifstream file(</span><span>&quot;</span><span>./best.engine</span><span>&quot;</span><span>, std::ios::binary);
</span><span>if</span><span> (file.good()) {
    file.seekg(</span><span>0</span><span>, file.end);
    size </span>=<span> file.tellg();
    file.seekg(</span><span>0</span><span>, file.beg);
    trtModelStream </span>= <span>new</span> <span>char</span><span>[size];
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();
}
</span><span>//</span><span>&#x53CD;&#x5E8F;&#x5217;&#x4E3A;engine&#xFF0C;&#x521B;&#x5EFA;context</span>


IRuntime
runtime = createInferRuntime(gLogger);
assert(runtime
!= nullptr);
ICudaEngine
engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
assert(engine
!= nullptr);
IExecutionContext
* context = engine->createExecutionContext();
assert(context
!= nullptr);
delete[] trtModelStream;

</span><span>//</span><span>*********************&#x63A8;&#x7406;-&#x5FAA;&#x73AF;&#x63A8;&#x7406;*********************</span><span>//

<span>float</span> time_read_img = <span>0.0</span><span>;
</span><span>float</span> time_infer = <span>0.0</span><span>;
</span><span>static</span> <span>float</span><span> prob[OUTPUT_SIZE];
</span><span>for</span> (<span>int</span> i = <span>0</span>; i &lt; <span>1000</span>; i++<span>) {

    </span><span>//</span><span> &#x5904;&#x7406;&#x56FE;&#x7247;&#x4E3A;&#x56FA;&#x5B9A;&#x8F93;&#x51FA;</span>


auto start
= std::chrono::system_clock::now(); //时间函数
std::string path = "./1.jpg";
std::cout
<< "img_path=" << path << endl;
static float data[3 * INPUT_H * INPUT_W];
cv::Mat img
= cv::imread(path);
ProcessImage(img, data);
auto end
= std::chrono::system_clock::now();
time_read_img
= std::chrono::duration_cast<:chrono::milliseconds>(end - start).count() + time_read_img;

    </span><span>//</span><span>Run inference</span>
    start = std::chrono::system_clock::now();  <span>//</span><span>&#x65F6;&#x95F4;&#x51FD;&#x6570;</span>
    doInference(*context, data, prob, <span>1</span><span>);
    end </span>=<span> std::chrono::system_clock::now();
    time_infer </span>= std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() +<span> time_infer;
    std::cout </span>&lt;&lt; std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() &lt;&lt; <span>&quot;</span><span>ms</span><span>&quot;</span> &lt;&lt;<span> std::endl;

    </span><span>//</span><span>&#x8F93;&#x51FA;&#x540E;&#x5904;&#x7406;
    </span><span>//</span><span>std::cout &lt;<"prob="<<prob>
    <span>float</span> cls_float = prob[<span>0</span><span>];
    </span><span>int</span> cls_id = <span>0</span><span>;
    </span><span>for</span> (<span>int</span> i = <span>0</span>; i &lt; OUTPUT_SIZE; i++<span>) {
        </span><span>if</span> (cls_float &lt;<span> prob[i]) {
            cls_float </span>=<span> prob[i];
            cls_id </span>=<span> i;
        }
    }
    std::cout </span>&lt;&lt; <span>&quot;</span><span>i=</span><span>&quot;</span> &lt;&lt; i &lt;&lt; <span>&quot;</span><span>\tcls_id=</span><span>&quot;</span> &lt;&lt; cls_id &lt;&lt; <span>&quot;</span><span>\t cls_float=</span><span>&quot;</span> &lt;&lt; cls_float &lt;&lt;<span> std::endl;
}

std::cout </span>&lt;&lt; <span>&quot;</span><span>C++ 2engine</span><span>&quot;</span> &lt;&lt; <span>&quot;</span><span>mean read img time =</span><span>&quot;</span> &lt;&lt; time_read_img / <span>1000</span> &lt;&lt; <span>&quot;</span><span>ms\t</span><span>&quot;</span> &lt;&lt; <span>&quot;</span><span>mean infer img time =</span><span>&quot;</span> &lt;&lt; time_infer / <span>1000</span> &lt;&lt; <span>&quot;</span><span>ms</span><span>&quot;</span> &lt;&lt;<span> std::endl;

</span><span>//</span><span> Destroy the engine</span>
context-&gt;<span>destroy();
engine</span>-&gt;<span>destroy();
runtime</span>-&gt;<span>destroy();

</span><span>return</span> <span>0</span><span>;

}

int main(int argc, char** argv)
{

</span><span>//</span><span>string mode = argv[1];</span>
<span>string</span> mode = <span>&quot;</span><span>-d</span><span>&quot;</span>;  <span>//</span><span>&#x9002;&#x7528;windows&#x7F16;&#x8BD1;&#xFF0C;&#x56FA;&#x5B9A;&#x6307;&#x5B9A;&#x53C2;&#x6570;

</span><span>//</span><span>if (std::string(argv[1]) == &quot;-s&quot;) {</span>
<span>if</span> (mode == <span>&quot;</span><span>-s</span><span>&quot;</span><span>) {

    get_trtengine();
}
</span><span>//</span><span>else if (std::string(argv[1]) == &quot;-d&quot;) {</span>
<span>else</span> <span>if</span> (mode == <span>&quot;</span><span>-d</span><span>&quot;</span><span>) {
    infer();
}
</span><span>else</span><span> {
    </span><span>return</span> -<span>1</span><span>;
}

</span><span>return</span> <span>0</span><span>;

}

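If the parse succeeds but inference misbehaves, first check that the hard-coded binding names ("images", "output") and OUTPUT_SIZE actually match your exported model, since different exporters name and shape the output differently. A small sketch that dumps every binding of a built or deserialized engine (printBindings is an illustrative helper, not part of the listing above; it uses the same TensorRT 8.x binding API as the code):

// print each binding's direction, name and dimensions, to verify
// INPUT_BLOB_NAME / OUTPUT_BLOB_NAME and OUTPUT_SIZE against the real model
void printBindings(const ICudaEngine* engine) {
    for (int i = 0; i < engine->getNbBindings(); ++i) {
        Dims d = engine->getBindingDimensions(i);
        std::cout << (engine->bindingIsInput(i) ? "input  " : "output ")
                  << engine->getBindingName(i) << ": [";
        for (int j = 0; j < d.nbDims; ++j)
            std::cout << d.d[j] << (j + 1 < d.nbDims ? ", " : "");
        std::cout << "]" << std::endl;
    }
}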

(2) Complete YOLOv5 ONNX-to-engine code.

The important steps are explained in comments; see the code for details.

Platform: Windows 10 with Visual Studio; for the related environment setup, see the end of my earlier blog posts.

This code implements the following:

① ONNX-to-engine conversion;

② engine inference;

③ NMS computed on the CPU.

#include "NvInfer.h"

include

"cuda_runtime_api.h"

include

include

include

include

include

include

include

include

include

include

include

// onnx转换头文件

include "NvOnnxParser.h"

using namespace nvonnxparser;

using namespace std;

#define CHECK(status) \ do\ {\ auto ret = (status);\ if (ret != 0)\ {\ std::cerr << "Cuda failure: " << ret << std::endl;\ abort();\ }\ } while (0)struct Detection { //center_x center_y w h float bbox[4]; float conf; // bbox_conf * cls_conf int class_id; int index;};

// stuff we know about the network and the input/output blobsstatic const int INPUT_H = 640;static const int INPUT_W = 640;static const int cls_num = 80;static const int anchor_output_num = 25200; //不同输入尺寸anchor:640-->25200 | 960-->56700static const int OUTPUT_SIZE = 1 anchor_output_num (cls_num+5); //1000 * sizeof(Detection) / sizeof(float) + 1;

const char INPUT_BLOB_NAME = "images";const char OUTPUT_BLOB_NAME = "output";

using namespace nvinfer1;

//static Logger gLogger;

//构建Loggerclass Logger : public ILogger{ void log(Severity severity, const char* msg) noexcept override { // suppress info-level messages if (severity <= Severity::kWARNING) std::cout << msg << std::endl; }} gLogger;

// Creat the engine using only the API and not any parser.ICudaEngine createEngine(unsigned int maxBatchSize, IBuilder builder, IBuilderConfig config){ const char onnx_path = "./best.onnx";

INetworkDefinition</span>* network = builder-&gt;createNetworkV2(<span>1U</span>); <span>//</span><span>&#x6B64;&#x5904;&#x91CD;&#x70B9;1U&#x4E3A;OU&#x5C31;&#x6709;&#x95EE;&#x9898;</span>


IParser
parser = createParser(network, gLogger);
parser
->parseFromFile(onnx_path, static_cast(ILogger::Severity::kWARNING));
//解析有错误将返回
for (int32_t i = 0; i < parser->getNbErrors(); ++i) { std::cout << parser->getError(i)->desc() << std::endl; }
std::cout
<< "successfully parse the onnx model" << std::endl;

</span><span>//</span><span> Build engine</span>
builder-&gt;<span>setMaxBatchSize(maxBatchSize);
config</span>-&gt;setMaxWorkspaceSize(<span>1</span> &lt;&lt; <span>20</span><span>);
</span><span>//</span><span>config-&gt;setFlag(nvinfer1::BuilderFlag::kFP16); </span><span>//</span><span> &#x8BBE;&#x7F6E;&#x7CBE;&#x5EA6;&#x8BA1;&#x7B97;
</span><span>//</span><span>config-&gt;setFlag(nvinfer1::BuilderFlag::kINT8);</span>
ICudaEngine* engine = builder-&gt;buildEngineWithConfig(*network, *<span>config);
std::cout </span>&lt;&lt; <span>&quot;</span><span>successfully  convert onnx to  engine&#xFF01;&#xFF01;&#xFF01; </span><span>&quot;</span> &lt;&lt;<span> std::endl;

</span><span>//</span><span>&#x9500;&#x6BC1;</span>
network-&gt;<span>destroy();
parser</span>-&gt;<span>destroy();

</span><span>return</span><span> engine;

}

void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
{

</span><span>//</span><span> Create builder</span>
IBuilder* builder =<span> createInferBuilder(gLogger);
IBuilderConfig</span>* config = builder-&gt;<span>createBuilderConfig();

</span><span>//</span><span> Create model to populate the network, then set the outputs and create an engine</span>
ICudaEngine* engine =<span> createEngine(maxBatchSize, builder, config);

assert(engine </span>!=<span> nullptr);

</span><span>//</span><span> Serialize the engine</span>
(*modelStream) = engine-&gt;<span>serialize();
</span><span>//</span><span> Close everything down</span>
engine-&gt;<span>destroy();
builder</span>-&gt;<span>destroy();
config</span>-&gt;<span>destroy();

}

void doInference(IExecutionContext& context, float input, float output, int batchSize)
{
const ICudaEngine& engine = context.getEngine();
// Pointers to input and output device buffers to pass to engine.

</span><span>//</span><span> Engine requires exactly IEngine::getNbBindings() number of buffers.</span>
assert(engine.getNbBindings() == <span>2</span><span>);
</span><span>void</span>* buffers[<span>2</span><span>];
</span><span>//</span><span> In order to bind the buffers, we need to know the names of the input and output tensors.

</span><span>//</span><span> Note that indices are guaranteed to be less than IEngine::getNbBindings()</span>
<span>const</span> <span>int</span> inputIndex =<span> engine.getBindingIndex(INPUT_BLOB_NAME);
</span><span>const</span> <span>int</span> outputIndex =<span> engine.getBindingIndex(OUTPUT_BLOB_NAME);
</span><span>//</span><span>const int inputIndex = 0;
</span><span>//</span><span>const int outputIndex = 1;
</span><span>//</span><span> Create GPU buffers on device</span>
cudaMalloc(&amp;buffers[inputIndex], batchSize * <span>3</span> * INPUT_H * INPUT_W * <span>sizeof</span>(<span>float</span><span>));
cudaMalloc(</span>&amp;buffers[outputIndex], batchSize * OUTPUT_SIZE * <span>sizeof</span>(<span>float</span><span>));
</span><span>//</span><span> Create stream</span>

cudaStream_t stream;
CHECK(cudaStreamCreate(
&stream));
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(batchSize, buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize
* OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// Release stream and buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
}

int get_trtengine() {

IHostMemory</span>*<span> modelStream{ nullptr };
APIToModel(</span><span>1</span>, &amp;<span>modelStream);
assert(modelStream </span>!=<span> nullptr);

std::ofstream p(</span><span>&quot;</span><span>./best.engine</span><span>&quot;</span><span>, std::ios::binary);
</span><span>if</span> (!<span>p)
{
    std::cerr </span>&lt;&lt; <span>&quot;</span><span>could not open plan output file</span><span>&quot;</span> &lt;&lt;<span> std::endl;
    </span><span>return</span> -<span>1</span><span>;
}
p.write(reinterpret_cast</span>&lt;<span>const</span> <span>char</span>*&gt;(modelStream-&gt;data()), modelStream-&gt;<span>size());
modelStream</span>-&gt;<span>destroy();

</span><span>return</span> <span>0</span><span>;

}

//加工图片变成拥有batch的输入, tensorrt输入需要的格式,为一个维度
void ProcessImage(cv::Mat image, float input_data[]) {
//只处理一张图片,总之结果为一维[batch3INPUT_W*INPUT_H]
//以下代码为投机取巧了

cv::resize(image, image, cv::Size(INPUT_W, INPUT_H),
0, 0, cv::INTER_LINEAR);
std::vector
<:mat> InputImage;

InputImage.push_back(image);

</span><span>int</span> ImgCount =<span> InputImage.size();

</span><span>//</span><span>float input_data[BatchSize * 3 * INPUT_H * INPUT_W];</span>
<span>for</span> (<span>int</span> b = <span>0</span>; b &lt; ImgCount; b++<span>) {
    cv::Mat img </span>=<span> InputImage.at(b);
    </span><span>int</span> w =<span> img.cols;
    </span><span>int</span> h =<span> img.rows;
    </span><span>int</span> i = <span>0</span><span>;
    </span><span>for</span> (<span>int</span> row = <span>0</span>; row &lt; h; ++<span>row) {
        uchar</span>* uc_pixel = img.data + row *<span> img.step;
        </span><span>for</span> (<span>int</span> col = <span>0</span>; col &lt; INPUT_W; ++<span>col) {
            input_data[b </span>* <span>3</span> * INPUT_H * INPUT_W + i] = (<span>float</span>)uc_pixel[<span>2</span>] / <span>255.0</span><span>;
            input_data[b </span>* <span>3</span> * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (<span>float</span>)uc_pixel[<span>1</span>] / <span>255.0</span><span>;
            input_data[b </span>* <span>3</span> * INPUT_H * INPUT_W + i + <span>2</span> * INPUT_H * INPUT_W] = (<span>float</span>)uc_pixel[<span>0</span>] / <span>255.0</span><span>;
            uc_pixel </span>+= <span>3</span><span>;
            </span>++<span>i;
        }
    }

}

}

//******* NMS code *******//

/
struct Detection {
//center_x center_y w h
float bbox[4];
float conf; // bbox_conf * cls_conf
int class_id;
int index;
};
/
struct Bbox {
int x;
int y;
int w;
int h;
};

float iou(Bbox box1, Bbox box2) {

</span><span>int</span> x1 =<span> max(box1.x, box2.x);
</span><span>int</span> y1 =<span> max(box1.y, box2.y);
</span><span>int</span> x2 = min(box1.x + box1.w, box2.x +<span> box2.w);
</span><span>int</span> y2 = min(box1.y + box1.h, box2.y +<span> box2.h);
</span><span>int</span> w = max(<span>0</span>, x2 -<span> x1);
</span><span>int</span> h = max(<span>0</span>, y2 -<span> y1);
</span><span>float</span> over_area = w *<span> h;
</span><span>return</span> over_area / (box1.w * box1.h + box2.w * box2.h -<span> over_area);

}

int get_max_index(vector pre_detection) {
//获得最佳置信度的值,并返回对应的索引值
int index;
float conf;
if (pre_detection.size() > 0) {
index
= 0;
conf
= pre_detection.at(0).conf;
for (int i = 0; i < pre_detection.size(); i++) {
if (conf < pre_detection.at(i).conf) {
index
= i;
conf
= pre_detection.at(i).conf;
}
}
return index;
}
else {
return -1;
}

}
bool judge_in_lst(int index, vector<int> index_lst) {
//若index在列表index_lst中则返回true,否则返回false
if (index_lst.size() > 0) {
for (int i = 0; i < index_lst.size(); i++) {
if (index == index_lst.at(i)) {
return true;
}
}
}
return false;
}
vector
<int> nms(vector pre_detection, float iou_thr)
{
/*
返回需保存box的pre_detection对应位置索引值

</span><span>*/</span>
<span>int</span><span> index;
vector</span><detection><span> pre_detection_new;
</span><span>//</span><span>Detection det_best;</span>

Bbox box_best, box;
float iou_value;
vector
<int> keep_index;
vector
<int> del_index;
bool keep_bool;
bool del_bool;

</span><span>if</span> (pre_detection.size() &gt; <span>0</span><span>) {

    pre_detection_new.clear();
    </span><span>//</span><span> &#x5FAA;&#x73AF;&#x5C06;&#x9884;&#x6D4B;&#x7ED3;&#x679C;&#x5EFA;&#x7ACB;&#x7D22;&#x5F15;</span>
    <span>for</span> (<span>int</span> i = <span>0</span>; i &lt; pre_detection.size(); i++<span>) {
        pre_detection.at(i).index </span>=<span> i;
        pre_detection_new.push_back(pre_detection.at(i));
    }
    </span><span>//</span><span>&#x5FAA;&#x73AF;&#x4FBF;&#x5229;&#x83B7;&#x5F97;&#x4FDD;&#x7559;box&#x4F4D;&#x7F6E;&#x7D22;&#x5F15;-&#x76F8;&#x5BF9;&#x8F93;&#x5165;pre_detection&#x4F4D;&#x7F6E;</span>
    <span>while</span> (pre_detection_new.size() &gt; <span>0</span><span>) {
        index </span>=<span> get_max_index(pre_detection_new);
        </span><span>if</span> (index &gt;= <span>0</span><span>) {
            keep_index.push_back(pre_detection_new.at(index).index); </span><span>//</span><span>&#x4FDD;&#x7559;&#x7D22;&#x5F15;&#x4F4D;&#x7F6E;

            </span><span>//</span><span> &#x66F4;&#x65B0;&#x6700;&#x4F73;&#x4FDD;&#x7559;box</span>
            box_best.x = pre_detection_new.at(index).bbox[<span>0</span><span>];
            box_best.y </span>= pre_detection_new.at(index).bbox[<span>1</span><span>];
            box_best.w </span>= pre_detection_new.at(index).bbox[<span>2</span><span>];
            box_best.h </span>= pre_detection_new.at(index).bbox[<span>3</span><span>];

            </span><span>for</span> (<span>int</span> j = <span>0</span>; j &lt; pre_detection.size(); j++<span>) {
                keep_bool </span>=<span> judge_in_lst(pre_detection.at(j).index, keep_index);
                del_bool </span>=<span> judge_in_lst(pre_detection.at(j).index, del_index);
                </span><span>if</span> ((!keep_bool) &amp;&amp; (!del_bool)) { <span>//</span><span>&#x4E0D;&#x5728;keep_index&#x4E0E;del_index&#x624D;&#x8BA1;&#x7B97;iou</span>
                    box.x = pre_detection.at(j).bbox[<span>0</span><span>];
                    box.y </span>= pre_detection.at(j).bbox[<span>1</span><span>];
                    box.w </span>= pre_detection.at(j).bbox[<span>2</span><span>];
                    box.h </span>= pre_detection.at(j).bbox[<span>3</span><span>];
                    iou_value </span>=<span> iou(box_best, box);
                    </span><span>if</span> (iou_value &gt;<span> iou_thr) {
                        del_index.push_back(j); </span><span>//</span><span>&#x8BB0;&#x5F55;&#x5927;&#x4E8E;&#x9608;&#x503C;&#x5C06;&#x5220;&#x9664;&#x5BF9;&#x5E94;&#x7684;&#x4F4D;&#x7F6E;</span>

}
}

            }
            </span><span>//</span><span>&#x66F4;&#x65B0;pre_detection_new</span>

pre_detection_new.clear();
for (int j = 0; j < pre_detection.size(); j++) {
keep_bool
= judge_in_lst(pre_detection.at(j).index, keep_index);
del_bool
= judge_in_lst(pre_detection.at(j).index, del_index);
if ((!keep_bool) && (!del_bool)) {
pre_detection_new.push_back(pre_detection.at(j));
}
}

        }

    }

}

del_index.clear();
del_index.shrink_to_fit();
pre_detection_new.clear();
pre_detection_new.shrink_to_fit();

</span><span>return</span><span>  keep_index;

}

vector postprocess(float prob, float conf_thr = 0.2, float nms_thr = 0.4) {
/

#####################此函数处理一张图预测结果#########################
prob为[x y w h score multi-pre] 如80类-->(1,anchor_num,85)

</span><span>*/</span><span>

vector</span><detection><span> pre_results;
vector</span>&lt;<span>int</span>&gt;<span> nms_keep_index;
vector</span><detection><span> results;
</span><span>bool</span><span> keep_bool;
Detection pre_res;
</span><span>float</span><span> conf;
</span><span>int</span><span> tmp_idx;
</span><span>float</span><span> tmp_cls_score;
</span><span>for</span> (<span>int</span> i = <span>0</span>; i &lt; anchor_output_num; i++<span>) {
    tmp_idx </span>= i * (cls_num + <span>5</span><span>);
    pre_res.bbox[</span><span>0</span>] = prob[tmp_idx + <span>0</span><span>];
    pre_res.bbox[</span><span>1</span>] = prob[tmp_idx + <span>1</span><span>];
    pre_res.bbox[</span><span>2</span>] = prob[tmp_idx + <span>2</span><span>];
    pre_res.bbox[</span><span>3</span>] = prob[tmp_idx + <span>3</span><span>];
    conf </span>= prob[tmp_idx + <span>4</span>];  <span>//</span><span>&#x662F;&#x4E3A;&#x76EE;&#x6807;&#x7684;&#x7F6E;&#x4FE1;&#x5EA6;</span>
    tmp_cls_score = prob[tmp_idx + <span>5</span>] *<span> conf;
    pre_res.class_id </span>= <span>0</span><span>;
    pre_res.conf </span>= <span>0</span><span>;
    </span><span>for</span> (<span>int</span> j = <span>1</span>; j &lt; cls_num; j++<span>) {
        tmp_idx </span>= i * (cls_num + <span>5</span>) + <span>5</span> + j; <span>//</span><span>&#x83B7;&#x5F97;&#x5BF9;&#x5E94;&#x7C7B;&#x522B;&#x7D22;&#x5F15;</span>
        <span>if</span> (tmp_cls_score &lt; prob[tmp_idx] *<span> conf)
        {
            tmp_cls_score </span>= prob[tmp_idx] *<span> conf;
            pre_res.class_id </span>=<span> j;
            pre_res.conf </span>=<span> tmp_cls_score;
        }
    }
    </span><span>if</span> (conf &gt;=<span> conf_thr) {

        pre_results.push_back(pre_res);
    }

}

</span><span>//</span><span>&#x4F7F;&#x7528;nms</span>
nms_keep_index=<span>nms(pre_results,nms_thr);

</span><span>for</span> (<span>int</span> i = <span>0</span>; i &lt; pre_results.size(); i++<span>) {
    keep_bool </span>=<span> judge_in_lst(i, nms_keep_index);
    </span><span>if</span><span> (keep_bool) {
        results.push_back(pre_results.at(i));
    }

}

pre_results.clear();
pre_results.shrink_to_fit();
nms_keep_index.clear();
nms_keep_index.shrink_to_fit();

</span><span>return</span><span> results;

}

cv::Mat draw_rect(cv::Mat image, vector results) {
/*
image 为图像

struct  Detection {
float bbox[4];  //center_x center_y  w h
float conf;  // &#x7F6E;&#x4FE1;&#x5EA6;
int class_id; //&#x7C7B;&#x522B;id
int index;    //&#x53EF;&#x5FFD;&#x7565;
};

</span><span>*/</span>

<span>float</span><span> x;
</span><span>float</span><span> y;
</span><span>float</span><span> y_tmp;
</span><span>float</span><span> w;
</span><span>float</span><span> h;
</span><span>string</span><span> info;

cv::Rect rect;
</span><span>for</span> (<span>int</span> i = <span>0</span>; i &lt; results.size(); i++<span>) {

    x </span>= results.at(i).bbox[<span>0</span><span>];
    y</span>= results.at(i).bbox[<span>1</span><span>];
    w</span>= results.at(i).bbox[<span>2</span><span>];
    h</span>=results.at(i).bbox[<span>3</span><span>];
    x </span>= (<span>int</span>)(x - w / <span>2</span><span>);
    y </span>= (<span>int</span>)(y - h / <span>2</span><span>);
    w </span>= (<span>int</span><span>)w;
    h </span>= (<span>int</span><span>)h;
    info </span>= <span>&quot;</span><span>id:</span><span>&quot;</span><span>;
    info.append(to_string(results.at(i).class_id));
    info.append(</span><span>&quot;</span><span> s:</span><span>&quot;</span><span>);
    info.append(  to_string((</span><span>int</span>)(results.at(i).conf*<span>100</span><span>)  )   );
    info.append(</span><span>&quot;</span><span>%</span><span>&quot;</span><span>);
    rect</span>=<span> cv::Rect(x, y, w, h);
    cv::rectangle(image, rect, cv::Scalar(</span><span>0</span>, <span>255</span>, <span>0</span>), <span>1</span>, <span>1</span>, <span>0</span>);<span>//</span><span>&#x77E9;&#x5F62;&#x7684;&#x4E24;&#x4E2A;&#x9876;&#x70B9;&#xFF0C;&#x4E24;&#x4E2A;&#x9876;&#x70B9;&#x90FD;&#x5305;&#x62EC;&#x5728;&#x77E9;&#x5F62;&#x5185;&#x90E8;</span>
    cv::putText(image, info, cv::Point(x, y), cv::FONT_HERSHEY_SIMPLEX, <span>0.4</span>, cv::Scalar(<span>0</span>, <span>255</span>, <span>0</span>), <span>0.4</span>, <span>1</span>, <span>false</span><span>);

}

</span><span>return</span><span> image;

}

int infer() {

</span><span>//</span><span>&#x52A0;&#x8F7D;engine&#x5F15;&#x64CE;</span>
<span>char</span>*<span> trtModelStream{ nullptr };
size_t size{ </span><span>0</span><span> };
std::ifstream file(</span><span>&quot;</span><span>./best.engine</span><span>&quot;</span><span>, std::ios::binary);
</span><span>if</span><span> (file.good()) {
    file.seekg(</span><span>0</span><span>, file.end);
    size </span>=<span> file.tellg();
    file.seekg(</span><span>0</span><span>, file.beg);
    trtModelStream </span>= <span>new</span> <span>char</span><span>[size];
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();
}
</span><span>//</span><span>&#x53CD;&#x5E8F;&#x5217;&#x4E3A;engine&#xFF0C;&#x521B;&#x5EFA;context</span>


IRuntime
runtime = createInferRuntime(gLogger);
assert(runtime
!= nullptr);
ICudaEngine
engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
assert(engine
!= nullptr);
IExecutionContext
* context = engine->createExecutionContext();
assert(context
!= nullptr);
delete[] trtModelStream;

</span><span>//</span><span>*********************&#x63A8;&#x7406;-&#x5FAA;&#x73AF;&#x63A8;&#x7406;*********************</span><span>//

<span>float</span> time_read_img = <span>0.0</span><span>;
</span><span>float</span> time_infer = <span>0.0</span><span>;
</span><span>float</span><span> prob[OUTPUT_SIZE];
vector</span><detection><span> results;

</span><span>for</span> (<span>int</span> i = <span>0</span>; i &lt; <span>1000</span>; i++<span>) {
    </span><span>//</span><span> &#x5904;&#x7406;&#x56FE;&#x7247;&#x4E3A;&#x56FA;&#x5B9A;&#x8F93;&#x51FA;</span>


auto start
= std::chrono::system_clock::now(); //时间函数
std::string path = "./7.jpg";
std::cout
<< "img_path=" << path << endl;
static float data[3 * INPUT_H * INPUT_W];
cv::Mat img
= cv::imread(path);

    ProcessImage(img, data);
    auto end </span>=<span> std::chrono::system_clock::now();
    time_read_img </span>= std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() +<span> time_read_img;

    </span><span>//</span><span>Run inference</span>
    start = std::chrono::system_clock::now();  <span>//</span><span>&#x65F6;&#x95F4;&#x51FD;&#x6570;</span>
    doInference(*context, data, prob, <span>1</span><span>);
    end </span>=<span> std::chrono::system_clock::now();
    time_infer </span>= std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() +<span> time_infer;
    std::cout </span>&lt;&lt; std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() &lt;&lt; <span>&quot;</span><span>ms</span><span>&quot;</span> &lt;&lt;<span> std::endl;

    </span><span>//</span><span>&#x8F93;&#x51FA;&#x540E;&#x5904;&#x7406;
    </span><span>//</span><span>std::cout &lt;<"prob="<<prob>

results.clear();
results
=postprocess(prob, 0.3, 0.4);

    cv::resize(img, img, cv::Size(INPUT_W, INPUT_H), </span><span>0</span>, <span>0</span><span>, cv::INTER_LINEAR);
    img</span>=<span>draw_rect(img,results);

    cv::imshow(</span><span>&quot;</span><span>www</span><span>&quot;</span><span>, img);
    cv::waitKey(</span><span>0</span><span>);

    cout </span>&lt;&lt; <span>&quot;</span><span>ok</span><span>&quot;</span> &lt;&lt;<span> endl;

}

std::cout </span>&lt;&lt; <span>&quot;</span><span>C++ 2engine</span><span>&quot;</span> &lt;&lt; <span>&quot;</span><span>mean read img time =</span><span>&quot;</span> &lt;&lt; time_read_img / <span>1000</span> &lt;&lt; <span>&quot;</span><span>ms\t</span><span>&quot;</span> &lt;&lt; <span>&quot;</span><span>mean infer img time =</span><span>&quot;</span> &lt;&lt; time_infer / <span>1000</span> &lt;&lt; <span>&quot;</span><span>ms</span><span>&quot;</span> &lt;&lt;<span> std::endl;

</span><span>//</span><span> Destroy the engine</span>
context-&gt;<span>destroy();
engine</span>-&gt;<span>destroy();
runtime</span>-&gt;<span>destroy();

</span><span>return</span> <span>0</span><span>;

}

int main(int argc, char** argv)
{

</span><span>//</span><span>string mode = argv[1];</span>
<span>string</span> mode = <span>&quot;</span><span>-d</span><span>&quot;</span>;  <span>//</span><span>&#x9002;&#x7528;windows&#x7F16;&#x8BD1;&#xFF0C;&#x56FA;&#x5B9A;&#x6307;&#x5B9A;&#x53C2;&#x6570;

</span><span>//</span><span>if (std::string(argv[1]) == &quot;-s&quot;) {</span>
<span>if</span> (mode == <span>&quot;</span><span>-s</span><span>&quot;</span><span>) {

    get_trtengine();
}
</span><span>//</span><span>else if (std::string(argv[1]) == &quot;-d&quot;) {</span>
<span>else</span> <span>if</span> (mode == <span>&quot;</span><span>-d</span><span>&quot;</span><span>) {
    infer();
}
</span><span>else</span><span> {
    </span><span>return</span> -<span>1</span><span>;
}

</span><span>return</span> <span>0</span><span>;

}


3. Prediction results:

[Image: detection result from the converted engine]

Conversion test result with a model I trained myself:

[Image: detection result from my own trained model]

4. Writing the CMakeLists.txt (added 2022-10-06)

This section shows how to build and run in an Ubuntu (Linux) environment. The CMakeLists.txt below works for both the ONNX-parser build of YOLO and the pure C++ API build; the line target_link_libraries(yolo /home/ubuntu/soft/TensorRT-8.2.5.1/lib/stubs/libnvonnxparser.so) links the library the ONNX path needs, and can be dropped for the pure C++ API build.

Reference: https://www.cnblogs.com/tangjunjun/p/16624566.html

CMakeLists.txt for the engine build:

cmake_minimum_required(VERSION 2.6)

project(yolo)

add_definitions(-std=c++11)

option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/include/)
link_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/lib/)

include_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/samples/common/)
#link_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/lib/stubs/)

# opencv
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(yolo ${PROJECT_SOURCE_DIR}/main.cpp)
target_link_libraries(yolo nvinfer)
target_link_libraries(yolo cudart)
target_link_libraries(yolo ${OpenCV_LIBS})
target_link_libraries(yolo /home/ubuntu/soft/TensorRT-8.2.5.1/lib/stubs/libnvonnxparser.so)

add_definitions(-O2 -pthread)
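With this CMakeLists.txt in place, the usual out-of-source build applies; note that main() above hard-codes mode = "-d", so re-enable the argv branch if you want to choose the mode from the command line (a sketch, assuming main.cpp holds the code from section 2):

mkdir build && cd build
cmake ..
make
./yolo -s   # serialize: best.onnx -> best.engine
./yolo -d   # deserialize best.engine and run detection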

Original: https://www.cnblogs.com/tangjunjun/p/16639361.html
Author: tangjunjun
Title: YOLOv5: Converting ONNX to TensorRT (engine)
