OpenCV —— YOLOv5 目标检测模型推理(兼容 YOLACT)


  • 结果展示:
main.exe -h

Usage: main.exe [params] image confThreshold nmsThresshold model_name

        -?, -h, --help, --usage (value:true)
                opecv based deep learining demo

        image (value:inference/horses.jpg)
                Image to process
        confThreshold (value:0.5)
                confidence threshold, default 0.5
        nmsThresshold (value:0.5)
                nms threshold, default 0.5
        model_name (value:yolov5)
                dnn model, default yolov5
parse wrong, please check command or type help
 main.exe inference/horses.jpg 0.5 0.5 yolov5



# Header search paths for OpenCV. The leading ".../" appears to be a placeholder
# for the local OpenCV install prefix — replace it before configuring.
include_directories(".../opencv/build/include" ".../opencv/build/include/opencv2")

# Demo executable built from main.cpp.
add_executable (main main.cpp)
# One library target per model / configuration source file.
add_library(yolact yolact.cpp)
add_library(yolov5 yolov5.cpp)
add_library(config config.cpp)
# Link the executable against the three libraries above and opencv_world460.
# NOTE(review): no link_directories()/find_package() call is visible in this
# snippet, so the linker must already know where opencv_world460 lives — confirm.
target_link_libraries(main yolact yolov5 config opencv_world460)

  • 代码示例:

1: 检测模型配置头文件 config.hpp

// Table of class label strings; the definition lives in config.cpp.
extern const char* class_names[];
// Table of 81 three-byte color entries; the definition lives in config.cpp.
extern const unsigned char colors[81][3];

2: 检测模型配置实现 config.cpp

#pragma once

// Class label table: 81 entries. Index 0 is the "background" placeholder, so a
// zero-based detector class id `c` maps to class_names[c + 1].
extern const char* class_names[] = { "background",
                                        "person", "bicycle", "car", "motorcycle", "airplane", "bus",
                                        "train", "truck", "boat", "traffic light", "fire hydrant",
                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
                                        "scissors", "teddy bear", "hair drier", "toothbrush"
};

// Color table: 81 entries of three bytes each.
// NOTE(review): presumably one color per class_names entry (both tables have
// 81 rows); the channel order is not established by the code shown here —
// confirm against the code that reads this table.
// `extern` is required: a const object at namespace scope would otherwise have
// internal linkage and not match the declaration in config.hpp.
extern const unsigned char colors[81][3] = {{56, 0, 255}, {226, 255, 0}, {0, 94, 255},
    {0, 37, 255}, {0, 255, 94}, {255, 226, 0}, {0, 18, 255}, {255, 151, 0},
    {170, 0, 255}, {0, 255, 56}, {255, 0, 75}, {0, 75, 255}, {0, 255, 169},
    {255, 0, 207}, {75, 255, 0}, {207, 0, 255}, {37, 0, 255}, {0, 207, 255},
    {94, 0, 255}, {0, 255, 113}, {255, 18, 0}, {255, 0, 56}, {18, 0, 255},
    {0, 255, 226}, {170, 255, 0}, {255, 0, 245}, {151, 255, 0}, {132, 255, 0},
    {75, 0, 255}, {151, 0, 255}, {0, 151, 255}, {132, 0, 255}, {0, 255, 245},
    {255, 132, 0}, {226, 0, 255}, {255, 37, 0}, {207, 255, 0},
    {0, 255, 207}, {94, 255, 0}, {0, 226, 255},
    {56, 255, 0}, {255, 94, 0}, {255, 113, 0},{0, 132, 255}, {255, 0, 132},
    {255, 170, 0}, {255, 0, 188}, {113, 255, 0}, {245, 0, 255}, {113, 0, 255},
    {255, 188, 0}, {0, 113, 255}, {255, 0, 0}, {0, 56, 255}, {255, 0, 113},
    {0, 255, 188}, {255, 0, 94}, {255, 0, 18}, {18, 255, 0}, {0, 255, 132},
    {0, 188, 255}, {0, 245, 255}, {0, 169, 255},{37, 255, 0},
    {255, 0, 151}, {188, 0, 255}, {0, 255, 37}, {0, 255, 0},
    {255, 0, 170}, {255, 0, 37}, {255, 75, 0}, {0, 0, 255}, {255, 207, 0},
    {255, 0, 226}, {255, 245, 0}, {188, 255, 0}, {0, 255, 18}, {0, 255, 75},
    {0, 255, 151}, {255, 56, 0}, {245, 255, 0}
};

// Runtime settings handed to a detector's constructor.
// A storage-class specifier such as `extern` is only valid on objects and
// functions, so it is omitted from this type definition.
struct net_config{
    float confThreshold = 0.5f;  // confidence threshold; same default as the command-line option
    float nmsThreshold = 0.5f;   // NMS threshold; same default as the command-line option
    std::string model_name;      // model selector, e.g. "yolov5" or "yolact"
    int img_size = 0;            // 0 = unset; no code visible in this file reads it
    std::string model_path;      // path of the model file to load
};

3: yolov5推理模型


#include "config.cpp"

using namespace cv;
using namespace dnn;
using namespace std;

class yolov5

    yolov5(float confThreshold, float nmsThreshold, string model_path = "model/yolov5m.onnx", const int keep_top_k = 200);

    yolov5(net_config& config);

    void detect(Mat& frame);
    const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}};
    const float stride[3] = { 8.0, 16.0, 32.0 };
    const int inpWidth = 640;
    const int inpHeight = 640;
    float confThreshold = 0.5;
    float nmsThreshold = 0.5;
    float objThreshold = 0.5;

    Net net;

    void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);

    void sigmoid(Mat* out, int length){
        float* pdata = (float*)(out->data);
        int i = 0;
        for (i = 0; i < length; i++)
            pdata[i] = 1.0 / (1 + expf(-pdata[i]));

/**
 * Builds a detector from explicit thresholds and a model file.
 *
 * @param confThreshold  minimum class score for a candidate box
 * @param nmsThreshold   overlap threshold passed to non-maximum suppression
 * @param model_path     model file loaded through readNet
 * @param keep_top_k     unused; kept so existing call sites still compile
 */
yolov5::yolov5(float confThreshold, float nmsThreshold, string model_path, const int keep_top_k)
    : confThreshold(confThreshold),
      nmsThreshold(nmsThreshold),
      net(readNet(model_path))
{
    (void)keep_top_k;  // intentionally unused
}

/**
 * Builds a detector from a net_config: copies its two thresholds and loads the
 * network from config.model_path through readNet. The other config fields are
 * not read.
 */
yolov5::yolov5(net_config& config)
    : confThreshold(config.confThreshold),
      nmsThreshold(config.nmsThreshold),
      net(readNet(config.model_path))
{
}

void yolov5::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid)

    rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);

    string label = format("%.2f", conf);
    label = string(class_names[classid+1]) + ":" + label;

    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    top = max(top, labelSize.height);

    putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);


/**
 * Runs the network on `frame` and draws every detection that survives
 * non-maximum suppression onto `frame` in place.
 *
 * Steps: build a 1/255-scaled, channel-swapped blob at the network input size,
 * run a forward pass, decode the three output scales into candidate boxes in
 * `frame` coordinates, apply NMS, then draw the kept boxes.
 *
 * Throws cv::Exception (via CV_Assert) when the network yields fewer than three
 * output tensors.
 *
 * NOTE(review): the decoding assumes each output is a contiguous float tensor
 * laid out as [anchor][channel][grid_y][grid_x] with 85 channels
 * (x, y, w, h, objectness, 80 class scores) — confirm for the exported model.
 */
void yolov5::detect(Mat& frame)
{
    Mat blob;
    blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
    // The blob must be bound as the network input before running forward().
    this->net.setInput(blob);
    vector<Mat> outs;
    this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
    // The loop below indexes outs[0..2]; fail loudly instead of reading out of range.
    CV_Assert(outs.size() >= 3);

    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;
    // Scale factors from network-input coordinates back to the original image.
    const float ratioh = (float)frame.rows / this->inpHeight;
    const float ratiow = (float)frame.cols / this->inpWidth;

    const int num_classes = 80;
    const int nout = num_classes + 5;
    for (int n = 0; n < 3; n++)
    {
        const int num_grid_x = (int)(this->inpWidth / this->stride[n]);
        const int num_grid_y = (int)(this->inpHeight / this->stride[n]);
        const int area = num_grid_x * num_grid_y;
        // Raw outputs are logits; squash the whole tensor of this scale once.
        this->sigmoid(&outs[n], 3 * nout * area);
        for (int q = 0; q < 3; q++)
        {
            const float anchor_w = this->anchors[n][q * 2];
            const float anchor_h = this->anchors[n][q * 2 + 1];
            const float* pdata = (float*)outs[n].data + q * nout * area;
            for (int i = 0; i < num_grid_y; i++)
            {
                for (int j = 0; j < num_grid_x; j++)
                {
                    const int cell = i * num_grid_x + j;
                    const float box_score = pdata[4 * area + cell];
                    if (box_score <= this->objThreshold)
                    {
                        continue;
                    }

                    // Pick the best-scoring class for this cell.
                    float max_class_score = 0;
                    int max_class_id = 0;
                    for (int c = 0; c < num_classes; c++)
                    {
                        const float class_score = pdata[(c + 5) * area + cell];
                        if (class_score > max_class_score)
                        {
                            max_class_score = class_score;
                            max_class_id = c;
                        }
                    }
                    if (max_class_score <= this->confThreshold)
                    {
                        continue;
                    }

                    // Decode centre and size in network-input pixels.
                    const float cx = (pdata[cell] * 2.f - 0.5f + j) * this->stride[n];
                    const float cy = (pdata[area + cell] * 2.f - 0.5f + i) * this->stride[n];
                    const float sw = pdata[2 * area + cell] * 2.f;
                    const float sh = pdata[3 * area + cell] * 2.f;
                    const float w = sw * sw * anchor_w;
                    const float h = sh * sh * anchor_h;

                    const int left = static_cast<int>((cx - 0.5 * w) * ratiow);
                    const int top = static_cast<int>((cy - 0.5 * h) * ratioh);

                    // Keep the three vectors index-aligned: NMSBoxes and the
                    // drawing loop below address all of them with one index.
                    classIds.push_back(max_class_id);
                    confidences.push_back(max_class_score);
                    boxes.push_back(Rect(left, top, (int)(w * ratiow), (int)(h * ratioh)));
                }
            }
        }
    }

    vector<int> indices;
    NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t k = 0; k < indices.size(); ++k)
    {
        const int idx = indices[k];
        const Rect& box = boxes[idx];

        this->drawPred(confidences[idx], box.x, box.y,
            box.x + box.width, box.y + box.height, frame, classIds[idx]);
    }
}

4: 整体代码结构


#include "config.cpp"
#include "yolact.cpp"
#include "yolov5.cpp"

using namespace cv;
using namespace dnn;
using namespace std;

bool parseParam(int argc, char** argv, const char* keys, Mat& img, net_config& config){
    CommandLineParser parser(argc, argv, keys);
        return false;
        return false;
    String imgFile = parser.get<String>(0);
    img = imread(imgFile);
        cout << "wrong image path ! please check again." << endl;
        return false;
    config.confThreshold = parser.get<float>(1);
    config.nmsThreshold = parser.get<float>(2);
    config.model_name = parser.get<string>(3);
    return true;

int main(int argc, char** argv)
    const char* keys  = {
        "{help h usage ? | | opecv based deep learining demo}"
        "{@image | inference/horses.jpg | Image to process}"
        "{@confThreshold | 0.5 | confidence threshold, default 0.5}"
        "{@nmsThresshold | 0.5 | nms threshold, default 0.5}"
        "{@model_name | yolov5 | dnn model, default yolov5}"

    net_config config;
    Mat srcimg;
    if(!parseParam(argc, argv, keys, srcimg, config)){
        cout << "parse wrong, please check command or type help" << endl;
        return 0;

    if(config.model_name == "yolact"){
        config.model_path = "model/yolact_base_54_800000.onnx";
        yolact model(config);
        static const string kWinName = "Deep learning object detection in OpenCV";
        namedWindow(kWinName, WINDOW_NORMAL);
        imshow(kWinName, srcimg);
    }else if(config.model_name == "yolov5"){
        config.model_path = "model/yolov5m.onnx";
        yolov5 model(config);
        static const string kWinName = "Deep learning object detection in OpenCV";
        namedWindow(kWinName, WINDOW_NORMAL);
        imshow(kWinName, srcimg);

        cout << "model not defined" << endl;
    return 0;

