三种部署Pytorch模型到C++环境的方式
文章目录
- 三种部署Pytorch模型到C++环境的方式
- 前言
- 一、pytorch2onnx
- 二、三种部署的方式
- 1.opencv加载onnx
- 2.onnxruntime加载onnx
- 3.libtorch部署
- 参考资料
前言
由于工作原因需要部署Pytorch模型到c++环境下,目前大概有三种方式。
1、pytorch转成onnx文件后,通过opencv读取。
2、pytorch转成onnx文件后,通过onnxruntime读取。
3、利用libtorch库,也就是pytorch的c++版。
一、pytorch2onnx
首先将pytorch训练好的模型导出为onnx文件。
安装所需包:
pip install onnx
pip install onnxruntime
from nets.deeplabv3 import deeplabv3
import torch
import os
from PIL import Image
import numpy as np
import onnx
import onnxruntime
def preprocess_input(image):
    """Scale pixel values by 1/255.

    Floating-point arrays are divided in place and the same object is
    returned. Any other input (for example a uint8 image array, on which an
    in-place true division raises) is first copied to ``float32``.

    Args:
        image: array-like of pixel values.

    Returns:
        The array divided by 255.0.
    """
    image = np.asarray(image)
    if not np.issubdtype(image.dtype, np.floating):
        # In-place true division cannot store floats in an integer buffer.
        image = image.astype(np.float32)
    image /= 255.0
    return image
def cvtColor(image):
    """Return ``image`` as a 3-channel RGB image.

    An input whose shape is ``(H, W, 3)`` is returned unchanged. Anything else
    (grayscale, RGBA, ...) is converted through its ``convert('RGB')`` method.

    Args:
        image: an image object; inputs that are not already ``(H, W, 3)``
            must provide ``convert(mode)``.

    Returns:
        The original object, or the result of ``image.convert('RGB')``.
    """
    shape = np.shape(image)
    # The channel count is the LAST axis; index -2 is the width and made the
    # check depend on the image being exactly 3 pixels wide.
    if len(shape) == 3 and shape[2] == 3:
        return image
    return image.convert('RGB')
def check_onnx_output(filename, input_data, torch_output):
    """Run the ONNX file with onnxruntime and compare against PyTorch outputs.

    Args:
        filename: path of the ONNX model to load.
        input_data: torch tensor fed to the first input of the ONNX model.
        torch_output: dict whose values are the reference torch tensors.

    Returns:
        The list of outputs produced by onnxruntime.

    Raises:
        AssertionError: if an output differs from its reference at 3 decimals.
    """
    print("模型测试")
    ort_session = onnxruntime.InferenceSession(filename)
    first_input = ort_session.get_inputs()[0].name
    feed = {first_input: input_data.detach().cpu().numpy()}
    ort_outputs = ort_session.run([], feed)
    # Outputs are paired positionally with the reference values.
    for expected, actual in zip(torch_output.values(), ort_outputs):
        np.testing.assert_almost_equal(expected.cpu().numpy(), actual, decimal=3)
    return ort_outputs
def check_onnx_model(model, onnx_filename, input_image):
    """Validate an exported ONNX file against the originating PyTorch model.

    First compares the onnxruntime output with the PyTorch output for
    ``input_image``, then runs the ONNX checker on the file.

    Args:
        model: the PyTorch model that was exported.
        onnx_filename: path of the exported ONNX file.
        input_image: input tensor used for the comparison.

    Returns:
        The loaded ONNX model object.
    """
    with torch.no_grad():
        reference = model(input_image)
    check_onnx_output(onnx_filename, input_image, {"output": reference})
    print("模型输出一致")

    loaded_model = onnx.load(onnx_filename)
    onnx.checker.check_model(loaded_model)
    print("模型测试成功")
    return loaded_model
if __name__ == '__main__':
    # Export the trained network to ONNX next to the checkpoint and verify it.
    model_path = 'net.pth'
    # dirname() is '' for a bare file name; joining keeps the export in the
    # checkpoint's directory instead of producing '/torch.onnx' (filesystem root).
    onnx_path = os.path.dirname(model_path)
    onnx_name = os.path.join(onnx_path, "torch.onnx")
    device = 'cpu'
    VOCdevkit_path = './1.jpg'

    # Build the example input: RGB image -> float32 in [0, 1] -> NCHW batch of 1.
    img = Image.open(VOCdevkit_path)
    img = cvtColor(img)
    img = np.expand_dims(np.transpose(preprocess_input(np.array(img, np.float32)), (2, 0, 1)), 0)
    img = torch.from_numpy(img)

    net = deeplabv3()
    net.load_state_dict(torch.load(model_path, map_location=device), strict=True)
    net = net.eval()

    # Sanity forward pass; gradients are not needed for export.
    with torch.no_grad():
        out = net(img)
    print(out)

    torch.onnx.export(net, img, onnx_name, verbose=True, input_names=["input"],
                      output_names=["output"], opset_version=11)
    onnx_model = check_onnx_model(net, onnx_name, img)
二、三种部署的方式
1.opencv加载onnx
#include
#include
#include
#include
#include
#include
using namespace std;
int main()
{
String modelFile = "./torch.onnx";
String imageFile = "./1.jpg";
dnn::Net net = cv::dnn::readNetFromONNX(modelFile);
cv::Mat imageBGR = cv::imread(input_path, cv::ImreadModes::IMREAD_COLOR);
cv::Mat resizedImageRGB, resizedImage, preprocessedImage;
resize(imageBGR , resizedImage, Size(500, 500), INTER_AREA)
cv::cvtColor(resizedImage, resizedImageRGB,
cv::ColorConversionCodes::COLOR_BGR2RGB);
resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
cv::Mat channels[3];
cv::split(resizedImage, channels);
cv::merge(channels, 3, resizedImage);
cv::dnn::blobFromImage(resizedImage, preprocessedImage);
net.setInput(inputBolb);
Mat result = net.forward();
cout << result << endl;
return 0;
}
2.onnxruntime加载onnx
下面部署的是语义分割的模型。
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace cv;
using namespace std;
using namespace cv::dnn;
/**
 * Converts a failed ONNX Runtime C-API status into an exception.
 *
 * @param g_ort  ONNX Runtime API table used to inspect and release the status.
 * @param status Status returned by an API call; nullptr means success.
 * @return true when `status` is nullptr.
 * @throws Ort::Exception carrying the status message when `status` is set.
 */
bool CheckStatus(const OrtApi* g_ort, OrtStatus* status) {
    if (status == nullptr) {
        return true;
    }
    // The message belongs to `status`, so copy it BEFORE releasing the status;
    // otherwise the exception would be built from a dangling pointer.
    const std::string message = g_ort->GetErrorMessage(status);
    g_ort->ReleaseStatus(status);
    std::cerr << message << std::endl;
    throw Ort::Exception(std::string(message), OrtErrorCode::ORT_EP_FAIL);
}
void PreProcess(const Mat& image, Mat& image_blob)
{
Mat input;
image.copyTo(input);
std::vector<Mat> channels, channel_p;
split(input, channels);
Mat R, G, B;
B = channels.at(0);
G = channels.at(1);
R = channels.at(2);
B = B / 255.0;
G = G / 255.0;
R = R / 255.0;
channel_p.push_back(R);
channel_p.push_back(G);
channel_p.push_back(B);
Mat outt;
merge(channel_p, outt);
image_blob = outt;
}
void run_ort_net(std::string backend, std::string input_path) {
#ifdef _WIN32
const wchar_t* model_path = L"F:/visual studio workplace/torch.onnx";
#else
const char* model_path = "F:/visual studio workplace/torch.onnx";
#endif
const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
OrtEnv* env;
CheckStatus(g_ort, g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env));
OrtSessionOptions* session_options;
CheckStatus(g_ort, g_ort->CreateSessionOptions(&session_options));
CheckStatus(g_ort, g_ort->SetIntraOpNumThreads(session_options, 1));
CheckStatus(g_ort, g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC));
std::vector<const char*> options_keys = { "runtime", "buffer_type" };
std::vector<const char*> options_values = { backend.c_str(), "FLOAT" };
OrtSession* session;
CheckStatus(g_ort, g_ort->CreateSession(env, model_path, session_options, &session));
OrtAllocator* allocator;
CheckStatus(g_ort, g_ort->GetAllocatorWithDefaultOptions(&allocator));
size_t num_input_nodes;
CheckStatus(g_ort, g_ort->SessionGetInputCount(session, &num_input_nodes));
std::vector<const char*> input_node_names;
std::vector<std::vector<int64_t>> input_node_dims;
std::vector<ONNXTensorElementDataType> input_types;
std::vector<OrtValue*> input_tensors;
input_node_names.resize(num_input_nodes);
input_node_dims.resize(num_input_nodes);
input_types.resize(num_input_nodes);
input_tensors.resize(num_input_nodes);
for (size_t i = 0; i < num_input_nodes; i++) {
char* input_name;
CheckStatus(g_ort, g_ort->SessionGetInputName(session, i, allocator, &input_name));
input_node_names[i] = input_name;
std::cout << "input name :" << input_name << std::endl;
OrtTypeInfo* typeinfo;
CheckStatus(g_ort, g_ort->SessionGetInputTypeInfo(session, i, &typeinfo));
const OrtTensorTypeAndShapeInfo* tensor_info;
CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
ONNXTensorElementDataType type;
CheckStatus(g_ort, g_ort->GetTensorElementType(tensor_info, &type));
input_types[i] = type;
size_t num_dims;
CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
input_node_dims[i].resize(num_dims);
CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, input_node_dims[i].data(), num_dims));
std::cout << "input dims :" << num_dims << std::endl;
size_t tensor_size;
CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));
if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
}
size_t num_output_nodes;
std::vector<const char*> output_node_names;
std::vector<std::vector<int64_t>> output_node_dims;
std::vector<OrtValue*> output_tensors;
CheckStatus(g_ort, g_ort->SessionGetOutputCount(session, &num_output_nodes));
output_node_names.resize(num_output_nodes);
output_node_dims.resize(num_output_nodes);
output_tensors.resize(num_output_nodes);
for (size_t i = 0; i < num_output_nodes; i++) {
char* output_name;
CheckStatus(g_ort, g_ort->SessionGetOutputName(session, i, allocator, &output_name));
output_node_names[i] = output_name;
std::cout << "output dims :" << output_name << std::endl;
OrtTypeInfo* typeinfo;
CheckStatus(g_ort, g_ort->SessionGetOutputTypeInfo(session, i, &typeinfo));
const OrtTensorTypeAndShapeInfo* tensor_info;
CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
size_t num_dims;
CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
output_node_dims[i].resize(num_dims);
CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, (int64_t*)output_node_dims[i].data(), num_dims));
std::cout << "output dims :" << num_dims << std::endl;
size_t tensor_size;
CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));
if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
}
Mat img = imread(input_path);
Mat det1;
img.convertTo(img, CV_32FC3);
PreProcess(img, det1);
Mat blob = dnn::blobFromImage(det1, 1., Size(500, 500), Scalar(0, 0, 0), false, false);
printf("Load success!\n");
OrtMemoryInfo* memory_info;
CheckStatus(g_ort, g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
CheckStatus(g_ort, g_ort->CreateTensorWithDataAsOrtValue(memory_info, blob.ptr<float>(), blob.total() * sizeof(float), input_node_dims[0].data(),
input_node_dims[0].size(), input_types[0], &input_tensors[0]));
CheckStatus(g_ort, g_ort->Run(session, nullptr, input_node_names.data(), (const OrtValue* const*)input_tensors.data(),
input_tensors.size(), output_node_names.data(), output_node_names.size(),
output_tensors.data()));
size_t output_data_size = 500 * 500;
size_t output_data_length = output_data_size * sizeof(int64_t*);
std::vector<int64_t*> output_data(output_data_length);
void* output_buffer;
CheckStatus(g_ort, g_ort->GetTensorMutableData(output_tensors[0], &output_buffer));
int64_t* int_buffer = reinterpret_cast<int64_t*>(output_buffer);
int count = 0;
Mat newarr = Mat_<int>(500, 500);
for (int i = 0; i < newarr.rows; i++)
{
for (int j = 0; j < newarr.cols; j++)
{
if ((int)int_buffer[i * j + j] >= 1) {
count++;
newarr.at<int>(i, j) = 255;
continue;
}
newarr.at<int>(i, j) = int_buffer[i * j + j];
}
}
cout << count << endl;
imwrite("./test.png", newarr);
newarr = imread("./test.png", IMREAD_GRAYSCALE);
cout << newarr.channels() << endl;
imshow("mask", newarr);
cv::waitKey();
}
/**
 * Entry point of the ONNX Runtime sample: runs the network once on "./1.jpg"
 * with the "CPU" backend label. Command-line arguments are not used.
 *
 * @return always 0.
 */
int main(int argc, char* argv[]) {
    const std::string image_file{"./1.jpg"};
    const std::string backend_name{"CPU"};
    run_ort_net(backend_name, image_file);
    return 0;
}
为了更好地显示结果,把非背景的值置为255,如下图:
3.libtorch部署
pytorch训练的模型,需要转换为script model,参考在C++平台上部署PyTorch模型流程+踩坑实录
#include
#include
#include
#include
int main()
{
torch::DeviceType device_type;
if (torch::cuda::is_available()) {
std::cout << "CUDA available! Predicting on GPU." << std::endl;
device_type = torch::kCUDA;
}
else {
std::cout << "Predicting on CPU." << std::endl;
device_type = torch::kCUDA;
}
torch::Device device(device_type);
std::string model_pb = "./cpu.pth";
auto module = torch::jit::load(model_pb);
module.to(at::kCUDA);
auto image = cv::imread("./1_35.jpg", cv::ImreadModes::IMREAD_COLOR);
cv::Mat image_transfomed;
cv::resize(image, image_transfomed, cv::Size(500, 500));
torch::Tensor tensor_image = torch::from_blob(image_transfomed.data,
{ image_transfomed.rows, image_transfomed.cols,3 }, torch::kByte);
tensor_image = tensor_image.permute({ 2,0,1 });
tensor_image = tensor_image.toType(torch::kFloat);
tensor_image = tensor_image.div(255);
tensor_image = tensor_image.unsqueeze(0);
tensor_image = tensor_image.to(at::kCUDA);
torch::Tensor output = module.forward({ tensor_image }).toTensor();
auto max_result = output.max(1, true);
auto max_index = std::get<1>(max_result).item<float>();
std::cout << output << std::endl;
return 0;
}
参考资料
[1] https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/Snpe_EP/main.cpp
[2] https://blog.csdn.net/qq_44747572/article/details/120820964?spm=1001.2014.3001.5501
[3] https://zhuanlan.zhihu.com/p/191569603
[4] https://zhuanlan.zhihu.com/p/414317269
Original: https://blog.csdn.net/likesomething1/article/details/125543214
Author: 双木linwis
Title: 部署Pytorch模型到C++环境
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/706936/
转载文章受原作者版权保护。转载请注明原作者出处!