pt模型在VS上运行

注意：本例是我个人参照YOLOv5的源码，在VS（Visual Studio）上用C++实现的推理流程，大家可以按自己的模型对照源码进行修改。
我这里实现的是单个物体的检测；如果要检测多个物体，需要对结果处理函数(non_max_suppression2)进行修改。

1.安装与训练时所用pytorch版本相对应的libtorch
网上教程很多，这里不再赘述；我的pytorch和libtorch两者都是CUDA版本
2.将训练好的模型转为.torchscript.pt模型（可以选择是CUDA或者CPU，这里是CUDA）

import argparse

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish
from utils.general import set_logging

if __name__ == '__main__':
    # Export a YOLOv5 checkpoint to TorchScript: load the weights on the GPU, patch the
    # activations for export, trace the model with a dummy input and save the traced module.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='httt.pt', help='weights path')  # from yolov5/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand a single value to [size, size]
    print(opt)
    set_logging()

    # Input: an all-zero dummy batch of shape (batch_size, 3, *img_size) on the GPU, used for tracing
    img = torch.zeros((opt.batch_size, 3, *opt.img_size)).to(device='cuda')

    # Load PyTorch model
    model = attempt_load(opt.weights, map_location=torch.device('cuda'))  # load FP32 model

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv) and isinstance(m.act, nn.Hardswish):
            m.act = Hardswish()  # swap nn.Hardswish for the project's own Hardswish implementation
        # if isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = True  # set Detect() layer export=True
    model(img)  # dry run (the result itself is not needed)

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        # Swap only a trailing '.pt'. str.replace() would rewrite every '.pt' inside the path and,
        # for a path without that suffix, return the weights path itself, so ts.save() would
        # overwrite the original checkpoint.
        stem = opt.weights[:-len('.pt')] if opt.weights.endswith('.pt') else opt.weights
        f = stem + '.torchscript.pt'  # filename
        ts = torch.jit.trace(model, img)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        # Best effort: report the failure instead of raising.
        print('TorchScript export failure: %s' % e)

运行结束后会获得转换后的模型
（原文此处为一张图片，未能保留）
3.加载模型
测试CUDA是否可用

// Print whether libtorch reports CUDA and cuDNN as available.
cout << "cuda是否可用：" << torch::cuda::is_available() << endl;
cout << "cudnn是否可用：" << torch::cuda::cudnn_is_available() << endl;

加载模型

// Load the exported TorchScript module from disk.
torch::jit::script::Module module;
module = torch::jit::load("C:\\Users\\34187\\Desktop\\test\\test\\httt.torchscript.pt");
// Move the model to the GPU
device_type = at::kCUDA;
module.to(device_type);
// Put the module into evaluation mode.
module.eval();

根据自己的模型的输入参数对输入的图像进行处理（缩放，数值类型转换）

// Open capture device index 0 and create the display window named "t".
VideoCapture m(0);
namedWindow("t", WINDOW_AUTOSIZE);
// Read one frame and turn it into an input tensor on the selected device.
Mat temp;
m >> temp;
at::Tensor inputtensor = imagpro(temp).to(device_type);

imagpro函数

/**
 * Pre-process a frame into the float tensor that is fed to the model.
 *
 * The frame is resized to 640x640, converted from BGR to RGB, wrapped as a
 * (1, 640, 640, 3) byte tensor, permuted to (1, 3, 640, 640), converted to
 * float and divided by 255.
 *
 * @param img Input frame. NOTE(review): the BGR2RGB conversion and the kByte blob
 *            presume an 8-bit, 3-channel BGR image - confirm against the caller.
 * @return Float tensor of shape (1, 3, 640, 640). It owns its storage: the byte->float
 *         conversion copies the pixels out of the temporary Mat.
 */
at::Tensor imagpro(Mat img) {
	// Give every step its own destination Mat. Resizing/converting through a header that
	// shares its buffer with `img` lets OpenCV reuse that buffer when the frame is already
	// 640x640, which swaps the channels of the caller's frame in place.
	cv::Mat resized;
	cv::resize(img, resized, cv::Size(640, 640));
	cv::Mat rgb;
	cv::cvtColor(resized, rgb, cv::COLOR_BGR2RGB);

	// from_blob only borrows rgb.data; the tensor must not outlive `rgb` before it is copied.
	at::Tensor imgTensor = torch::from_blob(rgb.data, { 1, rgb.rows, rgb.cols, 3 }, torch::kByte);
	imgTensor = imgTensor.permute({ 0, 3, 1, 2 });  // NHWC -> NCHW

	// The dtype conversion allocates new storage, so the result stays valid after `rgb` is released.
	imgTensor = imgTensor.toType(torch::kFloat);
	imgTensor = imgTensor.div(255);

	return imgTensor;
}

对刚刚获取的图像数值进行推理，获取结果

// Wrap the input tensor in a Stack, run the module's forward pass and take the
// first element of the returned tuple as the prediction tensor.
torch::jit::Stack inputs;
inputs.push_back({ inputtensor });
torch::Tensor preds = module.forward(inputs).toTuple()->elements()[0].toTensor().to(device_type);

对结果进行处理

// Post-process the predictions (score_thresh = 0.5, iou_thresh = 0.55); the frame is passed in for drawing.
std::vector<torch::Tensor> dets = non_max_suppression2(preds,temp, 0.5, 0.55);

non_max_suppression2函数：同一个物体可能会被多个框检测到；由于暂时还没有正确实现NMS，这里只是把所有超过阈值的框都画出来，所以会出现许多框框住同一个物体的情况


/**
 * Filter raw predictions by confidence, draw every surviving box and show the frame.
 *
 * Each prediction row is read as [cx, cy, w, h, objectness, class scores...].
 * No IoU-based suppression is performed, so overlapping boxes for the same object
 * are all kept and drawn.
 *
 * @param preds        Prediction tensor whose last dimension holds the row layout above.
 * @param img          Frame to draw on; it is resized to 640x640 into a separate Mat,
 *                     the caller's frame is left untouched.
 * @param score_thresh Minimum objectness (column 4) for a row to be kept.
 * @param iou_thresh   Currently unused (reserved for the NMS step).
 * @return Empty when nothing passes the threshold; otherwise a single (K, 6) tensor with
 *         rows [x1, y1, x2, y2, confidence, class index].
 */
std::vector<torch::Tensor> non_max_suppression2(torch::Tensor preds,Mat img, float score_thresh = 0.25, float iou_thresh = 0.45)
{
	(void)iou_thresh;  // kept in the signature for callers; no suppression step uses it yet
	std::vector<torch::Tensor> output;

	// Boolean-mask indexing over the leading dimensions returns a fresh (K, 5 + classes) copy.
	const at::Tensor xc = preds.index({ "...", 4 }) > score_thresh;
	at::Tensor x = preds.index({ xc });
	if (x.size(0) == 0)
		return output;

	// confidence = objectness * class score, applied to every class column (not just the first).
	x.slice(1, 5).mul_(x.slice(1, 4, 5));

	const at::Tensor box = xywh2xyxy(x.slice(1, 0, 4));

	// Best class per row: values and indices, both of shape (K, 1).
	const std::tuple<at::Tensor, at::Tensor> best = x.slice(1, 5).max(1, true);
	const at::Tensor con = std::get<0>(best);
	const at::Tensor j = std::get<1>(best);

	// The class index is int64; cast it so that cat() sees a single dtype.
	x = torch::cat({ box, con, j.to(con.scalar_type()) }, 1);
	output.push_back(x);

	// Copy boxes and scores to the host once instead of reading element by element from the device.
	const at::Tensor boxCpu = box.to(torch::kCPU);
	const at::Tensor conCpu = con.to(torch::kCPU);

	// Draw on a separate 640x640 image: box coordinates are in the model's input resolution.
	cv::Mat image;
	cv::resize(img, image, cv::Size(640, 640));
	const int len = static_cast<int>(x.size(0));
	for (int i = 0; i < len; i++)
	{
		const float left = boxCpu[i][0].item().toFloat();
		const float top = boxCpu[i][1].item().toFloat();
		const float right = boxCpu[i][2].item().toFloat();
		const float bottom = boxCpu[i][3].item().toFloat();
		const float score = conCpu[i][0].item().toFloat();

		cv::rectangle(image,
			cv::Rect(static_cast<int>(left), static_cast<int>(top),
				static_cast<int>(right - left), static_cast<int>(bottom - top)),
			cv::Scalar(0, 255, 0), 2);

		// The font scale grows with the box width.
		cv::putText(image,
			"QRcode: " + cv::format("%.2f", score),
			cv::Point(static_cast<int>(left), static_cast<int>(top)),
			cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);
	}
	imshow("t", image);
	waitKey(1);
	return output;
}

完整代码：

#include <torch/script.h>
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include<opencv2/highgui.hpp>
#include <iostream>
#include <memory>
using namespace cv;
using namespace std;

using namespace torch::indexing;
torch::DeviceType device_type;


/**
 * Greedy non-maximum suppression over axis-aligned boxes.
 *
 * Repeatedly picks the highest-scoring remaining box and discards the remaining
 * candidates that overlap it too much.
 *
 * @param boxes   (N, 4) tensor; columns 0..3 are read as x1, y1, x2, y2.
 * @param scores  One score per box; it is flattened, so (N) and (N, 1) both work.
 *                NOTE(review): assumed to live on the same device as `boxes`.
 * @param keep    Output: int64 tensor of length N on the device of `scores`. Its first
 *                `count` entries are the indices of the kept boxes, best score first.
 * @param count   Output: number of valid entries in `keep`.
 * @param overlap IoU threshold: a candidate survives a round only if its IoU with the
 *                box picked in that round is below this value.
 * @param top_k   Only the `top_k` highest-scoring boxes are considered.
 * @return false when `boxes` has no elements, true otherwise.
 */
bool nms(const torch::Tensor& boxes, const torch::Tensor& scores, torch::Tensor& keep, int& count, float overlap, int top_k)
{
	count = 0;
	const torch::Tensor flat_scores = scores.reshape({ -1 });
	// Indices must be integral and live next to the scores.
	keep = torch::zeros({ flat_scores.size(0) },
		torch::TensorOptions().dtype(torch::kLong).device(scores.device()));
	if (0 == boxes.numel())
	{
		return false;
	}

	const torch::Tensor x1 = boxes.select(1, 0).clone();
	const torch::Tensor y1 = boxes.select(1, 1).clone();
	const torch::Tensor x2 = boxes.select(1, 2).clone();
	const torch::Tensor y2 = boxes.select(1, 3).clone();
	const torch::Tensor area = (x2 - x1) * (y2 - y1);

	// Ascending sort: the best remaining candidate is always the last element of idx.
	torch::Tensor idx = std::get<1>(torch::sort(flat_scores, 0, false));

	const auto num_ = idx.size(0);
	if (num_ > top_k) // python: idx = idx[-top_k:]
	{
		idx = idx.slice(0, num_ - top_k, num_).clone();
	}

	while (idx.numel() > 0)
	{
		const auto best = idx[-1].item().toLong();
		keep.select(0, count).fill_(best);
		count += 1;
		if (1 == idx.size(0))
		{
			break;
		}
		idx = idx.slice(0, 0, idx.size(0) - 1);

		const float bx1 = x1[best].item().toFloat();
		const float by1 = y1[best].item().toFloat();
		const float bx2 = x2[best].item().toFloat();
		const float by2 = y2[best].item().toFloat();

		// Intersection of ALL remaining candidates with the picked box (select with idx, not idx[0]).
		const torch::Tensor w = (x2.index_select(0, idx).clamp_max(bx2)
			- x1.index_select(0, idx).clamp_min(bx1)).clamp_min(0);
		const torch::Tensor h = (y2.index_select(0, idx).clamp_max(by2)
			- y1.index_select(0, idx).clamp_min(by1)).clamp_min(0);
		const torch::Tensor inter = w * h;

		const torch::Tensor union_ = (area.index_select(0, idx) - inter) + area[best];
		const torch::Tensor iou = inter / union_;

		// squeeze(1) keeps the index 1-D even when exactly one candidate survives.
		const torch::Tensor survivors = torch::nonzero(iou < overlap).squeeze(1);
		idx = idx.index_select(0, survivors);
	}
	return true;
}

/**
 * Convert boxes from (center x, center y, width, height) to corner form (x1, y1, x2, y2).
 *
 * @param x Tensor whose last dimension starts with cx, cy, w, h.
 * @return A new tensor of the same shape with the first four entries of the last
 *         dimension replaced by x1, y1, x2, y2; `x` itself is not modified.
 */
at::Tensor xywh2xyxy(at::Tensor x)
{
	// Work on a copy: `x` is a handle that may be a view into the caller's tensor, and
	// converting in place would silently overwrite the caller's data.
	at::Tensor y = x.clone();
	const at::Tensor half_w = x.index({ "...", 2 }) / 2;
	const at::Tensor half_h = x.index({ "...", 3 }) / 2;
	y.index_put_({ "...", 0 }, x.index({ "...", 0 }) - half_w);  // left
	y.index_put_({ "...", 1 }, x.index({ "...", 1 }) - half_h);  // top
	y.index_put_({ "...", 2 }, x.index({ "...", 0 }) + half_w);  // right
	y.index_put_({ "...", 3 }, x.index({ "...", 1 }) + half_h);  // bottom
	return y;
}
/**
 * Pre-process a frame into the float tensor that is fed to the model.
 *
 * The frame is resized to 640x640, converted from BGR to RGB, wrapped as a
 * (1, 640, 640, 3) byte tensor, permuted to (1, 3, 640, 640), converted to
 * float and divided by 255.
 *
 * @param img Input frame. NOTE(review): the BGR2RGB conversion and the kByte blob
 *            presume an 8-bit, 3-channel BGR image - confirm against the caller.
 * @return Float tensor of shape (1, 3, 640, 640). It owns its storage: the byte->float
 *         conversion copies the pixels out of the temporary Mat.
 */
at::Tensor imagpro(Mat img) {
	// Give every step its own destination Mat. Resizing/converting through a header that
	// shares its buffer with `img` lets OpenCV reuse that buffer when the frame is already
	// 640x640, which swaps the channels of the caller's frame in place.
	cv::Mat resized;
	cv::resize(img, resized, cv::Size(640, 640));
	cv::Mat rgb;
	cv::cvtColor(resized, rgb, cv::COLOR_BGR2RGB);

	// from_blob only borrows rgb.data; the tensor must not outlive `rgb` before it is copied.
	at::Tensor imgTensor = torch::from_blob(rgb.data, { 1, rgb.rows, rgb.cols, 3 }, torch::kByte);
	imgTensor = imgTensor.permute({ 0, 3, 1, 2 });  // NHWC -> NCHW

	// The dtype conversion allocates new storage, so the result stays valid after `rgb` is released.
	imgTensor = imgTensor.toType(torch::kFloat);
	imgTensor = imgTensor.div(255);

	return imgTensor;
}

/**
 * Select the prediction rows whose confidence exceeds a threshold.
 *
 * @param preds      2-D tensor with one prediction per row; column 4 is read as the confidence.
 * @param conf_thres Rows are kept when their column-4 value is greater than this.
 * @return Tensor holding the kept rows in their original order; `preds` itself when it
 *         has no elements.
 */
at::Tensor getMax(at::Tensor preds,double conf_thres)
{
	if (preds.numel() == 0)
		return preds;
	// The original loop called select(i, 4), which uses the row index as the dimension,
	// and res.add(...), which discards its result; a boolean row mask does the intended filtering.
	const at::Tensor keep = preds.select(1, 4) > conf_thres;
	return preds.index({ keep });
}


/**
 * Filter raw predictions by confidence, draw every surviving box and show the frame.
 *
 * Each prediction row is read as [cx, cy, w, h, objectness, class scores...].
 * No IoU-based suppression is performed, so overlapping boxes for the same object
 * are all kept and drawn.
 *
 * @param preds        Prediction tensor whose last dimension holds the row layout above.
 * @param img          Frame to draw on; it is resized to 640x640 into a separate Mat,
 *                     the caller's frame is left untouched.
 * @param score_thresh Minimum objectness (column 4) for a row to be kept.
 * @param iou_thresh   Currently unused (reserved for the NMS step).
 * @return Empty when nothing passes the threshold; otherwise a single (K, 6) tensor with
 *         rows [x1, y1, x2, y2, confidence, class index].
 */
std::vector<torch::Tensor> non_max_suppression2(torch::Tensor preds,Mat img, float score_thresh = 0.25, float iou_thresh = 0.45)
{
	(void)iou_thresh;  // kept in the signature for callers; no suppression step uses it yet
	std::vector<torch::Tensor> output;

	// Boolean-mask indexing over the leading dimensions returns a fresh (K, 5 + classes) copy.
	const at::Tensor xc = preds.index({ "...", 4 }) > score_thresh;
	at::Tensor x = preds.index({ xc });
	if (x.size(0) == 0)
		return output;

	// confidence = objectness * class score, applied to every class column (not just the first).
	x.slice(1, 5).mul_(x.slice(1, 4, 5));

	const at::Tensor box = xywh2xyxy(x.slice(1, 0, 4));

	// Best class per row: values and indices, both of shape (K, 1).
	const std::tuple<at::Tensor, at::Tensor> best = x.slice(1, 5).max(1, true);
	const at::Tensor con = std::get<0>(best);
	const at::Tensor j = std::get<1>(best);

	// The class index is int64; cast it so that cat() sees a single dtype.
	x = torch::cat({ box, con, j.to(con.scalar_type()) }, 1);
	output.push_back(x);

	// Copy boxes and scores to the host once instead of reading element by element from the device.
	const at::Tensor boxCpu = box.to(torch::kCPU);
	const at::Tensor conCpu = con.to(torch::kCPU);

	// Draw on a separate 640x640 image: box coordinates are in the model's input resolution.
	cv::Mat image;
	cv::resize(img, image, cv::Size(640, 640));
	const int len = static_cast<int>(x.size(0));
	for (int i = 0; i < len; i++)
	{
		const float left = boxCpu[i][0].item().toFloat();
		const float top = boxCpu[i][1].item().toFloat();
		const float right = boxCpu[i][2].item().toFloat();
		const float bottom = boxCpu[i][3].item().toFloat();
		const float score = conCpu[i][0].item().toFloat();

		cv::rectangle(image,
			cv::Rect(static_cast<int>(left), static_cast<int>(top),
				static_cast<int>(right - left), static_cast<int>(bottom - top)),
			cv::Scalar(0, 255, 0), 2);

		// The font scale grows with the box width.
		cv::putText(image,
			"QRcode: " + cv::format("%.2f", score),
			cv::Point(static_cast<int>(left), static_cast<int>(top)),
			cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);
	}
	imshow("t", image);
	waitKey(1);
	return output;
}

int main()
{
	cout << "cuda是否可用：" << torch::cuda::is_available() << endl;
	cout << "cudnn是否可用：" << torch::cuda::cudnn_is_available() << endl;
	torch::jit::script::Module module;
	try {
		module = torch::jit::load("C:\\Users\\34187\\Desktop\\test\\test\\httt.torchscript.pt");
		//模型转到GPU中去
		device_type = at::kCUDA;
		module.to(device_type);
		module.eval();
		//vector<Output> result;
		//Mat img1 = imread(img_path),img;
		VideoCapture m(0);
		namedWindow("t", WINDOW_AUTOSIZE);
		while (true)
		{
			Mat temp;
			m >> temp;
			at::Tensor inputtensor = imagpro(temp).to(device_type);
			
			// yolov5 模型识别
			cout << inputtensor.sizes();
			int i = 0;
			torch::jit::Stack inputs;
			inputs.push_back({ inputtensor });
			i = 0;
			torch::Tensor preds = module.forward(inputs).toTuple()->elements()[0].toTensor().to(device_type);
			imshow("t", temp);
			waitKey(1);
			// 后处理
			std::vector<torch::Tensor> dets = non_max_suppression2(preds,temp, 0.5, 0.55);
			
		}
	

	}
	catch (const c10::Error& e) {
		cout << endl;
		std::cout << e.msg()<<endl;
		std::cerr << "error loading the model "<<endl;
	}
	
	return 0;
}