下面是使用 LibTorch 对 YOLOv11 分割模型进行推理的程序，已验证可以完整、正确地运行。现在需要将其改为在推理时对多张图片进行批处理，推理函数接口为：std::vector<std::vector<YMask>> inferyolo(std::vector<cv::Mat>& v_imgs)。请给出完整的代码。
#include <algorithm>
#include <chrono>
#include <cmath>
#include <exception>
#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/opencv.hpp>
#include <torch/torch.h>
#include <torch/script.h>
/// Settings consumed by YOLOSegmentor: device selection, network input size,
/// post-processing thresholds and the model / label file locations.
struct YoloConfig {
    /// Ask for CUDA; the segmentor falls back to CPU when CUDA is unavailable.
    bool cuda_enabled = false;
    /// Network input size, stored as {height, width}.
    int input_size[2] = { 640, 640 };
    /// Minimum class score for a candidate to be kept (also passed to NMS).
    float confidence_threshold = 0.45f;
    /// IoU threshold handed to non-maximum suppression.
    float iou_threshold = 0.45f;
    /// Probability above which a mask pixel counts as foreground.
    float mask_threshold = 0.50f;
    /// Path of the TorchScript model file.
    std::string torchscript_file = "best.torchscript_101001";
    /// Path of the class-name list (one class per line).
    std::string classes_file = "labels.txt";
};
/// Geometry of one letterbox transform, needed to map network coordinates
/// back to the source image.
///
/// Members carry defaults describing the identity transform so that a
/// default-constructed instance never holds indeterminate values (a zero or
/// garbage scale would otherwise poison the coordinate restoration).
struct LetterBoxInfo {
    float scale{ 1.0f }; ///< Resize factor from source image to letterboxed image.
    int pad_w{ 0 };      ///< Horizontal padding added on the left, in pixels.
    int pad_h{ 0 };      ///< Vertical padding added on the top, in pixels.
};
/// One detected object: where it is, what it is and how confident the model is.
///
/// The scalar members have defaults so a default-constructed box is in a
/// defined state; class_id == -1 means "no class assigned yet".
struct DetectionBox {
    cv::Rect rect;            ///< Bounding box in source-image pixel coordinates.
    int class_id{ -1 };       ///< Index into the class-name list, -1 when unset.
    std::string class_name;   ///< Human-readable class name.
    float confidence{ 0.0f }; ///< Best class score of the detection.
};
/// Result for a single segmented instance.
struct YMask {
    /// Bounding box, class and confidence of the instance.
    DetectionBox box;
    /// Binary mask of the instance, covering the area of box.rect.
    cv::Mat mask;
    /// Optional: source pixels of box.rect copied through the mask.
    cv::Mat segmented_region;
};
// YOLO推理器类
class YOLOSegmentor {
private:
YoloConfig config;
torch::jit::script::Module model;
torch::Device device;
std::vector<std::string> classes;
bool initialized{ false };
// 生成随机颜色
std::vector<cv::Scalar> colors;
public:
YOLOSegmentor() : device(torch::kCPU) {}
YOLOSegmentor(const YoloConfig& cfg) : config(cfg), device(torch::kCPU) {
if (config.cuda_enabled && torch::cuda::is_available()) {
device = torch::kCUDA;
}
}
// 初始化函数
bool inityolo(const YoloConfig& cfg) {
config = cfg;
if (config.cuda_enabled && torch::cuda::is_available()) {
device = torch::kCUDA;
std::cout << "Using CUDA device" << std::endl;
}
else {
device = torch::kCPU;
std::cout << "Using CPU device" << std::endl;
}
// 加载类别文件
classes = parse_classes_file(config.classes_file.c_str());
if (classes.empty()) {
std::cerr << "Error: Failed to parse classes file: " << config.classes_file << std::endl;
return false;
}
// 生成随机颜色
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(100, 255);
for (size_t i = 0; i < classes.size(); ++i) {
colors.emplace_back(cv::Scalar(dis(gen), dis(gen), dis(gen)));
}
// 加载模型
try {
if (device.is_cuda()) {
model = torch::jit::load(config.torchscript_file, torch::kCUDA);
}
else {
model = torch::jit::load(config.torchscript_file, torch::kCPU);
}
model.eval();
initialized = true;
std::cout << "YOLO model loaded successfully" << std::endl;
return true;
}
catch (const c10::Error& e) {
std::cerr << "Error loading model: " << e.msg() << std::endl;
return false;
}
}
// 推理函数
std::vector<YMask> inferyolo(cv::Mat& img) {
if (!initialized) {
std::cerr << "Error: YOLO model not initialized. Call inityolo() first." << std::endl;
return {};
}
auto tstart = std::chrono::high_resolution_clock::now();
// LetterBox预处理
LetterBoxInfo lb_info;
cv::Mat input_img = letterbox(img, config.input_size[0], config.input_size[1], lb_info);
cv::cvtColor(input_img, input_img, cv::COLOR_BGR2RGB);
// 构建输入张量
torch::Tensor tensor = torch::from_blob(input_img.data, { input_img.rows, input_img.cols, 3 }, torch::kByte).to(device);
tensor = tensor.toType(torch::kFloat32).div(255.0f); // 归一化到[0,1]
tensor = tensor.permute({ 2, 0, 1 }); // HWC->CHW
tensor = tensor.unsqueeze(0); // 加batch维度
tensor = tensor.contiguous(); // 确保内存连续
std::vector<torch::jit::IValue> inputs{ tensor };
// 推理
auto output = model.forward(inputs).toTuple()->elements();
auto output0 = output[0].toTensor().transpose(1, 2).contiguous().to(torch::kCPU);
auto output1 = output[1].toTensor().to(torch::kCPU);
if (output0.dim() != 3 || output1.dim() != 4) {
std::cerr << "Error: unmatch dimensions: " << output0.dim() << "," << output1.dim() << std::endl;
return {};
}
cv::Mat data0 = cv::Mat(output0.size(1), output0.size(2), CV_32FC1, output0.data_ptr<float>());
std::vector<int> sizes;
for (int i = 0; i < 4; ++i)
sizes.emplace_back(output1.size(i));
cv::Mat data1 = cv::Mat(sizes, CV_32F, output1.data_ptr<float>());
auto tend = std::chrono::high_resolution_clock::now();
std::cout << "Inference elapsed milliseconds: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(tend - tstart).count()
<< " ms" << std::endl;
// 后处理并返回YMasks
return post_process_mask(data0, data1, sizes, img, lb_info);
}
// 可视化函数(可选,用于调试和测试)
cv::Mat visualize_results(const cv::Mat& frame, const std::vector<YMask>& results, bool draw_masks = true) {
cv::Mat display_frame = frame.clone();
cv::Mat mask_overlay = frame.clone();
for (const auto& result : results) {
const auto& box = result.box;
// 绘制检测框
cv::rectangle(display_frame, box.rect, colors[box.class_id], 2);
// 绘制类别标签
std::string label = box.class_name + " " + std::to_string(box.confidence).substr(0, 4);
cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_DUPLEX, 0.7, 2, 0);
cv::Rect text_box(box.rect.x, box.rect.y - text_size.height - 10,
text_size.width + 10, text_size.height + 10);
cv::rectangle(display_frame, text_box, colors[box.class_id], cv::FILLED);
cv::putText(display_frame, label,
cv::Point(box.rect.x + 5, box.rect.y - 5),
cv::FONT_HERSHEY_DUPLEX, 0.7, cv::Scalar(0, 0, 0), 2, 0);
// 绘制分割掩码
if (draw_masks && !result.mask.empty()) {
mask_overlay(box.rect).setTo(colors[box.class_id], result.mask);
}
}
if (draw_masks) {
cv::addWeighted(display_frame, 0.5, mask_overlay, 0.5, 0, display_frame);
}
return display_frame;
}
bool is_initialized() const { return initialized; }
const std::vector<std::string>& get_classes() const { return classes; }
private:
// LetterBox预处理
cv::Mat letterbox(const cv::Mat& img, int target_h, int target_w, LetterBoxInfo& info) {
float scale_w = (float)target_w / img.cols;
float scale_h = (float)target_h / img.rows;
info.scale = std::min(scale_w, scale_h);
int new_w = (int)round(img.cols * info.scale);
int new_h = (int)round(img.rows * info.scale);
info.pad_w = (target_w - new_w) / 2;
info.pad_h = (target_h - new_h) / 2;
cv::Mat resized_img;
cv::resize(img, resized_img, cv::Size(new_w, new_h), 0, 0, cv::INTER_LINEAR);
cv::Mat letterbox_img(target_h, target_w, CV_8UC3, cv::Scalar(114, 114, 114));
resized_img.copyTo(letterbox_img(cv::Rect(info.pad_w, info.pad_h, new_w, new_h)));
return letterbox_img;
}
// 解析类别文件
std::vector<std::string> parse_classes_file(const char* name) {
std::vector<std::string> classes;
std::ifstream file(name);
if (!file.is_open()) {
std::cerr << "Error: fail to open classes file: " << name << std::endl;
return classes;
}
std::string line;
while (std::getline(file, line)) {
auto pos = line.find_first_of(" ");
classes.emplace_back(line.substr(0, pos));
}
file.close();
return classes;
}
// 生成分割掩码
void get_masks(const cv::Mat& features, const cv::Mat& proto, const std::vector<int>& output1_sizes,
const cv::Mat& frame, const cv::Rect box, const LetterBoxInfo& lb_info, cv::Mat& mk) {
const cv::Size shape_src(frame.cols, frame.rows), shape_input(config.input_size[1], config.input_size[0]);
const cv::Size shape_mask(output1_sizes[3], output1_sizes[2]);
cv::Mat res0 = features * proto;
cv::Mat res = res0.t();
res = res.reshape(1, { shape_mask.height, shape_mask.width });
// Sigmoid激活
cv::exp(-res, res);
res = 1.0 / (1.0 + res);
// mask缩放
cv::resize(res, res, shape_input);
cv::Mat mask_cropped = res(cv::Rect(lb_info.pad_w, lb_info.pad_h,
(int)(shape_src.width * lb_info.scale),
(int)(shape_src.height * lb_info.scale)));
cv::resize(mask_cropped, res, shape_src);
mk = res(box) > config.mask_threshold;
}
// 后处理
std::vector<YMask> post_process_mask(const cv::Mat& output0, const cv::Mat& output1,
const std::vector<int>& output1_sizes, cv::Mat& frame,
const LetterBoxInfo& lb_info) {
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<std::vector<float>> masks;
const float* data = (float*)output0.data;
for (auto i = 0; i < output0.rows; ++i) {
cv::Mat scores(1, classes.size(), CV_32FC1, (float*)data + 4);
cv::Point class_id;
double max_class_score;
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > config.confidence_threshold) {
confidences.emplace_back(max_class_score);
class_ids.emplace_back(class_id.x);
masks.emplace_back(std::vector<float>(data + 4 + classes.size(), data + output0.cols));
// 坐标还原
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int center_x = (int)round((x - lb_info.pad_w) / lb_info.scale);
int center_y = (int)round((y - lb_info.pad_h) / lb_info.scale);
int box_w = (int)round(w / lb_info.scale);
int box_h = (int)round(h / lb_info.scale);
int left = std::max(0, center_x - box_w / 2);
int top = std::max(0, center_y - box_h / 2);
int right = std::min(frame.cols - 1, center_x + box_w / 2);
int bottom = std::min(frame.rows - 1, center_y + box_h / 2);
boxes.emplace_back(cv::Rect(left, top, right - left, bottom - top));
}
data += output0.cols;
}
// NMS
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, config.confidence_threshold, config.iou_threshold, nms_result);
cv::Mat proto = output1.reshape(0, { output1_sizes[1], output1_sizes[2] * output1_sizes[3] });
std::vector<YMask> results;
for (size_t i = 0; i < nms_result.size(); ++i) {
auto index = nms_result[i];
// 创建DetectionBox
DetectionBox det_box;
det_box.rect = boxes[index] & cv::Rect(0, 0, frame.cols, frame.rows);
det_box.class_id = class_ids[index];
det_box.class_name = classes[class_ids[index]];
det_box.confidence = confidences[index];
// 生成分割掩码
cv::Mat mask;
get_masks(cv::Mat(masks[index]).t(), proto, output1_sizes, frame, boxes[index], lb_info, mask);
// 创建YMask
YMask ymask;
ymask.box = det_box;
ymask.mask = mask;
// 可选:提取分割区域
if (!mask.empty()) {
cv::Mat segmented_region;
frame(det_box.rect).copyTo(segmented_region, mask);
ymask.segmented_region = segmented_region;
}
results.emplace_back(ymask);
}
return results;
}
};
// 测试函数
void test_yolo_segmentor() {
// 配置参数
YoloConfig config;
config.cuda_enabled = false;
config.torchscript_file = "best.torchscript_101001";
config.classes_file = "labels.txt";
config.confidence_threshold = 0.45;
config.iou_threshold = 0.45;
config.mask_threshold = 0.50;
// 创建YOLO分割器
YOLOSegmentor segmentor;
// 初始化
if (!segmentor.inityolo(config)) {
std::cerr << "Failed to initialize YOLO segmentor" << std::endl;
return;
}
// 测试单张图片
std::string test_image_path = "w1.jpg"; // 替换为您的测试图片路径
cv::Mat image = cv::imread(test_image_path);
if (image.empty()) {
std::cerr << "Failed to load test image: " << test_image_path << std::endl;
return;
}
std::cout << "Processing image: " << test_image_path << std::endl;
std::cout << "Image size: " << image.cols << "x" << image.rows << std::endl;
// 推理
auto results = segmentor.inferyolo(image);
std::cout << "Detected " << results.size() << " objects" << std::endl;
// 显示结果
for (size_t i = 0; i < results.size(); ++i) {
const auto& result = results[i];
std::cout << "Object " << i + 1 << ": "
<< result.box.class_name << " (confidence: " << result.box.confidence
<< ", box: " << result.box.rect << ")" << std::endl;
}
// 可视化结果
cv::Mat result_image = segmentor.visualize_results(image, results, true);
// 保存结果
cv::imwrite("result.jpg", result_image);
std::cout << "Result saved to result.jpg" << std::endl;
// 显示结果(可选)
cv::imshow("YOLO Segmentation Results", result_image);
cv::waitKey(0);
// 测试批量处理(可选)
std::string images_dir = "imgs";
if (std::filesystem::exists(images_dir)) {
std::cout << "Processing images in directory: " << images_dir << std::endl;
for (const auto& entry : std::filesystem::directory_iterator(images_dir)) {
if (entry.is_regular_file()) {
std::string image_path = entry.path().string();
cv::Mat img = cv::imread(image_path);
if (!img.empty()) {
auto batch_results = segmentor.inferyolo(img);
cv::Mat batch_result_img = segmentor.visualize_results(img, batch_results, true);
std::string output_path = "result_" + entry.path().filename().string();
cv::imwrite(output_path, batch_result_img);
std::cout << "Processed: " << image_path << " -> " << output_path
<< " (detections: " << batch_results.size() << ")" << std::endl;
}
}
}
}
}
/// Entry point: runs the smoke test. Exceptions escaping from it (LibTorch and
/// OpenCV both report errors that way) are printed and turned into a non-zero
/// exit code instead of ending in std::terminate without a message.
int main() {
    try {
        test_yolo_segmentor();
    }
    catch (const std::exception& e) {
        std::cerr << "Fatal error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}