使用OpenVinoSharp+MiDaS进行单目深度估计

最新推荐文章于 2025-07-29 15:27:53 发布

hixiong123

最新推荐文章于 2025-07-29 15:27:53 发布

阅读量823

点赞数 27

CC 4.0 BY-SA版权

分类专栏： AI OPENVINO 文章标签： c# opencv 计算机视觉人工智能

本文链接：https://blog.youkuaiyun.com/hixiong123/article/details/149207251

AI 同时被 2 个专栏收录

2 篇文章

订阅专栏

OPENVINO

2 篇文章

订阅专栏

MiDaS（Monocular Depth Estimation in the Wild）是一个用于单目深度估计的深度学习模型系列，由英特尔实验室等机构开发。它能够从单个RGB图像预测每个像素的深度信息（即场景中物体到相机的距离），而无需依赖双目视觉或其他传感器。MiDaS的核心优势在于其强大的泛化能力，能够在多种不同的场景下工作，包括室内、室外、自然和人造环境等。

**MiDaS 的关键特点**

1. **单目输入**：仅需一个普通摄像头（单目），无需立体视觉或深度传感器。
2. **相对深度估计**：输出是相对深度值（深度图的尺度不确定），但可通过后处理转换为绝对距离（需已知场景中的至少一个参考距离）。
3. **多数据集训练**：使用多个不同的深度数据集（如KITTI、NYU Depth V2等）联合训练，增强了泛化能力。
4. **模型变体**：提供不同大小的模型（如MiDaS v2.1 small, medium, large），平衡精度和推理速度。

**MiDaS 的工作原理**

- **网络结构**：基于卷积神经网络（CNN）或Transformer（如DPT，Dense Prediction Transformer）。DPT是MiDaS v3的核心，它使用Vision Transformer（ViT）作为骨干网络，结合多尺度特征融合，生成高分辨率深度图。
- **损失函数**：训练时采用尺度不变损失（scale-invariant loss），使模型能处理不同数据集中深度尺度的差异。
- **输出**：模型输出是每个像素的相对逆深度（inverse depth，浮点数），值越大表示越近，值越小表示越远（具体数值无绝对物理意义）；因此下面的示例代码在归一化之后用 1 减去该值，再进行着色显示。

下面使用 midas_v21_small_256.onnx模型进行推理，模型链接：

Release com.doji.midas@1.0.0 · julienkay/com.doji.midas · GitHub

效果如下：

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using OpenCvSharp;
using OpenCvSharp.Extensions;
using OpenVinoSharp;
using Size = OpenCvSharp.Size;

namespace opencvsharp_net4._8
{
/// <summary>
/// WinForms demo form that runs the MiDaS v2.1 small (256x256) monocular depth model
/// through OpenVinoSharp and shows the source image next to a colorized depth map.
/// </summary>
public partial class MiDaSOpenvino : Form
{
    // Side length, in pixels, of the square image the model is fed.
    private const int InputSize = 256;

    // ONNX model file, resolved relative to the working directory.
    // (dpt_swin2_tiny_256.onnx was the alternative noted by the original author.)
    private const string ModelPath = "midas_v21_small_256.onnx";

    // OpenVINO device the model is compiled for.
    private const string DeviceName = "GPU.0";

    // Image that is processed when the button is clicked.
    private const string ImagePath = "D:\\FastestDet-main\\data\\4.jpg";

    private Core core;
    private Model model;
    private CompiledModel compiledModel;
    private InferRequest inferRequest;

    public MiDaSOpenvino()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Reads and compiles the model once, and creates the inference request that is
    /// reused by every click.
    /// </summary>
    private void MiDaSOpenvino_Load(object sender, EventArgs e)
    {
        // Initialize OpenVINO.
        core = new Core();

        // Load and compile the model.
        model = core.read_model(ModelPath);
        compiledModel = core.compile_model(model, DeviceName);
        inferRequest = compiledModel.create_infer_request();
    }

    /// <summary>
    /// Releases the OpenVINO objects created in <see cref="MiDaSOpenvino_Load"/>.
    /// They live for the lifetime of the form, so they are disposed when it closes,
    /// in reverse order of creation.
    /// </summary>
    protected override void OnFormClosed(FormClosedEventArgs e)
    {
        inferRequest?.Dispose();
        inferRequest = null;
        compiledModel?.Dispose();
        compiledModel = null;
        model?.Dispose();
        model = null;
        core?.Dispose();
        core = null;

        base.OnFormClosed(e);
    }

    /// <summary>
    /// Runs depth estimation on the configured image and displays the original image
    /// and the colorized depth map side by side in <c>pictureBox1</c>.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        if (inferRequest == null)
        {
            MessageBox.Show("The OpenVINO model has not been loaded.");
            return;
        }

        // The reported time covers image loading, preprocessing and inference.
        Stopwatch stopwatch = Stopwatch.StartNew();

        using (Mat image = Cv2.ImRead(ImagePath))
        {
            // ImRead does not throw for a missing or unreadable file; it returns an empty Mat.
            if (image.Empty())
            {
                MessageBox.Show($"Could not read image: {ImagePath}");
                return;
            }

            float[] inputData = BuildInputData(image);
            float[] outputData;

            Tensor inputTensor = null;
            Tensor outputTensor = null;
            try
            {
                // Create the input tensor using the shape the model reports (NCHW).
                inputTensor = new Tensor(
                    inferRequest.get_input_tensor().get_shape(),
                    inputData);

                // Set the input and run inference.
                inferRequest.set_input_tensor(0, inputTensor);
                inferRequest.infer();

                stopwatch.Stop();

                // Fetch the output into a managed array.
                outputTensor = inferRequest.get_output_tensor(0);
                outputData = outputTensor.get_data<float>((int)outputTensor.get_size());
            }
            finally
            {
                // Release the tensors even when inference throws.
                inputTensor?.Dispose();
                outputTensor?.Dispose();
            }

            int pixelCount = InputSize * InputSize;
            if (outputData.Length < pixelCount)
            {
                MessageBox.Show(
                    $"Unexpected model output size: {outputData.Length} (expected at least {pixelCount}).");
                return;
            }

            using (Mat depthMap = new Mat(InputSize, InputSize, MatType.CV_32FC1))
            using (Mat displayMap = new Mat())
            using (Mat resizedMap = new Mat())
            using (Mat colorMap = new Mat())
            using (Mat combined = new Mat())
            {
                // The output is treated as one InputSize x InputSize float plane
                // (expected shape [1, 1, 256, 256]); a freshly allocated Mat is
                // continuous, so it can be filled with a single bulk copy.
                Marshal.Copy(outputData, 0, depthMap.Data, pixelCount);

                // Post-processing: scale to [0, 1], then invert and convert to 8-bit in
                // one step: 255 - 255 * d is the same as (1 - d) * 255.
                Cv2.Normalize(depthMap, depthMap, 0, 1, NormTypes.MinMax);
                depthMap.ConvertTo(displayMap, MatType.CV_8UC1, -255, 255);

                // Bring the map back to the source resolution and colorize it.
                Cv2.Resize(displayMap, resizedMap, image.Size());
                Cv2.ApplyColorMap(resizedMap, colorMap, ColormapTypes.Jet);

                Cv2.HConcat(image, colorMap, combined);

                Cv2.PutText(
                    combined,
                    $"Elapsed Time{stopwatch.ElapsedMilliseconds}ms",
                    new OpenCvSharp.Point(20, 30),
                    HersheyFonts.HersheySimplex,
                    1,
                    new Scalar(0, 255, 255),
                    2);

                // Swap the bitmap first and dispose the old one afterwards, so the
                // PictureBox never holds a disposed image if the conversion throws.
                var previous = pictureBox1.Image;
                pictureBox1.Image = BitmapConverter.ToBitmap(combined);
                previous?.Dispose();
            }
        }
    }

    /// <summary>
    /// Converts a BGR image into the planar (NCHW) float buffer that is fed to the model:
    /// resize to InputSize x InputSize, scale to [0, 1], convert BGR to RGB, subtract the
    /// per-channel mean and divide by the per-channel standard deviation.
    /// </summary>
    /// <param name="image">Source image as returned by <c>Cv2.ImRead</c>.</param>
    /// <returns>Array of length 3 * InputSize * InputSize, laid out channel by channel.</returns>
    private static float[] BuildInputData(Mat image)
    {
        int planeSize = InputSize * InputSize;
        float[] inputData = new float[3 * planeSize];

        using (Mat resized = new Mat())
        using (Mat normalized = new Mat())
        {
            // 1. Resize to the network input size.
            Cv2.Resize(image, resized, new Size(InputSize, InputSize));

            // 2. Convert to float and scale to [0, 1].
            resized.ConvertTo(normalized, MatType.CV_32FC3, 1.0 / 255.0);

            // 3. Convert to RGB.
            Cv2.CvtColor(normalized, normalized, ColorConversionCodes.BGR2RGB);

            // 4. Subtract the mean (expressed in the [0, 1] range).
            Scalar mean = new Scalar(
                123.675f / 255f,
                116.28f / 255f,
                103.53f / 255f);
            Cv2.Subtract(normalized, mean, normalized);

            // 5. Divide by the standard deviation (expressed in the [0, 1] range).
            Scalar std = new Scalar(
                58.395f / 255f,
                57.12f / 255f,
                57.375f / 255f);
            Cv2.Divide(normalized, std, normalized);

            // 6. Interleaved HWC -> planar CHW: split into single-channel planes and
            //    copy each plane in bulk instead of reading pixel by pixel.
            Mat[] channels = Cv2.Split(normalized);
            try
            {
                for (int c = 0; c < channels.Length && c < 3; c++)
                {
                    Marshal.Copy(channels[c].Data, inputData, c * planeSize, planeSize);
                }
            }
            finally
            {
                foreach (Mat channel in channels)
                {
                    channel.Dispose();
                }
            }
        }

        return inputData;
    }
}
}