在Unity中运行Yolo推理

原创于 2025-11-01 22:55:14 发布 · 952 阅读

21 ·

CC 4.0 BY-SA版权

文章标签：

#unity #YOLO #游戏引擎 #计算机视觉 #视觉推理

部署运行你感兴趣的模型镜像

在Unity中运行Yolo推理

今天研究了一整天，终于把在Unity中推理Yolo的路子搞通了。

关于YOLO

我是在WSL中研究YOLO的，WSL真是个好东西。

安装环境：

sudo apt update
sudo apt upgrade

# CUDA toolkit
sudo apt install nvidia-cuda-toolkit

# Miniconda
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash ./Miniconda3-latest-Linux-x86_64.sh
source ~/miniconda3/bin/activate   # sudo reboot
conda init --all

# Create the environment
# NOTE(security): this turns off TLS certificate verification for conda.
# Only keep it if your network (e.g. an intercepting proxy) requires it.
conda config --set ssl_verify false
# Pin Python and pip inside the env; a bare `conda create -n yolo` creates an
# empty env, so the `pip install` commands below would install into base.
conda create -n yolo python=3.11 pip
conda activate yolo

# Install PyTorch
pip install torch torchvision
# pip install torchaudio

# Install YOLO (Ultralytics)
pip install ultralytics

# Install and launch Jupyter
conda install jupyter notebook
jupyter notebook

训练

from ultralytics import YOLO

# Training options:
#   data   - path to the dataset YAML file
#   epochs - total number of training epochs; each epoch is one full pass over
#            the dataset, so this value drives training time and model quality
#   batch  - batch size, three modes: an integer (e.g. batch=16), auto mode
#            targeting 60% GPU memory utilisation (batch=-1), or auto mode with
#            an explicit utilisation fraction (batch=0.70)
train_options = dict(data='xxx.yaml', epochs=300, batch=16)

detector = YOLO('yolo11n.pt')
detector.train(**train_options)

## Resume training (continue from the previous run's checkpoint)
from ultralytics import YOLO

resumed = YOLO('path/to/last.pt')
resumed.train(resume=True)

其中，训练时需要用到Yaml配置文件，很简单，就几个路径而已，参考如下：

path: project_name  # dataset root dir, relative to the datasets directory
train: train/images  # training images, relative to path
val: valid/images  # validation images, relative to path
test: test/images  # test images, relative to path; the test set may be left empty

# Class index -> class name. Indent with spaces: YAML does not allow tabs
# for indentation.
names:
  0: class1
  1: class2
  2: class3
  3: class4

download: url  # download URL, may be omitted

推理测试

from ultralytics import YOLO

# Load the detection model, then run it on a single image and save the
# annotated output.
detector = YOLO(model='yolo11n.pt', task='detect')
predict_options = {'source': 'a.jpg', 'save': True}
result = detector(**predict_options)

导出ONNX

from ultralytics import YOLO

# Export settings for ONNX with a rectangular input.
export_options = {
    "format": "onnx",       # target format
    "imgsz": (480, 640),    # rectangular size for a 640x480 source (order: height, width)
    "half": True,           # FP16 (optional)
    "opset": 9,             # ONNX opset version (for compatibility with Unity etc.)
}
# Optional settings that are left disabled here:
#   "dynamic": True     - enable dynamic shapes
#   "simplify": True    - simplify the model (recommended)

# Load the trained weights and export them.
trained = YOLO("path/to/best.pt")
trained.export(**export_options)

imgsz：其实官方推荐的是一个方形的尺寸，只需要一个数，比如 imgsz=640。因为我的摄像头是640*480的，所以设成了这个矩形尺寸，不过这一点不是很重要。
重要的是：opset=9，必须是9。我从13一路降到9进行测试，只有9，导入到unity之后，没有告警，9以上的版本，会给出“MaxPool: Unsupported attribute ceil_mode. Value will be ignored and defaulted to [0].”的警告。

Unity端，比较麻烦

一开始查资料，看到有个叫Barracuda的库，于是开始研究。后来发现，大约从Unity2022开始，这个库升级并改名成了Sentis。我在Unity2022的Package Manager中可以找到这个库，并可以顺利安装；但是在Unity6中，死活就是找不到这个库。郁闷了很久，最后发现，它又改名字了，这是Grok给我的回答：

在 Unity 6 中，Sentis 库并未“消失”，而是被正式重命名为 Inference Engine（推理引擎）。这是 Unity 在 2025 年对 AI 工具栈的重大更新之一，旨在更好地整合和优化运行时 AI 功能。重命名从 Sentis 版本 2.2 开始生效（对应 Unity 6.2 Beta 及更高版本），原包名 com.unity.sentis 被替换为 com.unity.ai.inference。Inference Engine 本质上是 Sentis 的延续和升级版，保留了核心功能（如 ONNX 模型导入、跨平台推理、GPU/CPU 支持），但进行了优化，包括更好的模型兼容性、性能提升和与 Unity 6 运行时的自动集成。

安装好之后，测试也并不太顺利，因为它的API极不稳定，版本改来改去，文档也跟不太上，好多API都有过时的标记。比如：

[Obsolete("`Tensor<float> ToTensor(Texture texture, int width = -1, int height = -1, int channels = -1)` is deprecated, please use `void ToTensor(Texture texture, Tensor<float> tensor, TextureTransform transform)` instead.")]
[Obsolete("`TextureTransform SetDimensions(int width = -1, int height = -1, int channels = -1)` is deprecated, dimensions of the target tensor or texture are used.")]

好在，最后测通了：

using System;
using System.Collections.Generic;
using Unity.InferenceEngine;
using UnityEngine;

/// <summary>
/// Runs a single YOLO detection pass on <c>inputTexture</c> at startup using the
/// Unity Inference Engine (formerly Sentis) and logs the detections.
/// </summary>
public class SentisTest : MonoBehaviour
{
    // Serialized field names are kept as-is so existing scene/prefab references survive.
    [SerializeField] private Texture2D inputTexture;
    [SerializeField] private ModelAsset _modelAsset;
    private Model _model;
    private Worker _worker;

    // Model input size; must match the imgsz used when exporting the ONNX file.
    private const int InputWidth = 640;
    private const int InputHeight = 480;

    // Minimum class score for a detection to be kept.
    private const float ScoreThreshold = 0.5f;
    // IoU above which a lower-scored box is suppressed by NMS.
    private const float IouThreshold = 0.45f;
    // Number of box values (cx, cy, w, h) preceding the class scores in each prediction.
    private const int BoxChannels = 4;

    /// <summary>Loads the model, creates the worker and runs one inference.</summary>
    private void Start()
    {
        if (_modelAsset == null || inputTexture == null)
        {
            Debug.LogError("SentisTest: model asset and input texture must both be assigned.", this);
            return;
        }

        try
        {
            _model = ModelLoader.Load(_modelAsset);
            //_model = ModelLoader.Load(Path.Combine(Application.streamingAssetsPath, "my-yolo.onnx"));
            _worker = new Worker(_model, BackendType.GPUCompute);

            Debug.Log(_model);
            RunInference(inputTexture);
        }
        catch (Exception e)
        {
            // LogException keeps the stack trace, which LogError(e.Message) drops.
            Debug.LogException(e, this);
        }
    }

    /// <summary>
    /// Letterboxes <paramref name="texture"/> to the model input size, runs the
    /// model and logs the post-processed detections.
    /// </summary>
    /// <param name="texture">Source image to run detection on.</param>
    private void RunInference(Texture2D texture)
    {
        // Step 1: preprocess. The resized copy is a new Texture2D that we own
        // and must destroy, otherwise it leaks on every call.
        Texture2D resized = ResizeTexture(texture, InputWidth, InputHeight);
        try
        {
            // Input layout is [1, 3, height, width].
            using var inputTensor = new Tensor<float>(new TensorShape(1, 3, InputHeight, InputWidth));

            // Dimensions are taken from the target tensor; only the layout is set here.
            var trans = new TextureTransform().SetTensorLayout(TensorLayout.NCHW);
            TextureConverter.ToTensor(resized, inputTensor, trans);

            // Step 2: run the model.
            _worker.Schedule(inputTensor);

            // Step 3: fetch the output. Ultralytics exports usually name it
            // "output0" (check the actual name with Netron).
            // The tensor returned by PeekOutput is owned by the worker, so it
            // must NOT be disposed here; it is released with the worker.
            if (_worker.PeekOutput("output0") is not Tensor<float> outputTensor)
            {
                Debug.LogError("Out put Error");
                return;
            }

            // Step 4: decode boxes and apply NMS.
            var results = PostProcess(outputTensor, InputHeight, InputWidth);

            // Step 5: show the results (currently just logs them).
            DisplayResults(resized, results);
            //displayImage.texture = resized;  // update UI (do not destroy `resized` if it is handed to the UI)
        }
        finally
        {
            Destroy(resized);
        }
    }

    /// <summary>
    /// Returns a new <paramref name="newWidth"/> x <paramref name="newHeight"/> texture
    /// containing <paramref name="source"/> scaled uniformly to fit, centered, with
    /// black bars filling the remaining area. The caller owns the returned texture.
    /// </summary>
    private static Texture2D ResizeTexture(Texture2D source, int newWidth, int newHeight)
    {
        // Uniform scale that fits the source inside the target while keeping its aspect ratio.
        float scale = Mathf.Min((float)newWidth / source.width, (float)newHeight / source.height);
        int scaledWidth = Mathf.RoundToInt(source.width * scale);
        int scaledHeight = Mathf.RoundToInt(source.height * scale);
        float offsetX = (newWidth - scaledWidth) / 2f;
        float offsetY = (newHeight - scaledHeight) / 2f;

        RenderTexture previous = RenderTexture.active;
        RenderTexture rt = RenderTexture.GetTemporary(newWidth, newHeight);
        try
        {
            RenderTexture.active = rt;

            // Temporary render textures have undefined contents; clear to black for the padding.
            GL.Clear(true, true, Color.black);

            // Graphics.Blit's scale/offset arguments transform the *source UVs*, not the
            // destination rectangle, so they cannot place the image at a pixel offset.
            // Draw into an explicit pixel-space rectangle instead.
            GL.PushMatrix();
            GL.LoadPixelMatrix(0, newWidth, newHeight, 0);
            Graphics.DrawTexture(new Rect(offsetX, offsetY, scaledWidth, scaledHeight), source);
            GL.PopMatrix();

            // No mip chain: the texture is only used as model input.
            var result = new Texture2D(newWidth, newHeight, TextureFormat.RGBA32, false);
            result.ReadPixels(new Rect(0, 0, newWidth, newHeight), 0, 0);
            result.Apply();
            return result;
        }
        finally
        {
            // Restore the active target before releasing; releasing a render texture
            // that is still active triggers a Unity warning.
            RenderTexture.active = previous;
            RenderTexture.ReleaseTemporary(rt);
        }
    }

    /// <summary>
    /// Decodes the raw model output into detections and applies NMS.
    /// </summary>
    /// <remarks>
    /// Assumes the Ultralytics YOLOv8/YOLO11 detection layout
    /// <c>[1, 4 + numClasses, numPredictions]</c>: channel-major, box values
    /// (cx, cy, w, h) in input-pixel units followed by per-class scores, with no
    /// separate objectness channel. Verify the shape of your own model with Netron.
    /// </remarks>
    /// <param name="output">Model output tensor.</param>
    /// <param name="ih">Model input height. Kept for signature compatibility; the
    /// box values are used directly as input-pixel coordinates.</param>
    /// <param name="iw">Model input width. Kept for signature compatibility.</param>
    /// <returns>Detections whose boxes are in letterboxed-input pixel coordinates.</returns>
    private static List<DetectionResult> PostProcess(Tensor<float> output, int ih, int iw)
    {
        var detections = new List<DetectionResult>();

        if (output.shape.rank != 3)
        {
            Debug.LogError($"Unexpected output shape {output.shape}; expected [1, 4 + classes, predictions].");
            return detections;
        }

        int numChannels = output.shape[1];    // 4 + number of classes, e.g. 84
        int numDetections = output.shape[2];  // e.g. 8400 for a 640x640 input
        int numClasses = numChannels - BoxChannels;
        if (numClasses <= 0)
        {
            Debug.LogError($"Unexpected output shape {output.shape}; no class channels found.");
            return detections;
        }

        // Blocking readback of the output to the CPU.
        float[] data = output.DownloadToArray();

        for (int i = 0; i < numDetections; i++)
        {
            // The data is channel-major: value `ch` of prediction `i` lives at
            // data[ch * numDetections + i].
            int bestClass = -1;
            float bestScore = 0f;
            for (int c = 0; c < numClasses; c++)
            {
                float score = data[(BoxChannels + c) * numDetections + i];
                if (score > bestScore)
                {
                    bestScore = score;
                    bestClass = c;
                }
            }

            if (bestScore <= ScoreThreshold)
            {
                continue;
            }

            float cx = data[0 * numDetections + i];
            float cy = data[1 * numDetections + i];
            float w = data[2 * numDetections + i];
            float h = data[3 * numDetections + i];

            // Convert center/size to a top-left anchored rectangle.
            detections.Add(new DetectionResult
            {
                box = new Rect(cx - w / 2f, cy - h / 2f, w, h),
                classId = bestClass,
                score = bestScore
            });
        }

        // Remove overlapping boxes.
        return ApplyNMS(detections, IouThreshold);
    }

    /// <summary>
    /// Greedy, class-agnostic non-maximum suppression: boxes are visited in
    /// descending score order and kept only if they do not overlap an already
    /// kept box by more than <paramref name="iouThreshold"/>.
    /// </summary>
    /// <remarks>Sorts <paramref name="detections"/> in place.</remarks>
    private static List<DetectionResult> ApplyNMS(List<DetectionResult> detections, float iouThreshold)
    {
        detections.Sort((a, b) => b.score.CompareTo(a.score));
        var kept = new List<DetectionResult>();

        foreach (var candidate in detections)
        {
            bool suppressed = false;
            foreach (var winner in kept)
            {
                if (CalculateIoU(winner.box, candidate.box) > iouThreshold)
                {
                    suppressed = true;
                    break;
                }
            }

            if (!suppressed)
            {
                kept.Add(candidate);
            }
        }

        return kept;
    }

    /// <summary>
    /// Intersection-over-union of two rectangles; returns 0 when the union has
    /// no area (avoids a NaN from 0/0 on degenerate boxes).
    /// </summary>
    private static float CalculateIoU(Rect a, Rect b)
    {
        float interX = Mathf.Max(0, Mathf.Min(a.xMax, b.xMax) - Mathf.Max(a.xMin, b.xMin));
        float interY = Mathf.Max(0, Mathf.Min(a.yMax, b.yMax) - Mathf.Max(a.yMin, b.yMin));
        float interArea = interX * interY;
        float unionArea = a.width * a.height + b.width * b.height - interArea;
        return unionArea > 0f ? interArea / unionArea : 0f;
    }

    /// <summary>
    /// Reports the detections. Currently only logs them; <paramref name="texture"/>
    /// is the image the boxes refer to and is where they would be drawn.
    /// </summary>
    private static void DisplayResults(Texture2D texture, List<DetectionResult> results)
    {
        foreach (var res in results)
        {
            // Simplified example: draw with Graphics.DrawTexture or a LineRenderer,
            // or paint a red frame with Texture2D.SetPixels in a real project.
            Debug.Log($"Detected class {res.classId} at {res.box} with score {res.score}");
        }
    }

    /// <summary>One detected object.</summary>
    private struct DetectionResult
    {
        public Rect box;  // pixel coordinates in the model input (letterboxed) image
        public int classId;
        public float score;
    }

    /// <summary>Releases the worker and the tensors it owns.</summary>
    private void OnDestroy()
    {
        _worker?.Dispose();
    }
}