Mesh R-CNN 简介与使用指南

最新推荐文章于 2025-11-03 09:24:22 发布

原创最新推荐文章于 2025-11-03 09:24:22 发布 · 228 阅读

2 ·

CC 4.0 BY-SA版权

文章标签：

#cnn #人工智能 #神经网络

部署运行你感兴趣的模型镜像

Mesh R-CNN 是一种基于 Mask R-CNN 的 3D 深度学习模型，可以直接从单张 2D 图像中检测物体并重建其 3D 网格模型。

一、安装

pip install torch torchvision matplotlib
pip install 'git+https://github.com/facebookresearch/pytorch3d.git'

二、加载预训练模型

PyTorch3D 提供了预训练的 Mesh R-CNN 模型，可以直接加载使用：

import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# 从PyTorch3D导入Mesh R-CNN模型
from pytorch3d.models import mesh_rcnn
from pytorch3d.vis.plotly_vis import AxisArgs, plot_batch_individually, plot_scene
from pytorch3d.vis.texture_vis import texturesuv_image_matplotlib
from pytorch3d.renderer import (
    look_at_view_transform,
    FoVPerspectiveCameras,
    PointLights,
    RasterizationSettings,
    MeshRenderer,
    MeshRasterizer,
    SoftPhongShader,
    TexturesUV
)

# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 加载预训练的Mesh R-CNN模型
model = mesh_rcnn("R_50_FPN_1x", pretrained=True)
model = model.to(device)
model.eval()  # 设置为评估模式

三、准备输入图像

准备一张包含目标物体的图像，并进行预处理：

# 加载并预处理图像
def preprocess_image(image_path):
    image = Image.open(image_path).convert('RGB')
    
    # 图像预处理
    transform = transforms.Compose([
        transforms.Resize((800, 800)),  # 模型期望的输入尺寸
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
    
    return transform(image).unsqueeze(0).to(device)

# 示例：加载测试图像
image = preprocess_image("path/to/your/image.jpg")

四、运行模型进行推理

使用加载的模型对图像进行推理：

# 模型推理
with torch.no_grad():
    predictions = model(image)

# 提取预测结果
prediction = predictions[0]  # 处理第一张图像（批次中的第一个）

# 提取检测到的物体类别、边界框和掩码
boxes = prediction["boxes"]
labels = prediction["labels"]
masks = prediction["masks"]
scores = prediction["scores"]

# 提取3D网格预测
meshes = prediction["meshes"]

# 只保留置信度高的预测
threshold = 0.7
keep = scores > threshold
boxes = boxes[keep]
labels = labels[keep]
masks = masks[keep]
meshes = meshes[keep] if meshes is not None else None

print(f"检测到 {len(boxes)} 个物体")

五、可视化结果

可视化检测结果和生成的 3D 网格：

# 可视化2D检测结果
def visualize_2d_results(image, boxes, masks, labels):
    image_np = image.cpu().squeeze().permute(1, 2, 0).numpy()
    image_np = (image_np * np.array([0.229, 0.224, 0.225])) + np.array([0.485, 0.456, 0.406])
    image_np = (image_np * 255).astype(np.uint8)
    
    fig, ax = plt.figure(figsize=(10, 10)), plt.gca()
    ax.imshow(image_np)
    
    # 绘制边界框和掩码
    for box, mask, label in zip(boxes, masks, labels):
        x1, y1, x2, y2 = box.cpu().numpy().astype(int)
        ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, color='r', linewidth=2))
        ax.text(x1, y1, f"{label.item()}", color='white', bbox=dict(facecolor='r', alpha=0.5))
        
        # 绘制掩码
        mask_np = mask.cpu().squeeze().numpy() > 0.5
        ax.imshow(np.ma.masked_array(mask_np, ~mask_np), alpha=0.3, cmap='jet')
    
    plt.axis('off')
    plt.show()

# 可视化3D网格
def visualize_3d_meshes(meshes):
    # 设置相机和渲染器
    R, T = look_at_view_transform(2.7, 0, 180)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
    
    raster_settings = RasterizationSettings(
        image_size=512,
        blur_radius=0.0,
        faces_per_pixel=1,
    )
    
    lights = PointLights(device=device, location=[[0.0, 0.0, 3.0]])
    
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=SoftPhongShader(cameras=cameras, lights=lights, device=device)
    )
    
    # 渲染每个网格
    for i, mesh in enumerate(meshes):
        # 如果网格没有纹理，添加一个简单的纹理
        if mesh.textures is None:
            verts = mesh.verts_padded()
            faces = mesh.faces_padded()
            texture_image = torch.ones(1, 3, 256, 256, device=device) * 0.5  # 灰色纹理
            textures = TexturesUV(
                maps=texture_image,
                faces_uvs=faces,
                verts_uvs=torch.zeros_like(verts)[:, :, :2] + 0.5
            )
            mesh = mesh.update_padded(textures=textures)
        
        # 渲染并显示
        images = renderer(mesh)
        plt.figure(figsize=(10, 10))
        plt.imshow(images[0, ..., :3].cpu().numpy())
        plt.title(f"Reconstructed Mesh {i+1}")
        plt.axis('off')
        plt.show()

# 执行可视化
if len(boxes) > 0:
    visualize_2d_results(image, boxes, masks, labels)
    if meshes is not None and len(meshes) > 0:
        visualize_3d_meshes(meshes)
else:
    print("未检测到任何物体")

六、模型微调（可选）

如果需要针对特定任务微调 Mesh R-CNN，可以参考以下步骤：

# 冻结部分网络层
for param in model.backbone.parameters():
    param.requires_grad = False

# 定义优化器，只优化需要训练的参数
optimizer = torch.optim.Adam(
    [p for p in model.parameters() if p.requires_grad],
    lr=0.001
)

# 准备自定义数据集（这里需要替换为实际的数据集）
from torch.utils.data import DataLoader
from your_custom_dataset import YourCustomDataset  # 自定义数据集类

dataset = YourCustomDataset("path/to/your/data")
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# 微调训练循环
num_epochs = 10
for epoch in range(num_epochs):
    for images, targets in dataloader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        # 前向传播
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        
        # 反向传播和优化
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {losses.item():.4f}")

# 保存微调后的模型
torch.save(model.state_dict(), "fine_tuned_mesh_rcnn.pth")