from typing import Any, Dict

import torch
from torch import nn
from torch.nn import functional as F

from mmcv.cnn import build_norm_layer

# NOTE: the helpers below are project-specific; these import paths are an
# assumption and depend on the actual repo layout.
from ..builder import build_backbone
from .utils import feature_decorator, get_paddings_indicator

# __all__ controls the module's export surface: `from <module> import *`
# only picks up the classes listed here.
__all__ = ["RadarFeatureNet", "RadarEncoder", "RadarEncoderCalib", "RadarEncoderProj"]
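# `get_paddings_indicator` (imported above; its exact module path is
# repo-specific) is used in RadarFeatureNet.forward to mask out padded point
# slots. A minimal sketch of the behaviour it is assumed to have, for
# reference only:
def _paddings_indicator_sketch(actual_num, max_num):
    # actual_num: (P,) tensor of valid points per pillar. Returns a (P, max_num)
    # bool mask that is True for the first actual_num[p] slots of pillar p.
    slot_ids = torch.arange(max_num, device=actual_num.device).unsqueeze(0)
    return slot_ids < actual_num.unsqueeze(-1)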
class RFNLayer(nn.Module):
def __init__(self, in_channels, out_channels, norm_cfg=None, last_layer=False):
        # This module mirrors PFNLayer from PointPillars: it lifts per-point
        # features into an efficient pillar-wise representation. Several
        # RFNLayers can be stacked, though the PointPillars paper uses only a
        # single PFNLayer.
"""
Pillar Feature Net Layer.
The Pillar Feature Net could be composed of a series of these layers, but the PointPillars paper results only
        used a single PFNLayer. This layer performs a similar role to second.pytorch.voxelnet.VFELayer.
        :param in_channels: <int>. Number of input channels.
        :param out_channels: <int>. Number of output channels.
        :param norm_cfg: <dict>. Config for the normalization layer; defaults to BatchNorm1d.
        :param last_layer: <bool>. If last_layer, there is no concatenation of features.
"""
super().__init__()
self.name = "RFNLayer"
        # If this is the last layer, skip feature concatenation and return only
        # the max-pooled result.
        self.last_vfe = last_layer
        # Output width of the fully connected layer.
        self.units = out_channels
        # Fall back to a default BatchNorm1d config when none is given.
        if norm_cfg is None:
            norm_cfg = dict(type="BN1d", eps=1e-3, momentum=0.01)
        self.norm_cfg = norm_cfg  # keep the normalization config around
        # Linear layer mapping in_channels features into the out_channels space.
        self.linear = nn.Linear(in_channels, self.units, bias=False)
        # Build the normalization layer (e.g. BatchNorm1d) from norm_cfg.
        self.norm = build_norm_layer(self.norm_cfg, self.units)[1]
    # Forward pass: linear layer -> normalization -> ReLU.
def forward(self, inputs):
        x = self.linear(inputs)  # (batch, num_points, in_channels) -> (batch, num_points, units)
        # Temporarily disable cuDNN to avoid unpredictable behaviour in the
        # normalization below.
        torch.backends.cudnn.enabled = False
        # BatchNorm1d expects (batch, channels, length), so permute
        # (batch, num_points, units) -> (batch, units, num_points), normalize,
        # and permute back; .contiguous() keeps the tensors contiguous in memory.
        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
        # Re-enable cuDNN.
        torch.backends.cudnn.enabled = True
        x = F.relu(x)  # non-linear activation
        if self.last_vfe:
            # Last layer: max-pool over the point dimension (dim=1, keepdim=True),
            # producing one feature vector per pillar: (batch, 1, units).
            x_max = torch.max(x, dim=1, keepdim=True)[0]
            return x_max
        else:
            # Intermediate layer: return the activated features as-is.
            return x
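# A minimal shape check for RFNLayer (a sketch, assuming the imports above;
# eval() keeps BatchNorm1d on its running statistics so a tiny dummy batch
# works). With last_layer=True the layer max-pools over the point dimension:
# (num_pillars, max_points, in_channels) -> (num_pillars, 1, out_channels).
def _demo_rfn_layer():
    layer = RFNLayer(in_channels=9, out_channels=64, last_layer=True)
    layer.eval()
    pillars = torch.randn(2, 20, 9)  # (num_pillars, max_points, in_channels)
    out = layer(pillars)
    assert out.shape == (2, 1, 64)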
class RadarFeatureNet(nn.Module):
def __init__(
self,
in_channels=4,
        feat_channels=(64,),  # output channels of each intermediate layer
        with_distance=False,  # whether to append point distance as a feature
        voxel_size=(0.2, 0.2, 4),
        point_cloud_range=(0, -40, -3, 70.4, 40, 1),
        norm_cfg=None,  # normalization config (e.g. BatchNorm)
):
"""
Pillar Feature Net.
The network prepares the pillar features and performs forward pass through PFNLayers. This net performs a
similar role to SECOND's second.pytorch.voxelnet.VoxelFeatureExtractor.
        :param in_channels: <int>. Number of input features, e.g. x, y, z or x, y, z, r.
        :param feat_channels: (<int>: N). Number of features in each of the N RFNLayers.
        :param with_distance: <bool>. Whether to include Euclidean distance to points.
        :param voxel_size: (<float>: 3). Size of voxels; only the x and y sizes are used.
        :param point_cloud_range: (<float>: 6). Point cloud range; only the x and y bounds are used.
"""
super().__init__()
self.name = "RadarFeatureNet"
        assert len(feat_channels) > 0  # at least one output layer is required
        self.in_channels = in_channels
        in_channels += 2  # two extra channels for the pillar-center x/y offsets
        # in_channels += 5
        self._with_distance = with_distance  # whether to use distance as a feature
        self.export_onnx = False  # flag for ONNX-export mode
        # Create the PillarFeatureNet layers.
        feat_channels = [in_channels] + list(feat_channels)  # full channel list, e.g. [47, 256, 256, 256, 256]
        rfn_layers = []
        for i in range(len(feat_channels) - 1):
            in_filters = feat_channels[i]
            out_filters = feat_channels[i + 1]
            # Only the final RFNLayer max-pools over points (last_layer=True).
            if i < len(feat_channels) - 2:
                last_layer = False
            else:
                last_layer = True
            rfn_layers.append(
                RFNLayer(
                    in_filters, out_filters, norm_cfg=norm_cfg, last_layer=last_layer
                )
            )
        self.rfn_layers = nn.ModuleList(rfn_layers)  # register all RFN layers
        # Need the pillar (voxel) size and x/y offsets to compute pillar centers.
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.pc_range = point_cloud_range
    # features: raw per-pillar point features; num_voxels: number of points in
    # each pillar; coors: voxel coordinates of each pillar.
def forward(self, features, num_voxels, coors):
        if not self.export_onnx:
            # Standard (non-ONNX) path: decorate the raw point features in Python.
            dtype = features.dtype
            # Find the distance of x, y, and z from the cluster (mean) center.
            points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_voxels.type_as(
                features
            ).view(-1, 1, 1)  # per-pillar centroid over the valid points
            f_cluster = features[:, :, :3] - points_mean  # offset from the centroid (computed but not concatenated below)
            # Offset of each point from its pillar's geometric center.
            f_center = torch.zeros_like(features[:, :, :2])
            f_center[:, :, 0] = features[:, :, 0] - (
                coors[:, 1].to(dtype).unsqueeze(1) * self.vx + self.x_offset
            )  # x offset from the pillar center
            f_center[:, :, 1] = features[:, :, 1] - (
                coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset
            )  # y offset from the pillar center
            # Normalize x, y, z to [0, 1] using the point cloud range,
            # e.g. [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0].
            features[:, :, 0:1] = (features[:, :, 0:1] - self.pc_range[0]) / (self.pc_range[3] - self.pc_range[0])
            features[:, :, 1:2] = (features[:, :, 1:2] - self.pc_range[1]) / (self.pc_range[4] - self.pc_range[1])
            features[:, :, 2:3] = (features[:, :, 2:3] - self.pc_range[2]) / (self.pc_range[5] - self.pc_range[2])
            # Combine the feature decorations: concatenating features (N, M, C)
            # with f_center (N, M, 2) along the last dim gives (N, M, C + 2).
            features_ls = [features, f_center]
            features = torch.cat(features_ls, dim=-1)
            # The decorations were calculated without regard to whether a pillar
            # is empty; make sure padded point slots stay zeroed out.
            voxel_count = features.shape[1]  # max number of points per pillar
            mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)  # mark valid point slots
            mask = torch.unsqueeze(mask, -1).type_as(features)  # (P, N) -> (P, N, 1)
            features *= mask  # zero out the features of padded points
            features = torch.nan_to_num(features)  # replace any NaNs with 0
        else:
            # ONNX-export path: the fused feature_decorator op performs the same
            # decoration in a single exportable kernel.
            features = feature_decorator(features, num_voxels, coors, self.vx, self.vy, self.x_offset, self.y_offset, True, False, True)
        # Forward pass through the RFN layers.
        for rfn in self.rfn_layers:
            features = rfn(features)
        return features.squeeze()  # drop the singleton point dimension
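# Worked example of the pillar-center decoration computed in forward() above
# (hypothetical numbers, not taken from any config). With vx = 0.2 m and an
# x range starting at -51.2 m, a point at x = 1.3 m in voxel column 260 has
# its pillar center at 260 * 0.2 + (-51.2 + 0.1) = 0.9 m, so the decorated
# offset is 1.3 - 0.9 = 0.4 m.
def _demo_center_offset():
    vx, x_min = 0.2, -51.2
    x_offset = vx / 2 + x_min
    point_x, voxel_ix = 1.3, 260
    f_center_x = point_x - (voxel_ix * vx + x_offset)
    assert abs(f_center_x - 0.4) < 1e-6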
class RadarEncoder(nn.Module):
def __init__(
self,
        pts_voxel_encoder: Dict[str, Any],  # config dict for the voxel encoder
        pts_middle_encoder: Dict[str, Any],  # config dict for the middle encoder
        pts_transformer_encoder=None,  # optional transformer encoder config
        pts_bev_encoder=None,  # optional BEV (bird's-eye-view) encoder config
        post_scatter=None,  # optional scatter post-processing layer config
**kwargs,
):
super().__init__()
        # Build each sub-module from its config.
self.pts_voxel_encoder = build_backbone(pts_voxel_encoder)
self.pts_middle_encoder = build_backbone(pts_middle_encoder)
self.pts_transformer_encoder = build_backbone(pts_transformer_encoder) if pts_transformer_encoder is not None else None
self.pts_bev_encoder = build_backbone(pts_bev_encoder) if pts_bev_encoder is not None else None
self.post_scatter = build_backbone(post_scatter) if post_scatter is not None else None
def forward(self, feats, coords, batch_size, sizes, img_features=None):
        # Encode the raw pillar features with the voxel encoder.
        x = self.pts_voxel_encoder(feats, sizes, coords)
        # Optionally refine the pillar features with the transformer encoder.
        if self.pts_transformer_encoder is not None:
            x = self.pts_transformer_encoder(x, sizes, coords, batch_size)
        # Process the pillar features with the middle encoder (typically a
        # scatter onto the BEV grid).
        x = self.pts_middle_encoder(x, coords, batch_size)
        # Optionally post-process the scattered features (e.g. fuse image features).
        if self.post_scatter is not None:
            x = self.post_scatter(x, img_features)
        # Optionally run the BEV encoder on the resulting feature map.
        if self.pts_bev_encoder is not None:
            x = self.pts_bev_encoder(x)
        # Return the final features.
return x
    # Debug helper: rasterize each pillar's point count onto a BEV canvas and save it.
    def visualize_pillars(self, feats, coords, sizes):
        nx, ny = 128, 128  # canvas resolution
        canvas = torch.zeros(  # flat 128x128 canvas, initialized to zero
            nx * ny, dtype=sizes.dtype, device=sizes.device
        )
        indices = coords[:, 1] * ny + coords[:, 2]  # flat index from the voxel x/y coordinates
        indices = indices.type(torch.long)  # cast to LongTensor for indexing
        canvas[indices] = sizes  # write each pillar's size at its cell
        torch.save(canvas, 'sample_canvas')  # dump the canvas to disk
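# Sketch for inspecting the canvas saved by visualize_pillars (assumes
# matplotlib is installed; 'sample_canvas' is the file written above).
def _show_canvas(path="sample_canvas", nx=128, ny=128):
    import matplotlib.pyplot as plt
    canvas = torch.load(path).reshape(nx, ny).cpu().numpy()
    plt.imshow(canvas, origin="lower")  # points-per-pillar heat map
    plt.title("points per pillar")
    plt.colorbar()
    plt.show()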