Title: Reading the Source Code of the 3D Point Cloud Network PointNeXt (II): Point Cloud Dataset Construction and Preprocessing
Related posts
[1] Installing, Configuring, and Testing the 3D Point Cloud Network PointNeXt
[2] Reading the PointNeXt Source Code (I): The Registration Mechanism and Argument Parsing
[3] Reading the PointNeXt Source Code (II): Point Cloud Dataset Construction and Preprocessing ⇐ this post
[4] Reading the PointNeXt Source Code (III): The Backbone Network Model
[5] Reading the PointNeXt Source Code (IV): PointNeXt-B
Preface
Continuing from the previous post, this article focuses on how PointNeXt constructs its point cloud dataset and how the point cloud data are preprocessed and augmented.
First of all, thanks to the original authors for open-sourcing PointNeXt for us to study!
I. The Point Cloud DataLoader
Let us first look at the overall sequence in which the DataLoader is constructed (figure below).
[Figure: sequence diagram of DataLoader construction]
Comparing it with the sequence diagram of the network-module registration and construction mechanism from the previous post shows that the two are much alike: the authors build the network model, the dataset, the data transforms, and even the loss function uniformly on top of the Registry registration mechanism.
To avoid repetition, the DataLoader itself is not expanded in detail here; the later sections concentrate on the preprocessing and data augmentation part.
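Before moving on, here is a minimal, hedged sketch of that shared registry pattern (illustrative only: this simplified Registry and ToyDataset are not the exact openpoints implementations):

class Registry:
    def __init__(self, name):
        self.name = name
        self._modules = {}

    def register_module(self):
        def _register(cls):
            self._modules[cls.__name__] = cls  # key the class by its name
            return cls
        return _register

    def build(self, cfg, default_args=None):
        cfg = dict(cfg, **(default_args or {}))
        cls = self._modules[cfg.pop('NAME')]   # look the class up by 'NAME'
        return cls(**cfg)                      # instantiate with the remaining config

DATASETS = Registry('datasets')

@DATASETS.register_module()
class ToyDataset:
    def __init__(self, data_root='data/', split='train', **kwargs):
        self.data_root, self.split = data_root, split

ds = DATASETS.build({'NAME': 'ToyDataset', 'split': 'train'})

The real registries in openpoints (e.g. DATASETS and DataTransforms seen below) follow this same build-from-config idea.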
II. The Point Cloud Dataset Class
The S3DIS dataset is constructed at the same time the DataLoader is built:
S3DIS(**obj_cfg)
The dataset configuration parameters are:
obj_cfg = {
'data_root': 'data/S3DIS/s3disfull',
'test_area': 5,
'voxel_size': 0.04,
'split': 'train',
'voxel_max': 24000,
'loop': 30,
'presample': False,
'transform': <openpoints.transforms.transforms_factory.Compose object at 0x7fc15810f390>
}
The S3DIS dataset class in openpoints/dataset/s3dis/s3dis.py is annotated below.
@DATASETS.register_module()
class S3DIS(Dataset):
    classes = ['ceiling', 'floor', 'wall', 'beam', 'column', 'window',
               'door', 'chair', 'table', 'bookcase', 'sofa', 'board', 'clutter']
    num_classes = 13
    num_per_class = np.array([3370714, 2856755, 4919229, 318158, 375640, 478001, 974733,
                              650464, 791496, 88727, 1284130, 229758, 2272837], dtype=np.int32)
    class2color = {'ceiling': [0, 255, 0],
                   'floor': [0, 0, 255],
                   'wall': [0, 255, 255],
                   'beam': [255, 255, 0],
                   'column': [255, 0, 255],
                   'window': [100, 100, 255],
                   'door': [200, 200, 100],
                   'table': [170, 120, 200],
                   'chair': [255, 0, 0],
                   'sofa': [200, 100, 100],
                   'bookcase': [10, 200, 100],
                   'board': [200, 200, 200],
                   'clutter': [50, 50, 50]}
    cmap = [*class2color.values()]
    gravity_dim = 2
"""S3DIS dataset, loading the subsampled entire room as input without block/sphere subsampling.
number of points per room in average, median, and std: (794855.5, 1005913.0147058824, 939501.4733064277)
Args:
data_root (str, optional): Defaults to 'data/S3DIS/s3disfull'.
test_area (int, optional): Defaults to 5.
voxel_size (float, optional): the voxel size for donwampling. Defaults to 0.04.
voxel_max (_type_, optional): subsample the max number of point per point cloud. Set None to use all points. Defaults to None.
split (str, optional): Defaults to 'train'.
transform (_type_, optional): Defaults to None.
loop (int, optional): split loops for each epoch. Defaults to 1.
presample (bool, optional): wheter to downsample each point cloud before training. Set to False to downsample on-the-fly. Defaults to False.
variable (bool, optional): where to use the original number of points. The number of point per point cloud is variable. Defaults to False.
"""
    def __init__(self,
                 data_root: str = 'data/S3DIS/s3disfull',
                 test_area: int = 5,
                 voxel_size: float = 0.04,
                 voxel_max=None,
                 split: str = 'train',
                 transform=None,
                 loop: int = 1,
                 presample: bool = False,
                 variable: bool = False,
                 shuffle: bool = True,
                 ):
        super().__init__()
        self.split, self.voxel_size, self.transform, self.voxel_max, self.loop = \
            split, voxel_size, transform, voxel_max, loop
        # e.g. ('train', 0.04, <openpoints.transforms.transforms_factory.Compose object at 0x7fa956c3f5d0>, 24000, 30)
        self.presample = presample
        self.variable = variable  # False
        self.shuffle = shuffle  # True
        raw_root = os.path.join(data_root, 'raw')  # 'data/S3DIS/s3disfull/raw'
        self.raw_root = raw_root
        data_list = sorted(os.listdir(raw_root))  # all files under raw_root
        data_list = [item[:-4] for item in data_list if 'Area_' in item]  # strip the 4-character ".npy" suffix
        if split == 'train':
            self.data_list = [
                item for item in data_list if not 'Area_{}'.format(test_area) in item]  # test_area = 5
        else:
            self.data_list = [
                item for item in data_list if 'Area_{}'.format(test_area) in item]
        processed_root = os.path.join(data_root, 'processed')
        filename = os.path.join(
            processed_root, f's3dis_{split}_area{test_area}_{voxel_size:.3f}_{str(voxel_max)}.pkl')
        if presample and not os.path.exists(filename):  # presample is set to False for the train split here
            np.random.seed(0)
            self.data = []
            for item in tqdm(self.data_list, desc=f'Loading S3DISFull {split} split on Test Area {test_area}'):
                data_path = os.path.join(raw_root, item + '.npy')
                cdata = np.load(data_path).astype(np.float32)
                cdata[:, :3] -= np.min(cdata[:, :3], 0)
                if voxel_size:
                    coord, feat, label = cdata[:, 0:3], cdata[:, 3:6], cdata[:, 6:7]
                    uniq_idx = voxelize(coord, voxel_size)
                    coord, feat, label = coord[uniq_idx], feat[uniq_idx], label[uniq_idx]
                    cdata = np.hstack((coord, feat, label))
                self.data.append(cdata)
            npoints = np.array([len(data) for data in self.data])
            logging.info('split: %s, median npoints %.1f, avg num points %.1f, std %.1f' % (
                self.split, np.median(npoints), np.average(npoints), np.std(npoints)))
            os.makedirs(processed_root, exist_ok=True)
            with open(filename, 'wb') as f:
                pickle.dump(self.data, f)
                print(f"{filename} saved successfully")
        elif presample:
            with open(filename, 'rb') as f:
                self.data = pickle.load(f)
                print(f"{filename} load successfully")
        self.data_idx = np.arange(len(self.data_list))
        assert len(self.data_idx) > 0
        logging.info(f"\nTotally {len(self.data_idx)} samples in {split} set")
    def __getitem__(self, idx):
        # __getitem__ lets an object be accessed by index or slice;
        # Python calls this method automatically on indexing operations.
        data_idx = self.data_idx[idx % len(self.data_idx)]
        if self.presample:  # False during training here
            coord, feat, label = np.split(self.data[data_idx], [3, 6], axis=1)
        else:
            data_path = os.path.join(
                self.raw_root, self.data_list[data_idx] + '.npy')
            # e.g. 'data/S3DIS/s3disfull/raw/Area_4_office_15.npy'
            cdata = np.load(data_path).astype(np.float32)
            cdata[:, :3] -= np.min(cdata[:, :3], 0)
            # subtracting the minimum translates the cloud so the xyz minimum becomes the origin
            coord, feat, label = cdata[:, :3], cdata[:, 3:6], cdata[:, 6:7]
            # XYZ (coordinates) | RGB (features) | L (label)
            coord, feat, label = crop_pc(
                coord, feat, label, self.split, self.voxel_size, self.voxel_max,
                downsample=not self.presample, variable=self.variable, shuffle=self.shuffle)
            # TODO: do we need to -np.min in cropped data?
        label = label.squeeze(-1).astype(np.long)  # note: np.long is deprecated in recent NumPy; np.int64 is the equivalent
        data = {'pos': coord, 'x': feat, 'y': label}
        # pre-process.
        if self.transform is not None:
            data = self.transform(data)
        # __getitem__ applies the point cloud preprocessing via transform(data);
        # self.transform = <openpoints.transforms.transforms_factory.Compose object at 0x7fd07416c390>,
        # which wraps the basic transform instances:
        # [<openpoints.transforms.point_transform_cpu.ChromaticAutoContrast object at 0x7fd07416c4d0>,
        #  <openpoints.transforms.point_transform_cpu.PointsToTensor object at 0x7fd07416c3d0>,
        #  <openpoints.transforms.point_transformer_gpu.PointCloudScaling object at 0x7fd07416c490>,
        #  <openpoints.transforms.point_transformer_gpu.PointCloudXYZAlign object at 0x7fd07416c510>,
        #  <openpoints.transforms.point_transformer_gpu.PointCloudJitter object at 0x7fd07416c410>,
        #  <openpoints.transforms.point_transformer_gpu.ChromaticDropGPU object at 0x7fd07416c450>,
        #  <openpoints.transforms.point_transformer_gpu.ChromaticNormalize object at 0x7fd07416c550>]
        if 'heights' not in data.keys():  # data.keys() = dict_keys(['pos', 'x', 'y'])
            data['heights'] = torch.from_numpy(coord[:, self.gravity_dim:self.gravity_dim + 1].astype(np.float32))
            # self.gravity_dim = 2
        return data
        # data.keys() = dict_keys(['pos', 'x', 'y', 'heights'])

    def __len__(self):
        return len(self.data_idx) * self.loop
        # len(self.data_idx) = 204; self.loop = 30
        # return 1  # debug
The __getitem__ method gives the dataset automatic index-based access and, at the same time, maps the raw tensors stored in XYZRGBL order into the dictionary form dict_keys(['pos', 'x', 'y', 'heights']).
Note that the point cloud preprocessing and data augmentation described below also take place inside this method, as the sketch after this paragraph shows.
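As a hedged usage sketch (assuming the raw S3DIS data sit under data/S3DIS/s3disfull as in obj_cfg above; no transform is passed here), plain indexing is enough to trigger the whole load-crop-map chain:

dataset = S3DIS(data_root='data/S3DIS/s3disfull', split='train',
                voxel_size=0.04, voxel_max=24000, loop=30)
sample = dataset[0]          # Python dispatches to dataset.__getitem__(0)
print(sample.keys())         # dict_keys(['pos', 'x', 'y', 'heights'])
print(sample['pos'].shape)   # (24000, 3): cropped coordinates
print(len(dataset))          # len(self.data_idx) * self.loop = 204 * 30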
III. The Transform Composition Class
The class openpoints.transforms.transforms_factory.Compose is defined in openpoints/transforms/transforms_factory.py.
It collects and composes several basic point cloud transforms, which are defined in openpoints/transforms/point_transformer_gpu.py and openpoints/transforms/point_transform_cpu.py.
These transforms constitute the preprocessing applied to the point cloud data, mainly as data augmentation before network training.
Compose defines a __call__ method, so when a Compose instance is called like a function, the Python interpreter invokes __call__ automatically.
Because each basic transform class also defines its own __call__, the interpreter then descends into every contained transform object and calls its __call__ in turn.
These two levels of automatic invocation implement the automatic preprocessing of the dataset. The annotated code follows:
class Compose(object):
    """Composes several transforms together."""

    def __init__(self, transforms):
        self.transforms = transforms
        # transforms: a list of transform class instances

    def __call__(self, args):
        # calling a Compose instance like a function dispatches here automatically
        for t in self.transforms:
            args = t(args)
            # args is the point cloud data being processed;
            # t is a transform object, so t(args) calls the object like a
            # function, which invokes that transform's own __call__ method.
            # Calling the Compose instance therefore runs all contained
            # transforms, in order, on the input point cloud.
        return args
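A minimal sketch of this two-level __call__ chain, using toy transforms in place of the real ones (AddOne and Double are illustrative names only):

class AddOne(object):
    def __call__(self, data):
        data['pos'] = data['pos'] + 1
        return data

class Double(object):
    def __call__(self, data):
        data['pos'] = data['pos'] * 2
        return data

pipeline = Compose([AddOne(), Double()])
print(pipeline({'pos': 1}))  # Compose.__call__ runs each t.__call__ in order -> {'pos': 4}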
IV. The Basic Point Cloud Transform Classes
The basic transforms themselves are defined in openpoints/transforms/point_transformer_gpu.py and openpoints/transforms/point_transform_cpu.py. Let us briefly walk through the ones involved here.
The ChromaticAutoContrast class applies a random color perturbation by blending in a contrast-stretched version of the RGB values.
@DataTransforms.register_module()
class ChromaticAutoContrast(object):
    def __init__(self, p=0.2, blend_factor=None, **kwargs):
        self.p = p
        self.blend_factor = blend_factor

    def __call__(self, data):
        if np.random.rand() < self.p:
            # whether to apply the transform is itself random, with probability self.p
            lo = np.min(data['x'][:, :3], 0, keepdims=True)
            # per-channel RGB minima
            hi = np.max(data['x'][:, :3], 0, keepdims=True)
            # per-channel RGB maxima
            scale = 255 / (hi - lo)
            contrast_feat = (data['x'][:, :3] - lo) * scale
            # stretch each RGB channel to the full 0-255 range, increasing contrast
            blend_factor = np.random.rand() if self.blend_factor is None else self.blend_factor
            # a uniform random value in [0, 1)
            data['x'][:, :3] = (1 - blend_factor) * data['x'][:, :3] + blend_factor * contrast_feat
            # blend the original and contrast-stretched colors
            """vis
            from openpoints.dataset import vis_points
            vis_points(data['pos'], data['x']/255.)
            """
        return data
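As a quick standalone numpy check (hedged; the values are made up) of what the contrast stretch computes:

import numpy as np

rgb = np.array([[100., 150., 200.],
                [120., 160., 180.]])
lo = rgb.min(0, keepdims=True)
hi = rgb.max(0, keepdims=True)
stretched = (rgb - lo) * (255 / (hi - lo))
print(stretched)  # [[0, 0, 255], [255, 255, 0]]: each channel's min maps to 0
                  # and its max to 255; blend_factor then mixes this back
                  # with the original colors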
The PointCloudScaling class scales the coordinates and can optionally mirror them about the coordinate axes.
@DataTransforms.register_module()
class PointCloudScaling(object):
    def __init__(self,
                 scale=[2. / 3, 3. / 2],
                 anisotropic=True,
                 scale_xyz=[True, True, True],
                 mirror=[0, 0, 0],  # the probability of mirroring; set to a negative value to never mirror
                 **kwargs):
        self.scale_min, self.scale_max = np.array(scale).astype(np.float32)
        self.anisotropic = anisotropic
        self.scale_xyz = scale_xyz
        self.mirror = torch.from_numpy(np.array(mirror))
        self.use_mirroring = torch.sum(torch.tensor(self.mirror) > 0) != 0

    def __call__(self, data):
        device = data['pos'].device if hasattr(data, 'keys') else data.device
        scale = torch.rand(3 if self.anisotropic else 1, dtype=torch.float32, device=device) * (
            self.scale_max - self.scale_min) + self.scale_min
        # torch.rand(3): three different values when anisotropic, uniform in [0, 1);
        # scale thus holds three values drawn uniformly from [scale_min, scale_max)
        if self.use_mirroring:
            assert self.anisotropic == True
            self.mirror = self.mirror.to(device)
            mirror = (torch.rand(3, device=device) > self.mirror).to(torch.float32) * 2 - 1
            # three random values of 1 or -1, deciding whether each axis is flipped
            scale *= mirror
        for i, s in enumerate(self.scale_xyz):
            if not s: scale[i] = 1
        if hasattr(data, 'keys'):
            data['pos'] *= scale
            # scale the coordinate values along the three axes
        else:
            data *= scale
        return data
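A hedged illustration of the mirror mask (mirror=[0.5, 0, 0] is an illustrative value, not the default):

import torch

mirror = torch.tensor([0.5, 0., 0.])
m = (torch.rand(3) > mirror).to(torch.float32) * 2 - 1
print(m)  # e.g. tensor([-1., 1., 1.]): the comparison gives True/False per
          # axis, and *2-1 maps that to +1/-1, so x flips sign about half
          # the time while axes with threshold 0 (or negative) stay at +1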
The PointCloudXYZAlign class moves the origin of the XY plane to the point cloud center.
@DataTransforms.register_module()
class PointCloudXYZAlign(object):
    """Centering the point cloud in the xy plane."""

    def __init__(self,
                 gravity_dim=2,
                 **kwargs):
        self.gravity_dim = gravity_dim

    def __call__(self, data):
        if hasattr(data, 'keys'):
            data['pos'] -= torch.mean(data['pos'], axis=0, keepdims=True)
            # subtract the centroid, i.e. put the origin at the point cloud center
            data['pos'][:, self.gravity_dim] -= torch.min(data['pos'][:, self.gravity_dim])
            # shift the gravity axis (Z) so the lowest point sits at Z = 0
        else:
            data -= torch.mean(data, axis=0, keepdims=True)
            data[:, self.gravity_dim] -= torch.min(data[:, self.gravity_dim])
        return data
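A quick hedged numeric check of the centering and floor shift:

import torch

pos = torch.tensor([[0., 0., 1.],
                    [2., 2., 3.]])
pos -= pos.mean(dim=0, keepdim=True)  # move the centroid to the origin
pos[:, 2] -= pos[:, 2].min()          # put the lowest point at z = 0
print(pos)  # tensor([[-1., -1., 0.], [ 1.,  1., 2.]])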
The PointCloudJitter class adds noise to the coordinates, producing a jittering effect.
@DataTransforms.register_module()
class PointCloudJitter(object):
    def __init__(self, jitter_sigma=0.01, jitter_clip=0.05, **kwargs):
        self.noise_std = jitter_sigma
        self.noise_clip = jitter_clip

    def __call__(self, data):
        if hasattr(data, 'keys'):
            noise = torch.randn_like(data['pos']) * self.noise_std
            # a random tensor with the same shape as data['pos'], each element drawn
            # from a Gaussian with standard deviation self.noise_std
            data['pos'] += noise.clamp_(-self.noise_clip, self.noise_clip)
            # noise.clamp_(...) clips the noise in place to [-noise_clip, noise_clip]
            # before adding it to the original coordinates data['pos']
        else:
            noise = torch.randn_like(data) * self.noise_std
            data += noise.clamp_(-self.noise_clip, self.noise_clip)
        return data
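The same operation on a dummy cloud (hedged sketch), showing that the jitter is bounded by the clip value:

import torch

pos = torch.zeros(5, 3)
noise = torch.randn_like(pos) * 0.01   # jitter_sigma
pos += noise.clamp_(-0.05, 0.05)       # clamp_ mutates noise in place
print(bool(pos.abs().max() <= 0.05))   # True: no point moves more than 0.05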
The ChromaticDropGPU class zeroes out the RGB color features with a given probability.
@DataTransforms.register_module()
class ChromaticDropGPU(object):
    def __init__(self, color_drop=0.2, **kwargs):
        self.color_drop = color_drop

    def __call__(self, data):
        if torch.rand(1) < self.color_drop:
            data['x'][:, :3] = 0
            # randomly zero the RGB channels of the feature tensor data['x'];
            # self.color_drop is the probability of dropping the colors
        return data
The ChromaticNormalize class maps the RGB values from integer [0, 255] to float [0, 1] and then standardizes them with a fixed mean and standard deviation.
@DataTransforms.register_module()
class ChromaticNormalize(object):
    def __init__(self,
                 color_mean=[0.5136457, 0.49523646, 0.44921124],
                 color_std=[0.18308958, 0.18415008, 0.19252081],
                 **kwargs):
        self.color_mean = torch.from_numpy(np.array(color_mean)).to(torch.float32)
        self.color_std = torch.from_numpy(np.array(color_std)).to(torch.float32)

    def __call__(self, data):
        device = data['x'].device
        if data['x'][:, :3].max() > 1:
            # if RGB is in 0-255 representation,
            data['x'][:, :3] /= 255.
            # map it to the 0-1 representation
        data['x'][:, :3] = (data['x'][:, :3] - self.color_mean.to(device)) / self.color_std.to(device)
        # standardize the RGB channels
        return data
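A hedged check of the standardization on a single mid-gray pixel:

import torch

rgb = torch.tensor([[128., 128., 128.]]) / 255.  # about 0.502 per channel
mean = torch.tensor([0.5136457, 0.49523646, 0.44921124])
std = torch.tensor([0.18308958, 0.18415008, 0.19252081])
print((rgb - mean) / std)  # roughly [[-0.06, 0.04, 0.27]]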
V. Building the Preprocessing Objects
In openpoints/dataset/build.py, the function build_dataloader_from_cfg(...) calls build_transforms_from_cfg(trans_split, datatransforms_cfg) to create the Compose instance that bundles the point cloud transforms. The arguments passed in are as follows.
trans_split = 'train'
The datatransforms_cfg configuration:
train: ['ChromaticAutoContrast', 'PointsToTensor', 'PointCloudScaling', 'PointCloudXYZAlign', 'PointCloudJitter', 'ChromaticDropGPU', 'ChromaticNormalize']
val: ['PointsToTensor', 'PointCloudXYZAlign', 'ChromaticNormalize']
vote: ['ChromaticDropGPU']
kwargs:
    color_drop: 0.2
    gravity_dim: 2
    scale: [0.9, 1.1]
    angle: [0, 0, 1]
    jitter_sigma: 0.005
    jitter_clip: 0.02
The list under the 'train' key names the basic transforms to include.
The annotated build_transforms_from_cfg code follows.
def build_transforms_from_cfg(split, datatransforms_cfg):
    """
    Build the dataset transforms for a certain split, defined by `datatransforms_cfg`.
    """
    transform_list = datatransforms_cfg.get(split, None)
    # ['ChromaticAutoContrast', 'PointsToTensor', 'PointCloudScaling',
    #  'PointCloudXYZAlign', 'PointCloudJitter', 'ChromaticDropGPU',
    #  'ChromaticNormalize']
    transform_args = datatransforms_cfg.get('kwargs', None)
    compose_fn = eval(datatransforms_cfg.get('compose_fn', 'Compose'))
    # datatransforms_cfg.get('compose_fn', 'Compose') returns the default 'Compose';
    # eval('Compose') turns the string into the class object, so
    # compose_fn = <class 'openpoints.transforms.transforms_factory.Compose'>
    if transform_list is None or len(transform_list) == 0:
        return None
    point_transforms = []
    if len(transform_list) > 1:
        for t in transform_list:
            point_transforms.append(DataTransforms.build(
                {'NAME': t}, default_args=transform_args))
            # registration mechanism -> instantiate each transform class -> append to point_transforms
        return compose_fn(point_transforms)
        # equivalent to Compose(point_transforms): returns a Compose instance
    else:
        return DataTransforms.build({'NAME': transform_list[0]}, default_args=transform_args)
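As a hedged usage sketch (the shortened transform list is illustrative; the real config lists all seven train transforms, and the names are assumed to be registered in DataTransforms, as they are in openpoints):

datatransforms_cfg = {
    'train': ['PointsToTensor', 'ChromaticNormalize'],
    'kwargs': {'color_drop': 0.2, 'gravity_dim': 2},
}
train_transform = build_transforms_from_cfg('train', datatransforms_cfg)
# equivalent to Compose([PointsToTensor(**kwargs), ChromaticNormalize(**kwargs)])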
The rough sequence in which the preprocessing objects are constructed is shown below.
[Figure: sequence diagram of transform object construction]
VI. Final Processing of the Point Cloud Data
The main() function calls train_one_epoch(...), which applies one last round of processing to the data produced by the S3DIS class before feeding it into the model.
def train_one_epoch(model, train_loader, criterion, optimizer, scheduler, scaler, epoch, total_iter, cfg):
    loss_meter = AverageMeter()
    cm = ConfusionMatrix(num_classes=cfg.num_classes, ignore_index=cfg.ignore_index)
    model.train()  # set model to training mode
    pbar = tqdm(enumerate(train_loader), total=train_loader.__len__())
    # iterating automatically calls the S3DIS dataset's __getitem__(self, idx),
    # which in turn applies the preprocessing transform(data)
    num_iter = 0
    for idx, data in pbar:
        keys = data.keys() if callable(data.keys) else data.keys
        # keys = dict_keys(['pos', 'x', 'y', 'heights'])
        for key in keys:
            data[key] = data[key].cuda(non_blocking=True)  # asynchronous transfer of the tensors to the GPU
        num_iter += 1
        target = data['y'].squeeze(-1)
        # data['y'].shape = torch.Size([32, 24000]); there is no dimension of size 1, so squeeze is a no-op here
        """ debug
        from openpoints.dataset import vis_points
        vis_points(data['pos'].cpu().numpy()[0], labels=data['y'].cpu().numpy()[0])
        vis_points(data['pos'].cpu().numpy()[0], data['x'][0, :3, :].transpose(1, 0))
        end of debug """
        data['x'] = get_features_by_keys(data, cfg.feature_keys)
        # cfg.feature_keys = 'x,heights': concatenate RGB with heights
        # data['x'].shape = torch.Size([32, 4, 24000])
        data['epoch'] = epoch
        total_iter += 1
        data['iter'] = total_iter
        # data.keys() = dict_keys(['pos', 'x', 'y', 'heights', 'epoch', 'iter'])
        with torch.cuda.amp.autocast(enabled=cfg.use_amp):
            logits = model(data)
            loss = criterion(logits, target) if 'mask' not in cfg.criterion_args.NAME.lower() \
                else criterion(logits, target, data['mask'])
where
def get_features_by_keys(data, keys='pos,x'):
    key_list = keys.split(',')
    if len(key_list) == 1:  # False for 'x,heights'
        return data[keys].transpose(1, 2).contiguous()
    else:
        return torch.cat([data[key] for key in keys.split(',')], -1).transpose(1, 2).contiguous()
        # data['x'].shape = torch.Size([32, 24000, 3])
        # data['heights'].shape = torch.Size([32, 24000, 1])
        # torch.Size([32, 24000, 4]).transpose(1, 2) => torch.Size([32, 4, 24000])
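A hedged shape check with random tensors confirms the channels-first layout:

import torch

data = {'x': torch.rand(32, 24000, 3), 'heights': torch.rand(32, 24000, 1)}
feats = torch.cat([data[k] for k in 'x,heights'.split(',')], -1).transpose(1, 2)
print(feats.shape)  # torch.Size([32, 4, 24000]): RGB plus height, channels first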
The rough sequence of this final processing stage is shown below.
[Figure: sequence diagram of the final data processing]
The final point cloud feature tensor has shape data['x'].shape = torch.Size([32, 4, 24000]), and the complete data dict carries data.keys() = dict_keys(['pos', 'x', 'y', 'heights', 'epoch', 'iter']).
With that, the training data for the deep neural network is ready.
Summary
Once the data have been preprocessed and augmented, model training can begin.
Analyzing the deep network model itself will be both the focus and the difficulty of the posts to come.
Copyright notice: This is an original post by the blogger, released under the CC 4.0 BY license. Please attach the original source link and this notice when reposting.
Original link: https://blog.youkuaiyun.com/woyaomaishu2/article/details/141038519
Author: wzf@robotics_notes