In deep-learning tasks such as object detection and instance segmentation, we often need to slice large images together with their LabelMe annotation files. This tutorial shows how to use Python to batch-slice images and their JSON annotations into tiles of a configurable size (the example configuration below uses 256×256 tiles with 50% overlap), padding every tile with black up to a fixed 500×500 output. The script supports:
✅ Configurable tile size (x, y)
✅ Configurable overlap ratio (z)
✅ JSON coordinates cropped in sync with the image
✅ Undersized regions automatically padded with black up to 500×500
✅ Batch processing of a whole folder
✅ Output naming rule: originalname_row_col.jpg/json
1. Preparation
Assume your original files were produced with LabelMe:
ysl_20210820_01_9.jpg
ysl_20210820_01_9.json
All of these files live in one directory: D:\plant_seg_datasets\woody_plant\lableme
The generated tiles go to: D:\plant_seg_datasets\woody_plant\slice_test
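For reference, each LabelMe JSON typically looks roughly like the trimmed example below (the label, point values, and image size here are made up purely for illustration); the script only relies on the shapes, imagePath, imageHeight and imageWidth fields:

{
  "version": "5.0.1",
  "flags": {},
  "shapes": [
    {
      "label": "leaf",
      "points": [[120.0, 80.0], [300.0, 95.0], [210.0, 240.0]],
      "shape_type": "polygon",
      "flags": {}
    }
  ],
  "imagePath": "ysl_20210820_01_9.jpg",
  "imageData": null,
  "imageHeight": 1080,
  "imageWidth": 1920
}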
2. Dependencies
The script needs the following packages:
import os
import json
from pathlib import Path
from PIL import Image
If Pillow is missing, install it with:
pip install pillow
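To double-check the installation, this optional one-liner prints the installed Pillow version:

python -c "import PIL; print(PIL.__version__)"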
3. Full Python script (copy and run)
# --------------- code start ---------------
import os
import json
from pathlib import Path
from PIL import Image
# --------------- USER CONFIG ---------------
slice_width = 256   # tile width (adjust as needed)
slice_height = 256  # tile height (adjust as needed)
z = 0.5             # overlap ratio (0~1)
# Input and output paths (✅ set to the directories described above)
input_folder = Path(r"D:\plant_seg_datasets\woody_plant\lableme")
output_folder = Path(r"D:\plant_seg_datasets\woody_plant\slice_test")
output_folder.mkdir(parents=True, exist_ok=True)
# Pad every tile back up to 500×500
pad_to = (500, 500)
# -------------------------------------------
def compute_starts(image_size, slice_size, overlap):
    step = max(1, int(round(slice_size * (1 - overlap))))
    starts = []
    if slice_size >= image_size:
        starts = [0]
    else:
        cur = 0
        while cur + slice_size < image_size:
            starts.append(cur)
            cur += step
        last = image_size - slice_size
        if len(starts) == 0 or starts[-1] != last:
            starts.append(last)
    return starts
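# Example (illustrative): compute_starts(1000, 256, 0.5) -> [0, 128, 256, 384, 512, 640, 744]
# step = 128 here, and the final start is clamped so the last tile ends exactly at the image edge.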
def crop_and_pad(img, x0, y0, x1, y1, pad_to=(500, 500)):
    crop = img.crop((x0, y0, x1, y1))
    Wpad, Hpad = pad_to
    out = Image.new("RGB", (Wpad, Hpad), (0, 0, 0))
    out.paste(crop, (0, 0))
    return out, crop.size
def process_labelme_json_for_crop(jdata, crop_box):
    x0, y0, x1, y1 = crop_box
    new_shapes = []
    for shape in jdata.get("shapes", []):
        pts = shape.get("points", [])
        kept = []
        for (px, py) in pts:
            if x0 <= px < x1 and y0 <= py < y1:
                kept.append([px - x0, py - y0])
        shape_type = shape.get("shape_type", "").lower()
        if shape_type == "polygon":
            min_points = 3
        elif shape_type in ("linestrip", "rectangle", "line"):
            min_points = 2
        else:
            min_points = 1
        if len(kept) >= min_points:
            new_shape = dict(shape)
            new_shape["points"] = kept
            new_shapes.append(new_shape)
    return new_shapes
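# Note: points outside the crop window are simply dropped, not geometrically clipped,
# so a polygon that crosses a tile boundary keeps only its vertices inside the tile.
# Example (illustrative): with crop_box = (0, 0, 256, 256), a vertex at [300.0, 95.0]
# is discarded, while [120.0, 80.0] is kept unchanged as [120.0, 80.0].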
def process_one_pair(image_path, json_path, out_dir,
                     slice_w, slice_h, overlap, pad_to=(500, 500)):
    img = Image.open(image_path).convert("RGB")
    W, H = img.size
    with open(json_path, "r", encoding="utf8") as f:
        jdata = json.load(f)
    x_starts = compute_starts(W, slice_w, overlap)
    y_starts = compute_starts(H, slice_h, overlap)
    basename = image_path.stem
    out_files = []
    for i_row, y0 in enumerate(y_starts):
        for j_col, x0 in enumerate(x_starts):
            x1 = min(x0 + slice_w, W)
            y1 = min(y0 + slice_h, H)
            padded_img, _ = crop_and_pad(img, x0, y0, x1, y1, pad_to=pad_to)
            new_j = dict(jdata)
            new_shapes = process_labelme_json_for_crop(jdata, (x0, y0, x1, y1))
            new_j["shapes"] = new_shapes
            new_j["imagePath"] = f"{basename}_{i_row}_{j_col}.jpg"
            new_j["imageData"] = None  # drop any embedded base64 of the original image
            new_j["imageHeight"] = pad_to[1]
            new_j["imageWidth"] = pad_to[0]
            out_img_path = out_dir / f"{basename}_{i_row}_{j_col}.jpg"
            out_json_path = out_dir / f"{basename}_{i_row}_{j_col}.json"
            padded_img.save(out_img_path, format="JPEG", quality=95)
            with open(out_json_path, "w", encoding="utf8") as f:
                json.dump(new_j, f, ensure_ascii=False, indent=2)
            out_files.append((out_img_path, out_json_path))
    return out_files
def main():
    jpgs = sorted(input_folder.glob("*.jpg"))
    pairs = []
    for j in jpgs:
        js = j.with_suffix(".json")
        if js.exists():
            pairs.append((j, js))
        else:
            print("Warning: JSON not found for", j.name)
    total = 0
    for img_p, json_p in pairs:
        outs = process_one_pair(
            img_p, json_p,
            output_folder,
            slice_width,
            slice_height,
            z,
            pad_to=pad_to
        )
        total += len(outs)
    print(f"✅ Done! Processed {len(pairs)} image/JSON pairs and wrote {total} tiles to: {output_folder}")

if __name__ == "__main__":
    main()
# --------------- code end ---------------
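To run it, save the script under any name you like (slice_labelme_dataset.py below is only an example filename), adjust the paths and sizes in the USER CONFIG block, and execute:

python slice_labelme_dataset.py

An optional sanity check afterwards (not part of the script above; it assumes the output folder from the config) verifies that every tile matches the size recorded in its JSON:

from pathlib import Path
from PIL import Image
import json

out_dir = Path(r"D:\plant_seg_datasets\woody_plant\slice_test")
for jp in sorted(out_dir.glob("*.json")):
    with open(jp, "r", encoding="utf8") as f:
        meta = json.load(f)
    # every tile was padded to pad_to, so image size must equal the recorded size
    w, h = Image.open(jp.with_suffix(".jpg")).size
    assert (w, h) == (meta["imageWidth"], meta["imageHeight"]), jp.name
print("All tiles match their JSON metadata.")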
4. Output file naming format
ysl_20210820_01_9_0_0.jpg
ysl_20210820_01_9_0_0.json
ysl_20210820_01_9_1_0.jpg
ysl_20210820_01_9_1_0.json
...
All of them are saved to: D:\plant_seg_datasets\woody_plant\slice_test
5. What the script does
✔ Keeps annotations that fall partially inside a tile (only the vertices inside the tile are kept)
✔ Supports overlapping tiles
✔ Automatically pads every tile with black up to 500×500
✔ Processes a whole directory in one run
✔ Keeps the LabelMe JSON structure intact, so the output can go straight into training (see the quick visual check after this list)
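For a visual spot check, the minimal sketch below (an optional add-on, not part of the script above; the tile filename is just an example) overlays the shifted annotation points on one generated tile using Pillow's ImageDraw:

from pathlib import Path
import json
from PIL import Image, ImageDraw

tile = Path(r"D:\plant_seg_datasets\woody_plant\slice_test\ysl_20210820_01_9_0_0.jpg")  # example tile
img = Image.open(tile).convert("RGB")
draw = ImageDraw.Draw(img)
with open(tile.with_suffix(".json"), "r", encoding="utf8") as f:
    meta = json.load(f)
for shape in meta["shapes"]:
    pts = [tuple(p) for p in shape["points"]]
    if shape.get("shape_type") == "polygon" and len(pts) >= 3:
        draw.polygon(pts, outline=(255, 0, 0))       # closed outline for polygons
    elif len(pts) >= 2:
        draw.line(pts, fill=(255, 0, 0), width=2)    # open shapes: lines, rectangles, linestrips
    else:
        draw.point(pts, fill=(255, 0, 0))            # single leftover vertex
img.show()  # or img.save("check.png")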
6. Summary
This script is well suited for:
- Data augmentation for object detection / instance segmentation
- Preprocessing LabelMe-annotated data
- Slicing large images while keeping the small-tile annotations in sync
- Batch cropping of image + JSON pairs without losing annotation fidelity