pair根据选择标注标签的不同,会保存生成不同的标注文件。目前我发现的就是压缩包文件,里面包含着nii.gz文件,或者.json文件。对于某些标注工具的选择,保存的内容可能不同,但是据我发现,.json文件是都会存在的。
标注过程中发现,pair标注中对闭合区域的标记,有两种形式可以选择,分别是实心闭合和空心闭合,发现如下:
- 实心闭合,保存文件中包括nii.gz和json文件
- 空心闭合,保存文件中只有json文件
- 两者保存的json文件字段和格式不一样
下面我们就对这个复杂的json文件进行转储,生成一个相对比较简单的、可以用labelme打开的json文件。下面我们开始吧。
- 解压存储的标签文件 untar
- 取出一个series的标注信息,包括层号、类别、坐标等等
- 存储到json文件
- labelme打开确认下
下面我们就分空心闭合、实心闭合和混合型三个方面,进行记录。
1. 空心闭合操作
主代码如下:
import json
import commentjson
import tarfile
import os
import glob
def untar(fname, output_dirs):
    """Extract every member of the tar archive ``fname`` into ``output_dirs``.

    Args:
        fname: Path of the tar archive to open.
        output_dirs: Directory the archive members are extracted into.

    Raises:
        tarfile.TarError: If ``fname`` cannot be read as a tar archive.
        OSError: If the archive or the destination cannot be accessed.
    """
    # The context manager closes the archive handle even when extraction
    # fails; the previous version left the file open until garbage collection.
    # NOTE: extractall() trusts the member paths stored in the archive, so
    # only use this on archives that come from a trusted source.
    with tarfile.open(fname) as archive:
        archive.extractall(path=output_dirs)
def PolygonModelpoints3D2points2D(E_Points):
    """Reduce a sequence of points to 2D by keeping their first two components.

    Args:
        E_Points: Iterable of indexable points; only index 0 and index 1 of
            each point are read.

    Returns:
        A new list of ``[first, second]`` pairs, in the input order.
    """
    return [[point[0], point[1]] for point in E_Points]
def save2jsonFile(list_coors, save_dir, filename):
    """Write the shapes of one slice to a labelme-style JSON file.

    Args:
        list_coors: Iterable of shapes. Each shape is a list whose last
            element is the class label and whose preceding elements are the
            polygon vertices.
        save_dir: Directory the file is written into; it must already exist.
        filename: Name of the JSON file. The ``imagePath`` field is derived
            from it by replacing ``.json`` with ``.png``.
    """
    shapes = []
    for coor in list_coors:
        print('coor:', coor)
        shapes.append({
            'points': coor[:-1],
            'line_color': 'null',
            'label': str(coor[-1]),
            'fill_color': 'null',
        })

    annotation = {
        'lineColor': [0, 255, 0, 128],
        # Placeholder text only; no image payload is embedded by this function.
        'imageData': 'imageData',
        'fillColor': [255, 0, 0, 128],
        'imagePath': filename.replace('.json', '.png'),
        'shapes': shapes,
        'flags': {},
    }

    # ensure_ascii=False emits non-ASCII labels verbatim, so the file must be
    # opened as UTF-8 explicitly instead of relying on the platform default
    # encoding (which can fail or produce unreadable files on Windows).
    with open(os.path.join(save_dir, filename), 'w', encoding='utf-8') as f:
        json.dump(annotation, f, indent=2, ensure_ascii=False)
def Polygon_json(json_path=None, with_comment=False):
    """Split the "PolygonModel2" contours of one annotation file into per-slice JSON files.

    Reads ``Models -> PolygonModel2`` from the annotation file, groups the
    contours by their ``SliceIndex`` and writes one file per slice into a
    ``json`` folder next to ``json_path`` via ``save2jsonFile``.

    Args:
        json_path: Path of the annotation JSON file to convert.
        with_comment: When true the file is parsed with ``commentjson``
            (needed if the file contains comments); otherwise with ``json``.

    Returns:
        The label of every contour, as strings, in file order.
    """
    with open(json_path, encoding='utf-8') as handle:
        parser = commentjson if with_comment else json
        Anno_Dict = parser.load(handle)

    file_info = Anno_Dict["FileInfo"]
    depth = str(file_info["Depth"])
    series_name = file_info["Name"].split('_')[0]

    labels = []
    shapes_by_slice = {}
    for polygon in Anno_Dict["Models"]["PolygonModel2"]:
        label = str(polygon["Label"])
        # Only the first two components of each stored point are kept; the
        # label is appended as the last element of the shape list.
        shape = PolygonModelpoints3D2points2D(polygon["Points"])
        shape.append(label)
        slice_index = str(polygon["SliceIndex"])
        print('slicenum: {}\n类别id:{}\n坐标:{}\n'.format(slice_index, label, shape))
        labels.append(label)
        shapes_by_slice.setdefault(slice_index, []).append(shape)

    out_dir = os.path.join(os.path.dirname(json_path), 'json')
    for slice_index, shapes in shapes_by_slice.items():
        # Slice index is left-padded with zeros to six characters.
        out_name = series_name + '_' + slice_index.rjust(6, '0') + '_' + depth + '.json'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        save2jsonFile(shapes, out_dir, out_name)
        print(out_dir, out_name)
    return labels
if __name__ == '__main__':
    # Root folder that holds one sub-folder per patient.
    data_dir = r'F:\Pair\json_data'
    for patient in os.listdir(data_dir):
        file_dir = os.path.join(data_dir, patient)
        tar_list = glob.glob(file_dir + r'/*.tar')
        json_list = glob.glob(file_dir + r'/*.json')
        # Unpack only when there are more archives than JSON files already
        # present in the folder, then rescan for the extracted JSON files.
        if len(tar_list) > len(json_list):
            for file_path in tar_list:
                print('start untar ---', file_path)
                untar(file_path, file_dir)
            json_list = glob.glob(file_dir + r'/*.json')
        for file_path in json_list:
            # The converter defined in this script is Polygon_json; the
            # previous call used the undefined name PolygonModel2_json and
            # raised NameError on the first file.
            label_list = Polygon_json(json_path=file_path, with_comment=True)
到这里,已经将pair标注保存的json文件,转储到了小的json文件了。但是,此时的json文件和图片文件放到一起,还不能用labelme进行查看,还需要添加base64编码的图像信息到json文件内。
2. 添加base64到imageData json中
添加的方式如下:
import base64
from PIL import Image
import io
def base64encode_img(image_path):
    """Return the image at ``image_path`` re-encoded as JPEG, as a base64 string.

    Args:
        image_path: Path of an image file readable by PIL.

    Returns:
        The base64 text (``str``) of the JPEG-encoded image bytes.
    """
    output_buffer = io.BytesIO()
    # The context manager releases the underlying file handle as soon as the
    # pixels have been written to the buffer.
    with Image.open(image_path) as src_image:
        jpeg_ready = src_image
        # JPEG has no alpha channel or palette; saving such an image raises
        # OSError, so these modes are flattened to RGB first.
        if src_image.mode in ('RGBA', 'LA', 'P'):
            jpeg_ready = src_image.convert('RGB')
        jpeg_ready.save(output_buffer, format='JPEG')
    return base64.b64encode(output_buffer.getvalue()).decode('utf-8')
# Snippet, not runnable on its own: ``A``, ``raw_dir`` and ``file_name`` are
# not defined here. It appears intended to replace the placeholder assignment
# ``A['imageData'] = 'imageData'`` inside save2jsonFile, with ``raw_dir`` /
# ``file_name`` presumably naming the image that belongs to the JSON file
# being written -- TODO confirm.
A['imageData'] = base64encode_img(os.path.join(raw_dir, file_name))
上面这段代码我就没有和前面的代码组合到一起了,相信你一定知道该放置到哪里。如果还有疑问,来评论区留言就可以了。
此时,再用labelme打开,你就能够直接看到标注的信息,显示在图像上面了。
3. 实心闭合操作
import json
import commentjson
import tarfile
import os
import glob
"""
1.解压存储的标签文件 untar
2.去除一个series的标注信息,包括层号、类别、坐标等等
3.存储到json文件
4.labelme打开确认下
"""
def untar(fname, output_dirs):
    """Extract every member of the tar archive ``fname`` into ``output_dirs``.

    Args:
        fname: Path of the tar archive to open.
        output_dirs: Directory the archive members are extracted into.

    Raises:
        tarfile.TarError: If ``fname`` cannot be read as a tar archive.
        OSError: If the archive or the destination cannot be accessed.
    """
    # The context manager closes the archive handle even when extraction
    # fails; the previous version left the file open until garbage collection.
    # NOTE: extractall() trusts the member paths stored in the archive, so
    # only use this on archives that come from a trusted source.
    with tarfile.open(fname) as archive:
        archive.extractall(path=output_dirs)
def Polyspoints3D2points2D(E_Points):
    """Reduce "Polys"-style points to 2D coordinate pairs.

    Args:
        E_Points: Iterable of mappings; each must have a ``"Pos"`` entry that
            is indexable, of which only index 0 and index 1 are read.

    Returns:
        A new list of ``[first, second]`` pairs, in the input order.
    """
    positions = (entry["Pos"] for entry in E_Points)
    return [[pos[0], pos[1]] for pos in positions]
def save2jsonFile(list_coors, save_dir, filename):
    """Write the shapes of one slice to a labelme-style JSON file.

    Args:
        list_coors: Iterable of shapes. Each shape is a list whose last
            element is the class label and whose preceding elements are the
            polygon vertices.
        save_dir: Directory the file is written into; it must already exist.
        filename: Name of the JSON file. The ``imagePath`` field is derived
            from it by replacing ``.json`` with ``.png``.
    """
    shapes = [
        {
            'points': coor[:-1],
            'line_color': 'null',
            'label': str(coor[-1]),
            'fill_color': 'null',
        }
        for coor in list_coors
    ]

    annotation = {
        'lineColor': [0, 255, 0, 128],
        # Placeholder text only; no image payload is embedded by this function.
        'imageData': 'imageData',
        'fillColor': [255, 0, 0, 128],
        'imagePath': filename.replace('.json', '.png'),
        'shapes': shapes,
        'flags': {},
    }

    # ensure_ascii=False emits non-ASCII labels verbatim, so the file must be
    # opened as UTF-8 explicitly instead of relying on the platform default
    # encoding (which can fail or produce unreadable files on Windows).
    with open(os.path.join(save_dir, filename), 'w', encoding='utf-8') as f:
        json.dump(annotation, f, indent=2, ensure_ascii=False)
def Polys_json(json_path=None, with_comment=False):
    """Split the "Polys" contours of one annotation file into per-slice JSON files.

    Walks every entry of ``Polys`` and every item of its ``Shapes`` list,
    groups the contours by ``ImageFrame`` and writes one file per frame into
    a ``json`` folder next to ``json_path`` via ``save2jsonFile``.

    Args:
        json_path: Path of the annotation JSON file to convert.
        with_comment: When true the file is parsed with ``commentjson``
            (needed if the file contains comments); otherwise with ``json``.

    Returns:
        The ``labelType`` of every contour, as strings, in file order.
    """
    with open(json_path, encoding='utf-8') as handle:
        parser = commentjson if with_comment else json
        Anno_Dict = parser.load(handle)

    file_info = Anno_Dict["FileInfo"]
    depth = str(file_info["Depth"])
    series_name = file_info["Name"].split('_')[0]

    labels = []
    shapes_by_frame = {}
    for poly in Anno_Dict["Polys"]:
        for shape_info in poly["Shapes"]:
            label = str(shape_info["labelType"])
            frame = str(shape_info["ImageFrame"])
            # Only the first two components of each point's "Pos" are kept;
            # the label is appended as the last element of the shape list.
            shape = Polyspoints3D2points2D(shape_info["Points"])
            shape.append(label)
            labels.append(label)
            shapes_by_frame.setdefault(frame, []).append(shape)

    out_dir = os.path.join(os.path.dirname(json_path), 'json')
    for frame, shapes in shapes_by_frame.items():
        # Frame index is left-padded with zeros to six characters.
        out_name = series_name + '_' + frame.rjust(6, '0') + '_' + depth + '.json'
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        save2jsonFile(shapes, out_dir, out_name)
        print(out_dir, out_name)
    return labels
if __name__ == '__main__':
    # Root folder that holds one sub-folder per patient.
    data_dir = r'Z:\cancer1-100'
    patient_list = os.listdir(data_dir)
    label_list_all = []
    TB_PatientNum = 0
    for patient in patient_list:
        file_dir = os.path.join(data_dir, patient)
        print(file_dir)
        # A patient folder that already has a "json" entry is skipped.
        if os.path.exists(os.path.join(file_dir, 'json')):
            continue

        json_pattern = file_dir + r'/*.json'
        tar_list = glob.glob(file_dir + r'/*.tar')
        json_list = glob.glob(json_pattern)
        # More archives than JSON files: unpack all archives, then rescan.
        if tar_list and len(tar_list) > len(json_list):
            for archive_path in tar_list:
                print('start untar ---', archive_path)
                untar(archive_path, file_dir)
            json_list = glob.glob(json_pattern)

        for file_path in json_list:
            # Convert the "Polys" contours of this annotation file.
            label_list = Polys_json(json_path=file_path, with_comment=True)
除去上面两种情况外,还会有混合的,则进行统一处理。
处理代码如下:
def Polygon_combine_json(json_path=None, with_comment=False):
    """Convert an annotation file that may hold either or both contour formats.

    Collects contours from ``Models -> PolygonModel2`` (keys ``Label``,
    ``SliceIndex``, ``Points``) and from ``Polys -> Shapes`` (keys
    ``labelType``, ``ImageFrame``, ``Points``), groups them by slice index
    and writes one file per slice into a ``json`` folder next to
    ``json_path`` via ``save2jsonFile``. Either section may be absent.
    Contours with fewer than three points are ignored, and a malformed
    contour is reported and skipped without discarding the rest.

    The slice index is written as stored in the file. NOTE: the original
    author suggests adding 1 to it when the matching images are numbered
    from 1, because the stored index starts at 0.

    Args:
        json_path: Path of the annotation JSON file to convert.
        with_comment: When true the file is parsed with ``commentjson``
            (needed if the file contains comments); otherwise with ``json``.

    Returns:
        The label of every kept contour, as strings, in processing order.
    """
    with open(json_path, encoding='utf-8') as f:
        Anno_Dict = commentjson.load(f) if with_comment else json.load(f)
    Name = Anno_Dict["FileInfo"]["Name"].split('_')[0]

    label_list = []
    # Created before either section is read. Previously it was created inside
    # the first ``try`` block, so a file without "Models" silently lost all of
    # its "Polys" contours and then crashed in the save loop.
    dict_sliceNum_coor = {}

    def _as_list(value):
        """Return ``value`` if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    def _add_shape(item, label_key, slice_key, to_2d):
        """Validate one contour and file it under its slice index."""
        try:
            label = str(item[label_key])
            slice_index = str(item[slice_key])
            raw_points = item["Points"]
            # Fewer than three points cannot form a polygon.
            if len(raw_points) < 3:
                return
            # Only the in-plane components of each point are kept.
            points = to_2d(raw_points)
        except (KeyError, IndexError, TypeError) as err:
            print('skip malformed shape in {}: {!r}'.format(json_path, err))
            return
        points.append(label)
        label_list.append(label)
        dict_sliceNum_coor.setdefault(slice_index, []).append(points)

    models = Anno_Dict.get("Models")
    polygon_models = models.get("PolygonModel2") if isinstance(models, dict) else None
    for item in _as_list(polygon_models):
        _add_shape(item, "Label", "SliceIndex", PolygonModelpoints3D2points2D)

    for poly in _as_list(Anno_Dict.get("Polys")):
        shapes = poly.get("Shapes") if isinstance(poly, dict) else None
        for item in _as_list(shapes):
            _add_shape(item, "labelType", "ImageFrame", Polyspoints3D2points2D)

    save_dir = os.path.join(os.path.dirname(json_path), 'json')
    if dict_sliceNum_coor:
        os.makedirs(save_dir, exist_ok=True)
    for key, value in dict_sliceNum_coor.items():
        # Slice index left-padded with zeros to six characters; the depth is
        # not part of the file name in this variant.
        filename = Name + '_' + key.rjust(6, '0') + '.json'
        save2jsonFile(value, save_dir, filename)
        print(save_dir, filename)
    return label_list
4.注意事项
注意事项1:ImageFrame从0开始
这点在对生成的json文件与图像对应时候尤为重要。我们dcm文件中都是从1开始的,而不是0。所以这点需要注意。
注意事项2:ImageFrame的排序和dcm文件的排序会不一致
生成的json标注文件内ImageFrame排序不是按照dcm文件内记录的顺序进行排布的,而是根据Z轴进行排布的,也就是我们下图的箭头方向:
注意事项3:json标注坐标,是在mm单位下的坐标,不是像素单位
如果你用labelme打开发现标注区域不匹配,这个点需要注意。所以在转图时候,记得把坐标和图像统一到同一个单位,这里要用到PixelSpacing(每个像素对应的mm数)
- 方式一,对坐标除以PixelSpacing,换算到像素单位
- 方式二,对图像大小乘以PixelSpacing,换算到mm单位
至此,亲测有效,你就可以打开生成的json和图像放到一起,用labelme打开看看了。相信你看到的内容,还是很满意的。