如题,本文提供将 labelme 标注的 json 文件和 labelimg 标注的 xml 文件转换为 YOLO txt 格式标签文件的方法;其他数据集标签处理问题,见如下博文:
作物计数方法之合并信息生成json标签的方法_fsc147-CSDN博客
(1)json文件转yolo txt标签
import json
import os
def convert(img_size, box):
    """Convert a corner-format pixel box into a normalized YOLO box.

    Args:
        img_size: ``(width, height)`` of the image, in pixels.
        box: ``(x1, y1, x2, y2)`` -- two opposite corners of the rectangle,
            in pixels. The corners may be given in either order.

    Returns:
        ``(x_center, y_center, width, height)`` where the x values are divided
        by the image width and the y values by the image height.
    """
    img_w, img_h = img_size[0], img_size[1]
    x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

    # The midpoint is the same whichever corner comes first.
    x_center = x1 + (x2 - x1) / 2.0
    y_center = y1 + (y2 - y1) / 2.0

    # abs() keeps the size positive when the rectangle was drawn from the
    # bottom-right towards the top-left (second point smaller than the first).
    box_w = abs(x2 - x1)
    box_h = abs(y2 - y1)

    return (x_center / img_w, y_center / img_h, box_w / img_w, box_h / img_h)
def decode_json(json_floder_path, json_name,
                txt_dir='D:/TEST/label_yolo_test/txt/', class_ids=None):
    """Convert one labelme JSON annotation file into a YOLO txt label file.

    Only shapes whose ``shape_type`` is ``'rectangle'`` and whose ``label`` is
    a key of ``class_ids`` are exported; every other shape is skipped. Each
    exported shape becomes one line ``<class_id> <xc> <yc> <w> <h>``, with the
    box values produced by ``convert``.

    Args:
        json_floder_path: Folder that contains the JSON file.
        json_name: File name of the JSON file inside that folder.
        txt_dir: Folder the txt file is written to (change to your own label
            folder). It is created when it does not exist.
        class_ids: Mapping from label name to YOLO class index. Defaults to
            ``{'cotton_flower': 0, 'potato_flower': 1}``; add entries here to
            support more classes.
    """
    if class_ids is None:
        class_ids = {'cotton_flower': 0, 'potato_flower': 1}

    json_path = os.path.join(json_floder_path, json_name)
    with open(json_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    img_size = (data['imageWidth'], data['imageHeight'])

    lines = []
    for shape in data['shapes']:
        if shape.get('shape_type') != 'rectangle':
            continue
        class_id = class_ids.get(shape.get('label'))
        if class_id is None:
            continue

        first, second = shape['points'][0], shape['points'][1]
        # The two points may be stored in any order; sort each axis so the
        # box passed on is always (x_min, y_min, x_max, y_max).
        x1, x2 = sorted((float(first[0]), float(second[0])))
        y1, y2 = sorted((float(first[1]), float(second[1])))

        bbox = convert(img_size, (x1, y1, x2, y2))
        lines.append(str(class_id) + " " + " ".join(str(v) for v in bbox) + '\n')

    # splitext drops the extension whatever its length.
    stem = os.path.splitext(json_name)[0]
    if txt_dir:
        os.makedirs(txt_dir, exist_ok=True)
    txt_name = os.path.join(txt_dir, stem + '.txt')
    # The file is written even when no shape matched, so every JSON file has
    # a txt file with the same stem.
    with open(txt_name, 'w') as txt_file:
        txt_file.writelines(lines)
if __name__ == "__main__":
    # Change this to the folder that holds your labelme JSON files.
    json_floder_path = 'D:/TEST/label_yolo_test/Json'
    # Only annotation files are converted; other entries in the folder
    # (images, sub-folders, ...) are ignored instead of being parsed as JSON.
    json_names = [
        name for name in os.listdir(json_floder_path)
        if name.lower().endswith('.json')
    ]
    for json_name in json_names:
        decode_json(json_floder_path, json_name)
(2)xml文件转yolo txt标签(注:下面给出的代码实际功能是按比例将图片和txt标签划分为 train/val/test 三个子集)
import os, shutil, random
from tqdm import tqdm
def _copy_pairs(img_paths, label_paths, img_dir, label_dir, desc):
    """Copy every image and its label into the given folders, with a progress bar."""
    pairs = zip(img_paths, label_paths)
    for img, label in tqdm(pairs, total=len(img_paths), desc=desc, ncols=80, unit='img'):
        _copy(img, img_dir)
        _copy(label, label_dir)


def split_img(img_path, label_path, split_list, data_root=r'Cotton_flower_dataset/flower'):
    """Randomly split images and their txt labels into train/val/test folders.

    The files are copied (not moved) into ``<data_root>/images/<split>`` and
    ``<data_root>/labels/<split>`` for ``split`` in ``train``, ``val``, ``test``.

    Args:
        img_path: Folder that contains the images.
        label_path: Folder that contains the txt labels; the label path for
            each image is obtained from ``toLabelPath``.
        split_list: ``[train, val, test]`` ratios. ``train`` is the fraction
            of all images; the remaining images are divided between val and
            test in the proportion ``val : test``.
        data_root: Root folder of the generated dataset layout.
    """
    train, val, test = split_list

    # exist_ok=True lets the split be re-run on an existing layout while
    # still creating any folder that is missing; genuine failures (e.g. a
    # permission error) are raised instead of being hidden.
    dirs = {}
    for split in ('train', 'val', 'test'):
        img_dir = os.path.join(data_root, 'images', split)
        label_dir = os.path.join(data_root, 'labels', split)
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)
        dirs[split] = (img_dir, label_dir)

    all_img_path = [os.path.join(img_path, img) for img in os.listdir(img_path)]

    # Train set: a random sample of the requested fraction of all images.
    train_img = random.sample(all_img_path, int(train * len(all_img_path)))
    picked = set(train_img)
    remaining = [path for path in all_img_path if path not in picked]

    # Val set: drawn from what is left, in the proportion val : test.
    # A zero denominator means nothing was requested for val or test.
    val_share = val / (val + test) if (val + test) > 0 else 0.0
    val_img = random.sample(remaining, int(val_share * len(remaining)))
    picked = set(val_img)

    # Test set: everything that was selected for neither train nor val.
    test_img = [path for path in remaining if path not in picked]

    for split, images, desc in (
        ('train', train_img, 'train '),
        ('val', val_img, 'val '),
        ('test', test_img, 'test '),
    ):
        labels = [toLabelPath(img, label_path) for img in images]
        img_dir, label_dir = dirs[split]
        _copy_pairs(images, labels, img_dir, label_dir, desc)
def _copy(from_path, to_path):
    """Copy the file at ``from_path`` to ``to_path`` using ``shutil.copy``."""
    source, destination = from_path, to_path
    shutil.copy(source, destination)
def toLabelPath(img_path, label_path):
    """Return the path of the txt label that belongs to an image.

    Args:
        img_path: Path of the image file. ``/`` and ``\\`` are both accepted
            as separators, including a mix of the two.
        label_path: Folder that contains the txt labels.

    Returns:
        ``label_path`` joined with the image's base name, with the image
        extension replaced by ``.txt``.
    """
    # Treat both separators alike so that a path such as 'dir/sub\\a.jpg'
    # (forward slashes mixed with a backslash) and a pure '/' path both
    # resolve to the same file name.
    file_name = img_path.replace('\\', '/').rsplit('/', 1)[-1]
    # splitext handles any image extension, not only '.jpg'.
    stem = os.path.splitext(file_name)[0]
    return os.path.join(label_path, stem + '.txt')
def main():
    """Run the dataset split on the cotton-flower folders with a 0.7/0.2/0.1 ratio."""
    # Dataset split ratios, in the order [train, val, test].
    ratios = [0.7, 0.2, 0.1]
    # Folder where the txt label files are stored.
    labels_dir = r"Cotton_flower_dataset/label_txt"
    # Folder where the images are stored.
    images_dir = r"Cotton_flower_dataset/image"
    split_img(images_dir, labels_dir, ratios)


if __name__ == '__main__':
    main()