将DOTA标签格式转为VOC格式形成xml文件

本文介绍了一种将DOTA数据集转换为VOC格式的方法,包括如何读取DOTA标签文件并将其转换为VOC所需的XML格式。文中提供了Python代码实现,支持将DOTA中的8坐标点转换为VOC的矩形边界框(hbb)或原生8坐标点(obb)。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

DOTA数据集的格式为:

前8个数字分别为gt的4个点坐标(x、y)

转换为VOC标注的代码为:

这里有两种选择

1. 将原有的8坐标转换为4坐标的最小外接矩形,即hbb形式 xmin,ymin,xmax,ymax

2. 保留原有8坐标,即obb形式x0,y0,x1,y1,x2,y2,x3,y3

import os
import cv2
from xml.dom.minidom import Document 
 
# Python 2 only: force the process-wide default string encoding to UTF-8.
# (Original note: this step is not needed on Windows.)
# reload() and sys.setdefaultencoding() do not exist on Python 3, where str is
# already Unicode, so the workaround is skipped there instead of crashing
# with a NameError at import time.
import sys
stdi, stdo, stde = sys.stdin, sys.stdout, sys.stderr
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- Python 2 builtin
    sys.setdefaultencoding('utf-8')
    # reload(sys) resets the standard streams; put the saved ones back.
    sys.stdin, sys.stdout, sys.stderr = stdi, stdo, stde


# Only objects whose DOTA category is listed here are written to the output.
category_set = ['ship']
 
def custombasename(fullname):
    """Return the final path component of *fullname* without its extension."""
    stem, _extension = os.path.splitext(fullname)
    return os.path.basename(stem)
 
 
def limit_value(a, b):
    """Clamp coordinate *a* so that it is at least 1 and strictly below *b*.

    Values below 1 are raised to 1 first; the result of that step is then
    replaced by ``b - 1`` whenever it is greater than or equal to *b*.
    """
    lower_bounded = 1 if a < 1 else a
    return b - 1 if lower_bounded >= b else lower_bounded
 
    
def readlabeltxt(txtpath, height, width, hbb = True, categories = None):
    """Parse one DOTA label file into a list of boxes.

    Each annotation line is expected to hold eight corner coordinates
    (x1 y1 x2 y2 x3 y3 x4 y4) followed by the category name. Lines with fewer
    than nine whitespace-separated fields are ignored. That covers the
    metadata header lines the original code skipped by position, as well as
    blank lines, and it no longer drops the first two annotations of label
    files that have no header.

    Args:
        txtpath: Path of the DOTA label text file.
        height: Image height, used to clamp y coordinates via limit_value.
        width: Image width, used to clamp x coordinates via limit_value.
        hbb: When true, return axis-aligned boxes
            ``[xmin, ymin, xmax, ymax, label]``; otherwise return the four
            clamped corner points ``[x1, y1, x2, y2, x3, y3, x4, y4, label]``.
        categories: Category names to keep. Defaults to the module-level
            ``category_set``.

    Returns:
        A list of boxes in file order, restricted to the kept categories.

    Raises:
        ValueError: If a coordinate field of a kept line is not numeric.
    """
    if categories is None:
        categories = category_set
    print(txtpath)
    boxes = []
    with open(txtpath, 'r') as f_in:
        for line in f_in:
            # split() with no argument tolerates tabs and repeated spaces.
            fields = line.split()
            if len(fields) < 9:
                # Header, blank or truncated line: no category field present.
                continue
            label = fields[8]
            if label not in categories:  # only keep the requested categories
                continue
            # Coordinates may be written as floats; truncate them to integers.
            coords = [int(float(value)) for value in fields[:8]]
            xs = coords[0::2]
            ys = coords[1::2]
            if hbb:
                # Bounding rectangle of the four corners, clamped to the image.
                box = [
                    limit_value(min(xs), width),
                    limit_value(min(ys), height),
                    limit_value(max(xs), width),
                    limit_value(max(ys), height),
                    label,
                ]
            else:
                # Keep the four corners, each clamped to the image.
                box = []
                for x, y in zip(xs, ys):
                    box.append(limit_value(x, width))
                    box.append(limit_value(y, height))
                box.append(label)
            boxes.append(box)
    return boxes
 
def writeXml(tmp, imgname, w, h, d, bboxes, hbb = True):
    """Write a VOC-style annotation XML file for one image.

    The file is named after *imgname* with its extension replaced by ``.xml``
    and is written into directory *tmp* as UTF-8, tab-indented XML.

    Args:
        tmp: Directory that receives the XML file.
        imgname: Image file name, stored in the ``filename`` element.
        w: Image width, stored under ``size/width``.
        h: Image height, stored under ``size/height``.
        d: Image depth, stored under ``size/depth``.
        bboxes: Boxes whose last item is the class name. The leading items
            are the coordinates: four for hbb, eight for obb.
        hbb: When true the ``bndbox`` children are xmin/ymin/xmax/ymax;
            otherwise they are x0, y0, ... x3, y3.
    """
    doc = Document()

    def add_branch(parent, tag):
        # Create an empty element, attach it to parent and hand it back.
        node = doc.createElement(tag)
        parent.appendChild(node)
        return node

    def add_leaf(parent, tag, text):
        # Create an element that holds a single text node.
        add_branch(parent, tag).appendChild(doc.createTextNode(text))

    root = add_branch(doc, 'annotation')
    add_leaf(root, 'folder', "VOC2007")
    add_leaf(root, 'filename', imgname)

    # Fixed provenance metadata.
    source = add_branch(root, 'source')
    add_leaf(source, 'database', "My Database")
    add_leaf(source, 'annotation', "VOC2007")
    add_leaf(source, 'image', "flickr")

    owner = add_branch(root, 'owner')
    add_leaf(owner, 'flickrid', "NULL")
    add_leaf(owner, 'name', "idannel")

    # Image dimensions.
    size = add_branch(root, 'size')
    for tag, value in (('width', w), ('height', h), ('depth', d)):
        add_leaf(size, tag, str(value))

    add_leaf(root, 'segmented', "0")

    # Tag names for the coordinates, in the same order as the bbox items.
    if hbb:
        coord_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    else:
        coord_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')

    for bbox in bboxes:
        obj = add_branch(root, 'object')
        add_leaf(obj, 'name', str(bbox[-1]))
        add_leaf(obj, 'pose', "Unspecified")
        add_leaf(obj, 'truncated', "0")
        add_leaf(obj, 'difficult', "0")

        bndbox = add_branch(obj, 'bndbox')
        for position, tag in enumerate(coord_tags):
            add_leaf(bndbox, tag, str(bbox[position]))

    out_path = os.path.join(tmp, os.path.splitext(imgname)[0] + '.xml')
    # toprettyxml returns bytes when an encoding is given, hence binary mode.
    with open(out_path, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
    return
  
 
 
if __name__ == '__main__':
    # Convert every DOTA label file under data_path/labelTxt into a VOC XML
    # file, reading the matching image only to obtain its dimensions.
    data_path = '/home/yantianwang/lala/ship/train/examplesplit'
    images_path = os.path.join(data_path, 'images')  # sample images
    labeltxt_path = os.path.join(data_path, 'labelTxt')  # DOTA label files
    anno_new_path = os.path.join(data_path, 'obbxml')  # output directory for the VOC XML files
    ext = '.tif'  # extension of the sample images
    use_hbb = True  # True: rectangular boxes (hbb); False: four corner points (obb)

    # writeXml opens its output with open(), which does not create missing
    # directories, so make sure the target exists first.
    if not os.path.isdir(anno_new_path):
        os.makedirs(anno_new_path)

    filenames = os.listdir(labeltxt_path)  # one entry per label file
    for filename in filenames:
        filepath = os.path.join(labeltxt_path, filename)  # full path of this label file
        picname = os.path.splitext(filename)[0] + ext
        pic_path = os.path.join(images_path, picname)
        im = cv2.imread(pic_path)  # image is read to get H, W, D for the XML
        if im is None:
            # cv2.imread signals a missing/unreadable image by returning None;
            # skip it instead of crashing on im.shape.
            print('无法读取图片', pic_path)
            continue
        (H, W, D) = im.shape
        boxes = readlabeltxt(filepath, H, W, hbb = use_hbb)
        if len(boxes)==0:
            print('文件为空',filepath)

        # An XML file is still written when no boxes were kept.
        writeXml(anno_new_path, picname, W, H, D, boxes, hbb = use_hbb)
        print('正在处理%s'%filename)

 

### 将YOLO格式的标注文件转换为DOTA格式

为了实现从YOLO到DOTA格式的转换,需要理解两种格式之间的差异并编写相应的脚本。通常情况下,YOLO格式采用的是边界框中心坐标以及宽度高度的形式表示目标位置;而DOTA格式则更倾向于使用多边形来描述物体轮廓。

对于这种特定需求,并未直接提供现成函数用于执行此操作[^1]。然而可以根据已有的`convert_dota_to_yolo_obb()`方法反向思考解决方案。该方法负责将DOTA中的旋转矩形或多边形转化为适合YOLO使用的最小外接水平矩形形式。因此要完成逆过程,则需创建一个新的Python函数来进行相反的操作——即将YOLO风格的目标定义重新解释回原始图像空间内的多边形形状。

下面是一个简单的概念验证代码片段:

```python
import numpy as np

def yolo_to_dota(yolo_bbox, img_width, img_height):
    """
    Convert a single YOLO format bounding box to DOTA format.

    Parameters:
        yolo_bbox (list): A list of four floats representing the normalized center coordinates and width-height ratio.
        img_width (int): Width of the image.
        img_height (int): Height of the image.

    Returns:
        tuple: Four points defining a rectangle in DOTA format.
    """
    cx_norm, cy_norm, w_ratio, h_ratio = yolo_bbox

    # Denormalize values based on image dimensions
    cx_abs = float(cx_norm * img_width)
    cy_abs = float(cy_norm * img_height)
    half_w = 0.5 * w_ratio * min(img_width, img_height)
    half_h = 0.5 * h_ratio * min(img_width, img_height)

    point1_x = int(round(cx_abs - half_w))
    point1_y = int(round(cy_abs - half_h))
    point2_x = int(round(cx_abs + half_w))
    point2_y = int(round(cy_abs - half_h))
    point3_x = int(round(cx_abs + half_w))
    point3_y = int(round(cy_abs + half_h))
    point4_x = int(round(cx_abs - half_w))
    point4_y = int(round(cy_abs + half_h))

    return ((point1_x, point1_y), (point2_x, point2_y), (point3_x, point3_y), (point4_x, point4_y))

# Example usage with dummy data
example_yolo_bbox = [0.5, 0.5, 0.2, 0.3]
image_dimensions = {'width': 800, 'height': 600}
dota_format_points = yolo_to_dota(example_yolo_bbox, image_dimensions['width'], image_dimensions['height'])
print(dota_format_points)
```

上述代码实现了基本的功能,即接收YOLO格式下的单个边界框信息及其对应图片尺寸参数作为输入,并返回一组四个顶点组成的元组,这些顶点按照顺时针顺序排列形成一个多边形区域,在视觉上近似于原对象的位置和方向[^2]。

需要注意的是,这里假设了YOLO格式是以相对整个图像大小的比例给出宽高比的方式存储的。如果实际应用中遇到不同的情况(比如绝对像素值),可能还需要调整这部分逻辑以适应具体场景的要求。另外注意:示例代码用 `min(img_width, img_height)` 同时缩放宽和高;按标准YOLO标注约定(宽按图像宽度、高按图像高度归一化),应分别改为乘以 `img_width` 和 `img_height`。
评论 12
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值