DOTA数据集分割,并将txt转为xml

该文介绍了DOTA数据集的处理流程,包括从15类中筛选特定类别,如ship,然后对大尺寸图像进行切割,保持左上角不变并填充,确保每个图像为608x608。接着抓取标签数据并转换为txt,最后将txt转换为xml格式,便于目标检测训练。此外,还提供了批量修改文件名的脚本以及创建ImageSets文件夹中的txt文件,用于训练和验证集划分。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

DOTA数据集简介

DOTA数据集包含2806张航空图像,尺寸大约从800x800到4000x4000不等,包含15个类别共计188282个实例。其标注方式为四点确定的任意形状和方向的四边形(区别于传统的对边平行bbox)。类别分别为:plane, ship, storage tank, baseball diamond, tennis court, swimming pool, ground track field, harbor, bridge, large vehicle, small vehicle, helicopter, roundabout, soccer ball field, basketball court。

可以看出DOTA数据集里的照片,有的尺寸非常大,而且照片大小也不统一,因此我们在进行目标检测网络训练时需要对照片进行分割,分割步骤如下:

选取需要类别 → 图片裁剪 → 标签数据自动抓取 → 格式转换txt2xml → 修改图片和标签文件名称 → 生成各类数据集索引文件

代码演示:

1. SelectShip.py

从有15种类别的DOTA数据集中筛选出需要的类别

import os
import shutil
import cv2

# Names of the 15 DOTA object categories.
category_set = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field',
                'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
                'basketball-court', 'storage-tank', 'soccer-ball-field',
                'roundabout', 'harbor', 'swimming-pool', 'helicopter']


def custombasename(fullname):
    """Return the last path component of *fullname* with its final extension removed."""
    stem, _extension = os.path.splitext(fullname)
    return os.path.basename(stem)


def GetFileFromThisRootDir(dir, ext=None):
    """Recursively collect the paths of all files below *dir*.

    Args:
        dir: Root directory to walk.
        ext: Optional extension filter, written without the leading dot.
            Either one string such as ``'png'`` or a collection of such
            strings. ``None`` (the default) returns every file.

    Returns:
        A list of paths, each formed by joining the walked directory with
        the file name.
    """
    if ext is None:
        allowed = None
    elif isinstance(ext, str):
        # A bare string means exactly one extension. Testing
        # `extension in 'png'` would be a substring test and would also
        # accept '', 'p', 'ng', ... (i.e. files without any extension).
        allowed = (ext,)
    else:
        allowed = ext

    allfiles = []
    for root, _dirs, files in os.walk(dir):
        for filename in files:
            extension = os.path.splitext(filename)[1][1:]
            if allowed is None or extension in allowed:
                allfiles.append(os.path.join(root, filename))
    return allfiles


if __name__ == '__main__':
    # Copy every image (together with its label file) that contains enough
    # instances of the wanted categories into a separate dataset folder.
    root1 = 'G:/DOTA/train'
    pic_path = os.path.join(root1, 'images')  # source images
    label_path = os.path.join(root1, 'labelTxt')  # source DOTA label files
    label_list = GetFileFromThisRootDir(label_path)
    ship_pic = 'G:/DOTA/DOTAship/images'
    ship_label = 'G:/DOTA/DOTAship/labelTxt'

    # The original compared against this name without ever defining it, which
    # raised NameError on the first annotation line. The output folders are
    # named "DOTAship", so 'ship' is selected; edit the list to pick others.
    catogory = ['ship']

    # cv2.imwrite / shutil.copyfile do not create missing directories.
    os.makedirs(ship_pic, exist_ok=True)
    os.makedirs(ship_label, exist_ok=True)

    for labelpath in label_list:
        n = 0
        with open(labelpath, 'r') as f:
            lines = f.readlines()
        splitlines = [x.strip().split(' ') for x in lines]  # split on spaces
        for i, splitline in enumerate(splitlines):
            if i in [0, 1]:  # the first two lines of a DOTA label file are metadata
                continue
            if len(splitline) < 9:  # blank or truncated line: no category field
                continue
            if splitline[8] not in catogory:  # field 8 is the category name
                continue
            n = n + 1
            # NOTE(review): the original comment said "two or more instances",
            # but `n > 2` only fires on the third match. The threshold is kept
            # exactly as coded -- confirm which one is intended.
            if n > 2:
                name = custombasename(labelpath)
                oldimgpath = os.path.join(pic_path, name + '.png')
                img = cv2.imread(oldimgpath)
                if img is None:
                    # cv2.imread signals a missing/unreadable file with None.
                    print('cannot read image, skipped: {}'.format(oldimgpath))
                    break
                newlabelpath = os.path.join(ship_label, name + '.txt')
                # Re-encoding through imwrite (instead of copying the file)
                # allows the image suffix to be changed here if desired.
                newimage_path = os.path.join(ship_pic, name + '.png')
                cv2.imwrite(newimage_path, img)
                shutil.copyfile(labelpath, newlabelpath)
                break
2.ImgSplit.py

数据集影像切割为608*608

不足608的高或宽在下或右方向补齐(左上角不动,因为标签数据原点为左上角)

import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt


def tianchong_you(img):
    """Pad *img* on its right edge so that its width becomes 608 pixels.

    The top-left corner is left in place. The target size should match the
    one used when splitting the label files. The fill colour
    (107, 113, 115) is, per the original author's note, the dataset mean.
    """
    width = img.shape[1]
    pad_right = 608 - width
    return cv2.copyMakeBorder(
        img, 0, 0, 0, pad_right, cv2.BORDER_CONSTANT, value=(107, 113, 115)
    )

def tianchong_xia(img):
    """Pad *img* on its bottom edge so that its height becomes 608 pixels.

    The top-left corner is left in place; the added rows are filled with
    the constant colour (107, 113, 115).
    """
    height = img.shape[0]
    pad_bottom = 608 - height
    return cv2.copyMakeBorder(
        img, 0, pad_bottom, 0, 0, cv2.BORDER_CONSTANT, value=(107, 113, 115)
    )

def tianchong_xy(img):
    """Pad *img* on its bottom and right edges up to 608 x 608 pixels.

    The top-left corner is left in place; the added area is filled with the
    constant colour (107, 113, 115).
    """
    height, width = img.shape[0], img.shape[1]
    pad_bottom = 608 - height
    pad_right = 608 - width
    return cv2.copyMakeBorder(
        img, 0, pad_bottom, 0, pad_right, cv2.BORDER_CONSTANT, value=(107, 113, 115)
    )

def _tile_origins(length, tile, step):
    """Return the sorted, de-duplicated tile start offsets along one axis.

    Offsets advance by *step*; an offset whose tile would run past *length*
    is pulled back to ``length - tile`` so that every tile is full-sized.
    Requires ``length >= tile``.
    """
    last = length - tile
    return sorted({min(start, last) for start in range(0, max(length - 1, 1), step)})


def caijian(path, path_out, size_w=608, size_h=608, step=576):
    """Cut every image in *path* into ``size_h`` x ``size_w`` tiles.

    Images smaller than the tile size are first padded on the bottom and/or
    right with the constant colour (107, 113, 115), so the top-left origin
    (and therefore the label coordinates) stays unchanged. Each tile is
    written to *path_out* as ``<image name>_<start_h>_<start_w>.png``; the
    start offsets in the name are what the label-grabbing step parses.

    Args:
        path: Directory containing the source images.
        path_out: Directory that receives the tiles (created if missing).
        size_w: Tile width in pixels.
        size_h: Tile height in pixels.
        step: Stride between neighbouring tiles; with the defaults
            neighbouring tiles overlap by 32 pixels (608 - 576).
    """
    # cv2.imwrite does not create missing directories.
    os.makedirs(path_out, exist_ok=True)
    count = 0
    for im_list in os.listdir(path):
        number = 0
        # splitext copes with any suffix length (.png, .jpg, .tiff, ...).
        name = os.path.splitext(im_list)[0]
        print(name)
        # Read from the `path` argument; the original read a module-level
        # global here and therefore ignored the argument.
        img = cv2.imread(os.path.join(path, im_list))
        if img is None:
            # cv2.imread returns None for sub-directories and non-image files.
            print('cannot read image, skipped: {}'.format(im_list))
            continue

        height, width = img.shape[:2]
        pad_bottom = max(size_h - height, 0)
        pad_right = max(size_w - width, 0)
        if pad_bottom and pad_right:
            print('图片{}需要在下面和右面补齐'.format(name))
        elif pad_right:
            print('图片{}需要在右面补齐'.format(name))
        elif pad_bottom:
            print('图片{}需要在下面补齐'.format(name))
        if pad_bottom or pad_right:
            img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right,
                                     cv2.BORDER_CONSTANT, value=(107, 113, 115))
        count = count + 1

        padded_h, padded_w = img.shape[:2]
        for star_h in _tile_origins(padded_h, size_h, step):
            for star_w in _tile_origins(padded_w, size_w, step):
                cropped = img[star_h:star_h + size_h, star_w:star_w + size_w]
                # The start offsets are encoded in the file name so that the
                # matching labels can be located later.
                name_img = name + '_' + str(star_h) + '_' + str(star_w)
                cv2.imwrite(os.path.join(path_out, name_img + '.png'), cropped)
                number = number + 1
        print('图片{}切割成{}张'.format(name, number))
        print('共完成{}张图片'.format(count))

if __name__ == '__main__':
    # Split every image of the selected dataset into 608x608 tiles with a
    # stride of 576 pixels.
    ims_path='G:/DOTA/DOTAship/images/'# directory holding the source images
   # txt_path = '/home/***/data/VOCdevkit/mydataset/Annotations/txt/'
    path = 'G:/DOTA/DOTAship/imageSplit/' # directory that receives the cropped tiles
    caijian(ims_path,path,size_w=608,size_h=608,step=576)
3.txtGrab.py

标签数据自动抓取

第16行 name = im_list[:-4]

如果图像后缀为.png .jpg等则为-4,若图像后缀为.tiff则为-5

import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Names of the 15 DOTA object categories; tqtxt drops every annotation whose
# category is not in this list.
category_set = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field',
                'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
                'basketball-court', 'storage-tank', 'soccer-ball-field',
                'roundabout', 'harbor', 'swimming-pool', 'helicopter']

def tqtxt(path, path_txt, path_out, size_h=608, size_w=608, categories=None):
    """Write one label file per image tile, keeping only boxes inside the tile.

    Tile file names are expected to look like ``<base>_<h>_<w>.<suffix>``,
    where ``h``/``w`` are the tile's top/left offsets in the source image.
    For every tile the source label ``<base>.txt`` is read; its first two
    lines are copied unchanged, and every annotation whose four corner
    points all lie within the tile is rewritten with coordinates relative
    to the tile's top-left corner.

    Args:
        path: Directory containing the tile images.
        path_txt: Directory containing the source label files.
        path_out: Directory that receives the per-tile label files.
        size_h: Tile height in pixels.
        size_w: Tile width in pixels.
        categories: Category names to keep. Defaults to the module-level
            ``category_set``.
    """
    wanted = category_set if categories is None else categories
    for im_list in os.listdir(path):
        # splitext copes with any suffix length (.png, .jpg, .tiff, ...).
        name = os.path.splitext(im_list)[0]
        # Split from the right so the base name may itself contain '_'.
        # Anything without both offsets is not a tile (the original guard
        # `len < 2` still let a two-part name crash on index 2).
        parts = name.rsplit('_', 2)
        if len(parts) != 3:
            continue
        base, h_text, w_text = parts
        try:
            h = int(h_text)
            w = int(w_text)
        except ValueError:
            continue

        with open(os.path.join(path_txt, base + '.txt'), 'r') as f_in:
            lines = f_in.readlines()

        kept = list(lines[:2])  # the two header lines are copied verbatim
        for line in lines[2:]:
            fields = line.split()
            if len(fields) < 10:  # blank or truncated annotation line
                continue
            label, kunnan = fields[8], fields[9]
            if label not in wanted:
                continue
            coords = [int(float(value)) for value in fields[:8]]
            xs, ys = coords[0::2], coords[1::2]
            inside = (all(w <= x <= w + size_w for x in xs)
                      and all(h <= y <= h + size_h for y in ys))
            if not inside:
                continue
            shifted = []
            for x, y in zip(xs, ys):
                shifted.append(str(float(x - w)))
                shifted.append(str(float(y - h)))
            kept.append(' '.join(shifted + [label, kunnan]) + '\n')

        # 'w' rather than 'a': re-running must not append a second copy.
        with open(os.path.join(path_out, name + '.txt'), 'w') as f_out:
            f_out.writelines(kept)

if __name__ == '__main__':
    # Build the per-tile label files for the 608x608 tiles.
    tiles_dir = 'G:/DOTA/DOTAship/imageSplit/'  # cropped tile images
    source_labels_dir = 'G:/DOTA/DOTAship/labelTxt/'  # labels of the uncropped images
    tile_labels_dir = 'G:/DOTA/DOTAship/labelTxtSplit/'  # output: labels per tile
    tqtxt(tiles_dir, source_labels_dir, tile_labels_dir, size_h=608, size_w=608)
4.txt2xml.py

数据格式转换 / 两种矩形框的xml可选

hbb(水平矩形框):xmin ymin xmax ymax

obb(旋转矩形框):x0 y0 x1 y1 x2 y2 x3 y3

根据需要选择(如果生成obb,把主程序中传给 readlabeltxt 和 writeXml 的 hbb=True 都改为 False)

import os
import cv2
from xml.dom.minidom import Document
# import importlib,sys


# stdi, stdo, stde = sys.stdin, sys.stdout, sys.stderr
# importlib.reload(sys)
# sys.setdefaultencoding('utf-8')
# sys.stdin, sys.stdout, sys.stderr = stdi, stdo, stde


# Names of the 15 DOTA object categories; readlabeltxt skips every annotation
# whose category is not in this list.
category_set = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field',
                'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
                'basketball-court', 'storage-tank', 'soccer-ball-field',
                'roundabout', 'harbor', 'swimming-pool', 'helicopter']


def custombasename(fullname):
    """Strip the directory part and the final extension from *fullname*."""
    without_extension = os.path.splitext(fullname)[0]
    return os.path.basename(without_extension)


def limit_value(a, b):
    """Clamp *a*: raise it to 1 if smaller, then cap it at ``b - 1`` if it reaches *b*."""
    raised = 1 if a < 1 else a
    return b - 1 if raised >= b else raised


def readlabeltxt(txtpath, height, width, hbb=True):
    """Parse a DOTA-style label file into a list of boxes.

    The first two lines of the file are skipped. Every other line is
    expected to hold eight corner coordinates followed by the category name
    and the difficulty flag, separated by single spaces. Annotations whose
    category is not in ``category_set`` are printed and skipped. All
    coordinates are truncated to int and clamped with ``limit_value``
    against *width* / *height*.

    Args:
        txtpath: Path of the label file.
        height: Image height used for clamping y coordinates.
        width: Image width used for clamping x coordinates.
        hbb: If true, return axis-aligned boxes; otherwise the four corners.

    Returns:
        A list of boxes. With ``hbb`` each box is
        ``[xmin, ymin, xmax, ymax, label, difficult]``; otherwise
        ``[x1, y1, x2, y2, x3, y3, x4, y4, label, difficult]``.
    """
    print(txtpath)
    with open(txtpath, 'r') as f_in:
        lines = f_in.readlines()

    boxes = []
    for i, line in enumerate(lines):
        if i in (0, 1):  # the two header lines carry no annotation
            continue
        splitline = line.strip().split(' ')
        if len(splitline) < 10:
            # Blank or truncated line (e.g. a trailing empty line); indexing
            # fields 8/9 would raise IndexError.
            continue
        label = splitline[8]
        kunnan = splitline[9]  # difficulty flag, kept as text
        if label not in category_set:  # only the configured categories are kept
            print(label)
            continue
        coords = [int(float(value)) for value in splitline[:8]]
        xs, ys = coords[0::2], coords[1::2]
        if hbb:
            # Axis-aligned bounding box of the four corners.
            box = [limit_value(min(xs), width), limit_value(min(ys), height),
                   limit_value(max(xs), width), limit_value(max(ys), height),
                   label, kunnan]
        else:
            # Oriented box: keep the four corners in file order.
            box = []
            for x, y in zip(xs, ys):
                box.append(limit_value(x, width))
                box.append(limit_value(y, height))
            box.extend([label, kunnan])
        boxes.append(box)
    return boxes


def writeXml(tmp, imgname, w, h, d, bboxes, hbb=True):
    """Write a VOC-style annotation XML file for one image.

    Args:
        tmp: Directory in which the XML file is written.
        imgname: Image file name; stored in <filename>, and its stem names
            the XML file.
        w: Image width written to <size>/<width>.
        h: Image height written to <size>/<height>.
        d: Image depth written to <size>/<depth>.
        bboxes: Sequence of boxes whose last two items are the category
            name and the difficulty text; the leading items are the
            coordinates (4 when ``hbb`` is true, otherwise 8).
        hbb: If true, <bndbox> holds xmin/ymin/xmax/ymax; otherwise
            x0/y0 ... x3/y3.

    Returns:
        None.
    """
    doc = Document()

    def branch(parent, tag):
        # Create an empty element under `parent` and return it.
        node = doc.createElement(tag)
        parent.appendChild(node)
        return node

    def leaf(parent, tag, text):
        # Create an element under `parent` that holds a single text node.
        node = branch(parent, tag)
        node.appendChild(doc.createTextNode(text))
        return node

    annotation = branch(doc, 'annotation')
    leaf(annotation, 'folder', "VOC2007")
    leaf(annotation, 'filename', imgname)

    source = branch(annotation, 'source')
    leaf(source, 'database', "My Database")
    leaf(source, 'annotation', "VOC2007")
    leaf(source, 'image', "flickr")

    owner = branch(annotation, 'owner')
    leaf(owner, 'flickrid', "NULL")
    leaf(owner, 'name', "idannel")

    size = branch(annotation, 'size')
    for tag, value in (('width', w), ('height', h), ('depth', d)):
        leaf(size, tag, str(value))

    leaf(annotation, 'segmented', "0")

    # Coordinate tags are emitted in the same order as the box's leading items.
    if hbb:
        coord_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    else:
        coord_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')

    for bbox in bboxes:
        object_new = branch(annotation, "object")
        leaf(object_new, 'name', str(bbox[-2]))
        leaf(object_new, 'pose', "Unspecified")
        leaf(object_new, 'truncated', "0")
        leaf(object_new, 'difficult', bbox[-1])  # passed through as-is, not str()-ed
        bndbox = branch(object_new, 'bndbox')
        for position, tag in enumerate(coord_tags):
            leaf(bndbox, tag, str(bbox[position]))

    xml_stem = os.path.splitext(imgname)[0]
    target = os.path.join(tmp, xml_stem + '.xml')
    with open(target, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
    return


if __name__ == '__main__':
    # Convert every per-tile DOTA label file into a VOC-style XML annotation.
    data_path = 'G:/DOTA/DOTAship/imageSplit'
    images_path = os.path.join(data_path)  # tile images
    labeltxt_path = os.path.join('G:/DOTA/DOTAship/labelTxtSplit')  # per-tile DOTA labels
    anno_new_path = os.path.join('G:/DOTA/DOTAship/Annotation')  # output VOC annotations
    ext = '.png'  # suffix of the tile images
    # Single switch for the box format, so the reader and the writer can
    # never disagree: True = horizontal boxes, False = oriented boxes.
    use_hbb = True

    # open() inside writeXml does not create missing directories.
    os.makedirs(anno_new_path, exist_ok=True)

    filenames = os.listdir(labeltxt_path)  # one label file per tile
    for filename in filenames:
        filepath = os.path.join(labeltxt_path, filename)
        picname = os.path.splitext(filename)[0] + ext
        pic_path = os.path.join(images_path, picname)
        im = cv2.imread(pic_path)  # the image supplies H, W, D for the XML
        if im is None:
            # cv2.imread signals a missing/unreadable file with None.
            print('cannot read image, skipped: {}'.format(pic_path))
            continue
        (H, W, D) = im.shape
        boxes = readlabeltxt(filepath, H, W, hbb=use_hbb)
        if len(boxes) == 0:
            print('文件为空', filepath)

        writeXml(anno_new_path, picname, W, H, D, boxes, hbb=use_hbb)
        print('正在处理%s' % filename)
5.Rename.py

批量修改文件夹中文件名为(000000)格式,方便直接替换VOC数据集进行训练

import os
# Rename every regular file in `path` to a zero-padded six-digit index
# (000000, 000001, ...) while keeping its extension.
path = 'G:/DOTA/DOTAship/labelTxtSplit'
# os.listdir returns entries in arbitrary order. Sorting makes the numbering
# reproducible, so running this script on the image folder and on the label
# folder assigns the same number to matching files.
filelist = sorted(os.listdir(path))  # every entry, sub-directories included
count = 0
for file in filelist:
    print(file)

# Phase 1: move every file to a temporary name. Renaming straight to the
# final name could collide with (and on POSIX silently replace) a file that
# has not been processed yet but already carries a numeric name.
staged = []
for file in filelist:
    Olddir = os.path.join(path, file)  # current path of the entry
    if os.path.isdir(Olddir):  # sub-directories are left untouched
        continue
    filetype = os.path.splitext(file)[1]  # extension, including the dot
    index_text = str(count).zfill(6)  # zero-pad to six digits
    Tempdir = os.path.join(path, '__renaming_' + index_text + filetype)
    Newdir = os.path.join(path, index_text + filetype)
    os.rename(Olddir, Tempdir)
    staged.append((Tempdir, Newdir))
    count += 1

# Phase 2: all final names are free now; move the files into place.
for Tempdir, Newdir in staged:
    os.rename(Tempdir, Newdir)
6.ImageSets.py

制作ImageSets文件夹下Main文件夹中的4个文件(test.txt、train.txt、trainval.txt、val.txt)

test.txt 测试集 train.txt 训练集 val.txt 验证集 trainval.txt训练和验证集

在原始VOC2007数据集中,trainval大约占整个数据集的50%,test大约为整个数据集的50%;train大约是trainval的50%,val大约为trainval的50%,按照此标准生成txt文件

import os
import random

# Randomly split the annotation files into trainval/test, then split
# trainval into train/val, and write the four index files under
# ImageSets/Main. Each line of an index file is an annotation file name
# without its extension.
trainval_percent = 0.5  # share of all samples that goes to trainval
train_percent = 0.5  # share of trainval that goes to train
xmlfilepath = 'G:/DOTA/DOTAship/Annotation'
txtsavepath = 'G:/DOTA/DOTAship/ImageSets/Main'
# Sorted so that the index files list the samples in a stable order.
total_xml = sorted(os.listdir(xmlfilepath))

num = len(total_xml)
indices = range(num)  # not named `list`: that would shadow the builtin
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)
# Sets give O(1) membership tests in the loop below.
trainval_set = set(trainval)
train_set = set(train)

# open() does not create missing directories.
os.makedirs(txtsavepath, exist_ok=True)

with open(txtsavepath + '/trainval.txt', 'w') as ftrainval, \
        open(txtsavepath + '/test.txt', 'w') as ftest, \
        open(txtsavepath + '/train.txt', 'w') as ftrain, \
        open(txtsavepath + '/val.txt', 'w') as fval:
    for i in indices:
        # splitext instead of [:-4], so the suffix length does not matter.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)
### 将DOTA数据集标签转换为COCO格式 要将DOTA数据集的标签文件转换为COCO格式,可以按照以下方法实现。此过程涉及解析原始DOTA标注文件将其重新结构化以适应COCO的数据模型。 #### DOTA 数据集概述 DOTA 是一种面向遥感图像的目标检测数据集,其标注通常由旋转矩形框表示。这些旋转框通过五个参数定义:`x_center, y_center, width, height, angle`[^1]。而 COCO 格式的标注则基于标准矩形边界框(即 `xmin, ymin, xmax, ymax`),因此需要进行一定的转换操作。 --- #### 转换流程说明 ##### 1. 解析 DOTADATA 集中的 TXT 文件 DOTA 的标注存储在 `.txt` 文件中,每行对应一个目标实例,格式如下: ``` 类别名 x_cen y_cen w h θ ``` 其中: - `(x_cen, y_cen)` 表示旋转框中心坐标, - `w` 和 `h` 分别代表宽度和高度, - `θ` 是角度值(单位为弧度或度数)。 可以通过 Python 编写脚本来读取这些文件,提取所需的信息。 ##### 2. 计算对应的水平边框 (Bounding Box) 由于 COCO 使用的是轴对齐的矩形框,所以需将旋转框映射到最小外接矩形上。这一步可通过 OpenCV 或其他几何库完成: ```python import cv2 import numpy as np def rotated_box_to_bbox(rotated_box): """ Convert a single rotated box to its axis-aligned bounding box. Args: rotated_box: List or array of shape (5,), representing [xc, yc, w, h, theta]. Returns: A list with format [xmin, ymin, xmax, ymax], which is the corresponding bbox. """ xc, yc, w, h, theta = rotated_box # Create points for the rotated rectangle corners = np.array([[-w / 2, -h / 2], [ w / 2, -h / 2], [-w / 2, h / 2], [ w / 2, h / 2]]) rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) rotated_corners = np.dot(corners, rotation_matrix.T) + [[xc, yc]] xmin, ymin = np.min(rotated_corners, axis=0).astype(int) xmax, ymax = np.max(rotated_corners, axis=0).astype(int) return [int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin)] ``` 上述函数接受单个旋转框作为输入,返回相应的水平边界框。 ##### 3. 
构建 COCO JSON 结构 COCO 数据集的标准 JSON 文件包含三个主要部分:“images”,“annotations”以及“categories”。以下是各部分内容的具体构建方式: - **Images**: 每张图片应有一个条目,记录 ID、文件路径及其他元信息。 - **Annotations**: 对于每个对象实例,提供唯一的 annotation_id、所属 image_id、category_id 及包围盒位置等字段。 - **Categories**: 列举所有可能的物体分类及其索引编号。 下面展示了一个完整的生成逻辑样例代码片段: ```python import os from tqdm import tqdm class DotaToCocoConverter: def __init__(self, dota_dir, output_json_path): self.dota_dir = dota_dir self.output_json_path = output_json_path self.images_info = [] self.annotations = [] self.categories = [{"id": i+1, "name": cls} for i, cls in enumerate(self.get_dota_classes())] @staticmethod def get_dota_classes(): """Return all class names defined by DOTA.""" return ['plane', 'ship', ... ] # Add other classes here. def convert_and_save(self): img_idx = ann_idx = 1 labels_folder = os.path.join(self.dota_dir, 'labelTxt') images_folder = os.path.join(self.dota_dir, 'images') label_files = sorted(os.listdir(labels_folder)) total_images = len(label_files) for fname in tqdm(label_files[:total_images]): full_label_path = os.path.join(labels_folder, fname) bboxes, categories = self.parse_single_txt(full_label_path) filename = f"{fname.split('.')[0]}.png" im_height, im_width = self._get_image_size(os.path.join(images_folder, filename)) self.images_info.append({ "file_name": filename, "height": im_height, "width": im_width, "id": img_idx}) for cat, bbox in zip(categories, bboxes): converted_bb = rotated_box_to_bbox(bbox) area = converted_bb[2]*converted_bb[3] self.annotations.append({ "segmentation": [], "area": float(area), "iscrowd": 0, "image_id": img_idx, "bbox": converted_bb, "category_id": self.categories.index({"name":cat})+1, "id": ann_idx}) ann_idx += 1 img_idx += 1 final_dict = { "info": {"description":"Converted from DOTA"}, "licenses":[], "images": self.images_info, "annotations": self.annotations, "categories": self.categories} import json with open(output_json_path,'w')as jf: json.dump(final_dict,jf) @staticmethod def 
_get_image_size(image_file): img_cv = cv2.imread(image_file) return img_cv.shape[:2][::-1] if __name__ == "__main__": converter = DotaToCocoConverter("/path/to/dataset", "/desired/output.json") converter.convert_and_save() ``` 以上程序实现了从 DOTA 至 COCO 的自动化迁移功能。 --- #### 注意事项 尽管该方案能够有效执行基本转化任务,但在实际应用过程中仍可能存在某些局限性。例如,当目标具有较大倾斜角时,简单投影可能导致显著偏差;另外,不同版本间细微差异也可能影响最终效果。 ---
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值