叠加前景和背景生成voc格式目标检测样本图片和其对应的xml标注文件 python&opencv

本文链接：https://blog.youkuaiyun.com/ning_yi/article/details/114255936

自己生成一些训练数据，使用labelImg打开效果是这样：
在这里插入图片描述
代码：

import cv2
import os
import random
import xml.dom.minidom
from math import *


def spin(img, ang):
    """Rotate a foreground image by ``ang`` degrees without cropping it.

    The output canvas is enlarged to the bounding box of the rotated image,
    and the corners that the rotated image does not cover are filled with the
    colour of one randomly sampled pixel of ``img``.

    Args:
        img: Image array indexed as ``img[row, col]``; each pixel must expose
            at least three channel values (the first three are used as the
            fill colour).
        ang: Rotation angle in degrees, forwarded to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image returned by ``cv2.warpAffine``.
    """
    # Local import keeps the function independent of the file-level
    # ``from math import *``.
    from math import cos, fabs, radians, sin

    height, width = img.shape[:2]

    # Size of the axis-aligned bounding box of the rotated image.
    theta = radians(ang)
    abs_sin = fabs(sin(theta))
    abs_cos = fabs(cos(theta))
    heightNew = int(width * abs_sin + height * abs_cos)
    widthNew = int(height * abs_sin + width * abs_cos)

    matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), ang, 1)

    # Shift the transform so the rotated image is centred on the new canvas.
    matRotation[0, 2] += (widthNew - width) / 2
    matRotation[1, 2] += (heightNew - height) / 2

    # Sample one pixel to use as the fill colour.  ``randint(1, 0)`` would
    # raise ValueError for a 1-pixel-wide (or 1-pixel-tall) image, so fall
    # back to index 0 in that case.
    rx = random.randint(1, width - 1) if width > 1 else 0
    ry = random.randint(1, height - 1) if height > 1 else 0
    pixel = img[ry, rx]
    # Channel order is whatever ``img`` uses; the values are passed through
    # unchanged, converted to plain ints for OpenCV.
    fill = (int(pixel[0]), int(pixel[1]), int(pixel[2]))

    return cv2.warpAffine(img, matRotation, (widthNew, heightNew), borderValue=fill)


# Expected dataset layout:
#   * background images are named by number (0.jpg, 1.jpg, ...);
#   * foreground images live in sub-folders of itemPathF, one folder per
#     class, with the folders numbered from 0;
#   * foreground images inside each class folder are numbered like the
#     backgrounds.
# NOTE: the generation loop increments a class counter *before* using it, so
# the first foreground file actually read for a class is 1.jpg, and the
# counter wraps back to 1 after samplenum.
classnum = 5  # number of foreground classes
samplenum = 110  # number of foreground images available per class
itemIndex = [0] * classnum  # per-class counter of the last foreground image used
bacgroundPath = "E:\\data\\pic\\back2\\"  # directory holding the background images
itemPathF = "E:\\data\\pic\\"  # parent directory of the per-class foreground folders
xmlPath = "E:\\data\\pic\\xml\\"  # output directory for the XML annotation files
sampleDst = "E:\\data\\pic\\sample\\"  # output directory for the generated sample images
itemNumRange = 3  # each background receives a random 1..itemNumRange foregrounds

# Output files are numbered "<background number> + OUTPUT_INDEX_OFFSET".
OUTPUT_INDEX_OFFSET = 501


def add_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def new_annotation(img_file, img_width, img_height):
    """Create a VOC annotation document header for one image.

    Args:
        img_file: File name written to the ``filename`` and ``path`` nodes.
        img_width: Image width in pixels, written to ``size/width``.
        img_height: Image height in pixels, written to ``size/height``.

    Returns:
        ``(doc, annotation)``: the minidom document and its root
        ``<annotation>`` element, to which ``<object>`` nodes can be added.
    """
    doc = xml.dom.minidom.Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    add_text_element(doc, annotation, 'folder', 'JPEGImages')
    add_text_element(doc, annotation, 'filename', img_file)
    add_text_element(doc, annotation, 'path', './JPEGImages/' + img_file)

    source = doc.createElement('source')
    annotation.appendChild(source)
    add_text_element(doc, source, 'database', 'Unknown')

    size = doc.createElement('size')
    annotation.appendChild(size)
    add_text_element(doc, size, 'width', img_width)
    add_text_element(doc, size, 'height', img_height)
    add_text_element(doc, size, 'depth', '3')  # depth is hard-coded to 3

    add_text_element(doc, annotation, 'segmented', '0')
    return doc, annotation


def add_object(doc, annotation, label, x_min, y_min, x_max, y_max):
    """Append one ``<object>`` node (label plus bounding box) to ``annotation``."""
    obj = doc.createElement('object')
    add_text_element(doc, obj, 'name', label)
    add_text_element(doc, obj, 'pose', 'Unspecified')
    add_text_element(doc, obj, 'truncated', '0')
    add_text_element(doc, obj, 'difficult', '0')

    bndbox = doc.createElement('bndbox')
    obj.appendChild(bndbox)
    for tag, value in (('xmin', x_min), ('ymin', y_min),
                       ('xmax', x_max), ('ymax', y_max)):
        add_text_element(doc, bndbox, tag, value)

    annotation.appendChild(obj)
    return obj


def pick_foreground(counters, per_class):
    """Pick a random class and advance that class's image counter.

    Args:
        counters: Mutable list with one counter per class; updated in place.
        per_class: Highest image number available in each class.

    Returns:
        ``(class_id, image_number)`` where ``class_id`` is a valid index into
        ``counters`` and ``image_number`` cycles through ``1..per_class``.
    """
    # randrange excludes the upper bound; randint(0, n) would occasionally
    # return n and index past the end of ``counters``.
    class_id = random.randrange(len(counters))
    counters[class_id] += 1
    if counters[class_id] > per_class:
        counters[class_id] = 1
    return class_id, counters[class_id]


def random_position(back_w, back_h, item_w, item_h):
    """Choose a random top-left corner for a foreground on a background.

    ``x`` is drawn from ``[10, back_w - item_w]`` and ``y`` from
    ``[int(2 * back_h / 5), back_h - item_h]``, i.e. the foreground is kept
    inside the background and out of the top two fifths.

    Returns:
        ``(x, y)``, or ``None`` when the foreground is too large to fit.
    """
    x_low, x_high = 10, back_w - item_w
    y_low, y_high = int(2 * back_h / 5), back_h - item_h
    if x_high < x_low or y_high < y_low:
        return None
    return random.randint(x_low, x_high), random.randint(y_low, y_high)


def main():
    """Generate one composited sample image and one VOC XML per background.

    For every numerically named file in ``bacgroundPath`` this pastes a random
    1..``itemNumRange`` foreground images onto it (each optionally rotated by
    90 degrees), records their bounding boxes, and writes the image to
    ``sampleDst`` and the annotation to ``xmlPath``.
    """
    for img_file in os.listdir(bacgroundPath):
        # Resolve the output name first so that a stray, non-numeric entry in
        # the directory is skipped instead of crashing after the work is done.
        try:
            imgName = str(int(img_file.split(".")[0]) + OUTPUT_INDEX_OFFSET)
        except ValueError:
            print("skip background with non-numeric name: " + img_file)
            continue

        backImgPath = bacgroundPath + img_file
        print(backImgPath)
        backImg = cv2.imread(backImgPath)
        if backImg is None:  # cv2.imread signals failure by returning None
            print("skip unreadable background: " + backImgPath)
            continue
        backImgH, backImgW = backImg.shape[:2]

        # NOTE(review): filename/path record the source background name, not
        # the renumbered output name written below - confirm this is intended.
        doc, annotation = new_annotation(img_file, backImgW, backImgH)

        for _ in range(random.randint(1, itemNumRange)):
            itemClass, itemNo = pick_foreground(itemIndex, samplenum)
            itemPath = itemPathF + str(itemClass) + "\\" + str(itemNo) + ".jpg"
            print(itemPath)
            item = cv2.imread(itemPath)  # load the foreground image
            if item is None:
                print("skip unreadable foreground: " + itemPath)
                continue

            # (Per-class random rescaling existed here as disabled code in the
            # original script; foregrounds are used at their native size.)

            # Rotate by 90 degrees with probability 1/2.
            if random.randint(0, 1) == 1:
                item = spin(item, 90)

            itemH, itemW = item.shape[:2]
            position = random_position(backImgW, backImgH, itemW, itemH)
            if position is None:
                print("skip foreground that does not fit: " + itemPath)
                continue
            x, y = position

            # Paste the foreground and record its bounding box.
            backImg[y:y + itemH, x:x + itemW] = item
            add_object(doc, annotation, itemClass, x, y, x + itemW, y + itemH)

        samplePath = sampleDst + "%s.jpg" % imgName
        if not cv2.imwrite(samplePath, backImg):
            print("failed to write sample image: " + samplePath)

        # Open with the same encoding that the XML declaration announces, and
        # let the context manager close the file even if writexml raises.
        with open(xmlPath + '%s.xml' % imgName, 'w', encoding='utf-8') as fp:
            doc.writexml(fp, indent='\t', addindent='\t', newl='\n', encoding='utf-8')


if __name__ == '__main__':
    main()