【python脚本】python实现xml及json文件角度、亮度数据集增强 - CSDN博客

该博客介绍了自用脚本，用于解决模型训练数据集过少问题。包含XML数据集角度和亮度数据增强，以及json数据集角度数据增强，分别给出对应脚本名，还列出了参考文章。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

基本思想：自用脚本，通过labelImg或labelme制作完标签后，解决模型训练数据集过少问题。

一、XML数据集角度数据增强

angle_aug.py

import cv2
import math
import numpy as np
import os
import pdb
import xml.etree.ElementTree as ET


class ImgAugemention():
    """Rotate images together with their labelImg (Pascal VOC style) boxes.

    ``process_img`` is the batch entry point; ``rotate_image`` and
    ``rotate_xml`` apply the same affine transform to the pixels and to the
    bounding-box coordinates respectively.
    """

    def __init__(self):
        # Default angle in degrees. The methods below always receive the
        # angle explicitly and never read this attribute.
        self.angle = 90

    def _rotation_matrix(self, src, angle, scale=1.):
        """Build the affine matrix that rotates ``src`` without cropping.

        Args:
            src: Image array; only ``src.shape[0]`` (height) and
                ``src.shape[1]`` (width) are read.
            angle: Rotation angle in degrees.
            scale: Isotropic scale factor applied with the rotation.

        Returns:
            ``(rot_mat, nw, nh)``: the 2x3 affine matrix and the width and
            height of the canvas that contains the whole rotated image.
        """
        w = src.shape[1]
        h = src.shape[0]
        rangle = np.deg2rad(angle)  # angle in radians
        # Size of the enlarged canvas. Rounding strips floating-point noise
        # (cos(90 deg) is ~6e-17, not 0) that would otherwise make ceil()
        # add a spurious extra pixel for right-angle rotations.
        nw = round((abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale, 6)
        nh = round((abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale, 6)
        # Rotate about the centre of the new canvas ...
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # ... and shift by the rotated offset between old and new centres.
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        # The move only affects the translation column of the transform.
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        return rot_mat, nw, nh

    def rotate_image(self, src, angle, scale=1.):
        """Return ``src`` rotated by ``angle`` degrees on an enlarged canvas.

        Args:
            src: Image array as returned by ``cv2.imread``.
            angle: Rotation angle in degrees.
            scale: Isotropic scale factor.

        Returns:
            The rotated image, resampled with Lanczos interpolation.
        """
        rot_mat, nw, nh = self._rotation_matrix(src, angle, scale)
        return cv2.warpAffine(
            src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_xml(self, src, xmin, ymin, xmax, ymax, angle, scale=1.):
        """Map a bounding box of ``src`` into the rotated image.

        The midpoints of the four box edges are transformed with the same
        matrix as ``rotate_image`` and enclosed in an upright rectangle.

        Args:
            src: The original (un-rotated) image array.
            xmin, ymin, xmax, ymax: Box corners in the original image.
            angle: Rotation angle in degrees.
            scale: Isotropic scale factor.

        Returns:
            ``(rx, ry, rw, rh)`` as produced by ``cv2.boundingRect``.
        """
        rot_mat, _, _ = self._rotation_matrix(src, angle, scale)
        x_mid = (xmin+xmax)/2
        y_mid = (ymin+ymax)/2
        # Homogeneous coordinates of the top, right, bottom and left edge
        # midpoints of the original box.
        edge_centers = np.array([
            [x_mid, ymin, 1],
            [xmax, y_mid, 1],
            [x_mid, ymax, 1],
            [xmin, y_mid, 1],
        ])
        rotated = np.dot(edge_centers, rot_mat.T)
        # Round to the nearest pixel; plain truncation turns 99.999... into
        # 99 and biases every box towards the origin.
        rotated = np.rint(rotated).astype(np.int32)
        rx, ry, rw, rh = cv2.boundingRect(rotated)
        return rx, ry, rw, rh

    def process_img(self, imgs_path, xmls_path, img_save_path, xml_save_path,
                    angle_list, start_index=1320):
        """Rotate every ``.jpg`` in ``imgs_path`` by every angle.

        For each image/angle pair the rotated image is saved as
        ``<index>.jpg`` and the matching annotation, with updated ``<size>``
        and ``<bndbox>`` values, as ``<index>.xml``. Images that cannot be
        read or have no annotation file are reported and skipped.

        Args:
            imgs_path: Directory containing the source images.
            xmls_path: Directory containing ``<image stem>.xml`` files.
            img_save_path: Output directory for rotated images.
            xml_save_path: Output directory for rotated annotations.
            angle_list: Iterable of rotation angles in degrees.
            start_index: Number used for the first output file.
        """
        for out_dir in (img_save_path, xml_save_path):
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

        num = start_index
        # Snapshot and sort once so the numbering is reproducible and files
        # written during this run are never picked up as inputs.
        img_names = sorted(os.listdir(imgs_path))
        for angle in angle_list:
            for img_name in img_names:
                # split filename and suffix
                n, s = os.path.splitext(img_name)
                # only '.jpg' files are processed
                if s != ".jpg":
                    continue

                img = cv2.imread(os.path.join(imgs_path, img_name))
                if img is None:
                    print("[%s] %s could not be read, skipped." % (angle, img_name))
                    continue

                # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b.xml").
                xml_path = os.path.join(xmls_path, n + '.xml')
                if not os.path.isfile(xml_path):
                    print("[%s] %s has no annotation, skipped." % (angle, img_name))
                    continue
                tree = ET.parse(xml_path)
                root = tree.getroot()

                rotated_img = self.rotate_image(img, angle)
                cv2.imwrite(os.path.join(img_save_path, "%d.jpg" % num), rotated_img)

                # Update the <width> and <height> elements to the new size.
                height, width = rotated_img.shape[:2]
                for size_element in root.findall('.//size'):
                    size_element.find('width').text = str(width)
                    size_element.find('height').text = str(height)

                for box in root.iter('bndbox'):
                    xmin = float(box.find('xmin').text)
                    ymin = float(box.find('ymin').text)
                    xmax = float(box.find('xmax').text)
                    ymax = float(box.find('ymax').text)
                    x, y, w, h = self.rotate_xml(img, xmin, ymin, xmax, ymax, angle)
                    # change the coord
                    box.find('xmin').text = str(x)
                    box.find('ymin').text = str(y)
                    box.find('xmax').text = str(x+w)
                    box.find('ymax').text = str(y+h)
                    box.set('updated', 'yes')

                # write into new xml
                tree.write(os.path.join(xml_save_path, "%d.xml" % num))
                num += 1
                print("[%s] %s is processed." % (angle, img_name))
        print(num)


if __name__ == '__main__':
    # Source images and their annotation files.
    source_images = '/home/ubuntu/img/item_img/'
    source_xmls = '/home/ubuntu/img/item_ano/'
    # Rotated images and annotations are written to the same directory.
    output_dir = '/home/ubuntu/aug_datasets/'
    # Rotation angles (degrees) applied to every image.
    rotation_angles = [-90, 0, 90, 180]

    ImgAugemention().process_img(
        source_images, source_xmls, output_dir, output_dir, rotation_angles)

二、XML数据集亮度数据增强

light_aug.py

# coding=utf-8
import os
import cv2
import math
import numpy as np
import shutil
from PIL import Image
from PIL import ImageEnhance

"""
1、对比度：白色画面(最亮时)下的亮度除以黑色画面(最暗时)下的亮度；
2、色彩饱和度：彩度除以明度，指色彩的鲜艳程度，也称色彩的纯度；
3、色调：向负方向调节会显现红色，正方向调节则增加黄色。适合对肤色对象进行微调；
4、锐度：是反映图像平面清晰度和图像边缘锐利程度的一个指标。
"""


def compute(img):
    """Return a single perceived-brightness value for a 3-channel image.

    Channel 0 is treated as blue, 1 as green and 2 as red. The per-channel
    means are combined as
    ``sqrt(0.241 * R**2 + 0.691 * G**2 + 0.068 * B**2)``.
    """
    blue, green, red = (np.mean(img[:, :, channel]) for channel in range(3))
    weighted_sum = (
        0.241 * (red ** 2) + 0.691 * (green ** 2) + 0.068 * (blue ** 2)
    )
    return math.sqrt(weighted_sum)

def fun_color(image, coefficient, path_save):
    """Adjust the colour saturation of ``image`` and save it to ``path_save``.

    A coefficient of 1.0 keeps the original image; for example 1.5
    strengthens and 0.8 weakens the colours.
    """
    ImageEnhance.Color(image).enhance(coefficient).save(path_save)


def fun_Contrast(image, coefficient, path_save):
    """Adjust the contrast of ``image`` and save it to ``path_save``.

    A coefficient of 1.0 keeps the original image; for example 1.5
    increases and 0.8 decreases the contrast.
    """
    ImageEnhance.Contrast(image).enhance(coefficient).save(path_save)

def fun_Sharpness(image, coefficient, path_save):
    """Adjust the sharpness of ``image`` and save it to ``path_save``.

    A coefficient of 1.0 keeps the original image; for example 3
    sharpens and 0.8 softens it.
    """
    ImageEnhance.Sharpness(image).enhance(coefficient).save(path_save)
def fun_bright(image, coefficient, path_save):
    """Adjust the brightness of ``image`` and save it to ``path_save``.

    A coefficient of 0.0 yields a black image and 1.0 keeps the original;
    for example 1.5 brightens and 0.8 darkens it.
    """
    ImageEnhance.Brightness(image).enhance(coefficient).save(path_save)
'''
def show_all():
    file_root = "C:/Users/xx/Desktop/kaiguan/image/"
    xml_root = "C:/Users/xx/Desktop/kaiguan/xml/"
    save_root = "C:/Users/xx/Desktop/kaiguan_aug/JPEGImages/"
    xml_save = "C:/Users/xx/Desktop/kaiguan_aug/Annotations/"
    list_file = os.listdir(file_root)
    cnt = 0
    for img_name in list_file:
        cnt += 1
        print("cnt=%d,img_name=%s" % (cnt, img_name))
        path = file_root + img_name
        name = img_name.replace(".jpg", "")
        image = Image.open(path)
        list_coe = [0.5,1,3]
        for val in list_coe:
            # 亮度
            path_save_bright = save_root + name + "_bri_" + str(val) + ".jpg"
            fun_bright(image, val, path_save_bright)

            # 色调
            path_save_color = save_root + name + "_color_" + str(val) + ".jpg"
            fun_color(image, val, path_save_color)

            # 对比度
            path_save_contra = save_root + name + "_contra_" + str(val) + ".jpg"
            fun_Contrast(image, val, path_save_contra)

            # 锐度
            path_save_sharp = save_root + name + "_sharp_" + str(val) + ".jpg"
            fun_Sharpness(image, val, path_save_sharp)
'''

def my_aug(file_root='/home/ubuntu/ball_img/angle_img/',
           save_root='/home/ubuntu/ball_img/light_img/',
           xml_root='/home/ubuntu/ball_img/angle_xml/',
           xml_save='/home/ubuntu/ball_img/light_xml/',
           start_index=1041):
    """Write a brightness-adjusted copy of every ``.jpg`` in ``file_root``.

    Each output image is saved as ``<index>.jpg`` in ``save_root`` and the
    matching annotation ``<image stem>.xml`` from ``xml_root`` is copied
    unchanged to ``<index>.xml`` in ``xml_save`` (brightness does not move
    the boxes). Files that are not ``.jpg``, cannot be read, or have no
    annotation are skipped.

    Args:
        file_root: Directory containing the source images.
        save_root: Output directory for the adjusted images.
        xml_root: Directory containing the source annotations.
        xml_save: Output directory for the copied annotations.
        start_index: Number used for the first output file.
    """
    for out_dir in (save_root, xml_save):
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    cnt = 0
    num = start_index
    # Sorted so that the output numbering is reproducible between runs.
    for img_name in sorted(os.listdir(file_root)):
        name, ext = os.path.splitext(img_name)
        if ext != ".jpg":
            continue
        cnt += 1
        print("cnt=%d,img_name=%s" % (cnt, img_name))

        path = os.path.join(file_root, img_name)
        xmlpath = os.path.join(xml_root, name + '.xml')
        img = cv2.imread(path)
        if img is None:
            print("skip %s: image could not be read" % img_name)
            continue
        if not os.path.isfile(xmlpath):
            print("skip %s: no annotation %s" % (img_name, xmlpath))
            continue

        # Brightness factor: 0.7 for any image with non-zero brightness,
        # 0.81 for a completely black one. Finer per-brightness thresholds
        # existed only as disabled code and are not applied.
        cof = 0.7 if compute(img) != 0 else 0.81

        # Save the image first so a failed save never leaves an orphan XML.
        with Image.open(path) as image:
            fun_bright(image, cof, os.path.join(save_root, '%d.jpg' % num))
        shutil.copy(xmlpath, os.path.join(xml_save, '%d.xml' % num))
        num += 1


if __name__ == "__main__":

    # show_all() is the disabled variant that writes brightness, colour,
    # contrast and sharpness copies; only my_aug() is run.
    my_aug()

三、json数据集角度数据增强

json_data_aug.py

# -*- coding: utf-8 -*-
import os
import sys
import json
import io
import random
import re
import cv2
import numpy as np
from random import choice
import math
 
source_path = r'datasets'
destination_path = r'data_aug'

angle = [-4, -3, -2, -1, 0, 1, 2, 3, 4]  # rotation angles in degrees


def file_name(file_dir):
    """Return the ``.json`` files located directly inside ``file_dir``.

    Sub-directories are not searched. A missing directory yields an empty
    list instead of ``None`` so callers can always iterate the result.
    """
    if not os.path.isdir(file_dir):
        return []
    return [
        os.path.join(file_dir, entry)
        for entry in sorted(os.listdir(file_dir))
        if os.path.splitext(entry)[1] == '.json'
        and not os.path.isdir(os.path.join(file_dir, entry))
    ]


def rotation_point(img, angle, pts):
    """Rotate ``img`` about its own centre and transform ``pts`` with it.

    Alternative to ``rotate_image`` that is not called by ``main``. The
    canvas is enlarged to the integer-truncated rotated size and the matrix
    is shifted by half of the size difference.

    Returns:
        ``(img, pts)``: the rotated image and the points as an
        ``(N, 1, 2)`` array.
    """
    cols = img.shape[1]
    rows = img.shape[0]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    heightNew = int(cols * math.fabs(math.sin(math.radians(angle))) + rows * math.fabs(math.cos(math.radians(angle))))
    widthNew = int(rows * math.fabs(math.sin(math.radians(angle))) + cols * math.fabs(math.cos(math.radians(angle))))
    M[0, 2] += (widthNew - cols) / 2
    M[1, 2] += (heightNew - rows) / 2
    img = cv2.warpAffine(img, M, (widthNew, heightNew))
    pts = cv2.transform(np.asarray(pts, dtype=np.float64).reshape((-1, 1, 2)), M)

    return img, pts


def _expanded_rotation(img, angle, scale=1.):
    """Return the 2x3 affine matrix and ``(width, height)`` of the canvas
    that holds ``img`` rotated by ``angle`` degrees without cropping."""
    w = img.shape[1]
    h = img.shape[0]

    rangle = np.deg2rad(angle)

    # Rounding strips floating-point noise so that ceil() below does not add
    # a spurious pixel when the exact size is already an integer.
    nw = round((abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale, 6)
    nh = round((abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale, 6)

    rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)

    # Shift by the rotated offset between the old and the new centre.
    rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]

    return rot_mat, (int(math.ceil(nw)), int(math.ceil(nh)))


def rotate_image(img, angle, pts, scale=1.):
    """Rotate ``img`` on an enlarged canvas and transform ``pts`` with it.

    Args:
        img: Image array as returned by ``cv2.imread``.
        angle: Rotation angle in degrees.
        pts: Sequence of ``[x, y]`` points in the original image.
        scale: Isotropic scale factor.

    Returns:
        ``(img, pts)``: the rotated image (Lanczos resampling) and the
        transformed points as an ``(N, 1, 2)`` float array.
    """
    rot_mat, size = _expanded_rotation(img, angle, scale)
    img = cv2.warpAffine(img, rot_mat, size, flags=cv2.INTER_LANCZOS4)
    pts = cv2.transform(np.asarray(pts, dtype=np.float64).reshape((-1, 1, 2)), rot_mat)
    return img, pts


def _rotate_shape_points(points, rot_mat):
    """Apply ``rot_mat`` to a shape's points and return ``[[x, y], ...]``."""
    pts = np.asarray([[p[0], p[1]] for p in points], dtype=np.float64)
    if pts.size == 0:
        return []
    rotated = cv2.transform(pts.reshape((-1, 1, 2)), rot_mat)
    # reshape (not squeeze) keeps single-point shapes as a list of one pair.
    return rotated.reshape(-1, 2).tolist()


def _augment_annotation(m_path, angle_item):
    """Write the rotated image and annotation for one labelme JSON file.

    The image named by the annotation's ``imagePath`` is looked up next to
    the JSON file. Outputs are named ``<image stem><angle>.jpg`` / ``.json``
    inside ``destination_path``.
    """
    with io.open(m_path, 'r', encoding='utf-8') as file_json:
        data = json.load(file_json)

    data_name = data['imagePath']
    data_path = os.path.join(os.path.dirname(m_path), data_name)
    img = cv2.imread(data_path)
    if img is None:
        print('skip %s: cannot read image %s' % (m_path, data_path))
        return

    # Rotate the image once per file; every shape shares the same matrix.
    rot_mat, size = _expanded_rotation(img, angle_item)
    im_rotate = cv2.warpAffine(img, rot_mat, size, flags=cv2.INTER_LANCZOS4)

    shape_json = [
        {"label": shape['label'],
         "points": _rotate_shape_points(shape['points'], rot_mat),
         "shape_type": shape['shape_type']}
        for shape in data['shapes']
    ]

    filename = os.path.splitext(data_name)[0]
    new_image_name = filename + ".".join([str(angle_item), "jpg"])
    data_new_picture_name = os.path.join(destination_path, new_image_name)
    data_new_json_name = os.path.join(destination_path, filename + ".".join([str(angle_item), "json"]))

    data_json = {
        'version': '5.0.1',
        'flags': {},
        'lineColor': [0, 255, 0, 128],
        'fillColor': [255, 0, 0, 128],
        'imageData': None,
        'imagePath': new_image_name,
        'imageWidth': im_rotate.shape[1],
        'imageHeight': im_rotate.shape[0],
        'shapes': shape_json,
    }

    cv2.imwrite(data_new_picture_name, im_rotate)
    # Serialise the dict directly. Dumping an already-dumped string and
    # repairing it with text replaces corrupts values that contain quotes
    # or backslashes.
    with open(data_new_json_name, 'w') as fp:
        json.dump(data_json, fp, indent=4)
    print('%s -> %s' % (m_path, data_new_json_name))


def main():
    """Rotate every annotated image in ``source_path`` by every angle."""
    if destination_path:
        os.makedirs(destination_path, exist_ok=True)
    json_paths = file_name(source_path)
    for angle_item in angle:
        for m_path in json_paths:
            _augment_annotation(m_path, angle_item)


if __name__ == '__main__':
    main()

参考文章：