Python TXT 转为XML文件

最新推荐文章于 2025-05-18 14:23:26 发布

l_ml_m_lm_m

最新推荐文章于 2025-05-18 14:23:26 发布

阅读量5.9k

点赞数 2

CC 4.0 BY-SA版权

分类专栏： python 深度学习

本文链接：https://blog.youkuaiyun.com/l_ml_m_lm_m/article/details/79664750

python 同时被 2 个专栏收录

18 篇文章

订阅专栏

深度学习

13 篇文章

订阅专栏

本文介绍如何使用Python的xml.dom.minidom模块创建XML文档。详细解释了Document对象及其方法，如createElement()、createTextNode()和setAttribute()等，并通过示例展示了如何构建完整的XML文件。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

XML文档

Document 对象
Document 对象是一棵文档树的根，可为我们提供对文档数据的最初（或最顶层）的访问入口。
由于元素节点、文本节点、注释、处理指令等均无法存在于 document 之外，document 对象同样提供了创建这些对象的方法。Node 对象提供了一个 ownerDocument 属性，此属性可把它们与在其中创建它们的 Document 关联起来。
createElement() 方法可创建元素节点。
此方法可返回一个 Element 对象。
语法

createElement(name)

参数： name
描述：字符串值，这个字符串可为此元素节点规定名称。

createTextNode() 方法可创建文本节点，即节点的标签值(data)。
此方法可返回 Text 对象
语法

createTextNode(data)

参数：data
描述：字符串值，可规定此节点的文本。

setAttribute（）给节点添加属性值(Attribute)

node.setAttribute("att_name", "arr_value")

在创建完节点(节点值)之后，还需使用下面的方法把它添加到指定节点的下面：

prev_node.appendChild(cur_node)

这里的prev_node是要添加节点的上一层节点，而cur_node即为当前要添加的节点。

遇到问题：
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 54: ordinal not in range(128)
解决方法：
原因是 Python 2 的 str 默认按 ascii 编解码，当它与 unicode 字符串混用时，隐式解码遇到非 ascii 字节就会报上面的错误。在代码中加上如下几句即可（仅适用于 Python 2，Python 3 中没有 reload 内置函数和 sys.setdefaultencoding）。

import sys
reload(sys)
sys.setdefaultencoding('utf8')

其他更多XML Document对象函数，请查看：
http://www.w3school.com.cn/xmldom/dom_document.asp

代码：

# -*- coding: utf-8 -*-

from xml.dom.minidom import Document
import os
import os.path
from PIL import Image
import sys
# Python 2 only: switch the interpreter's default str/unicode codec from
# ASCII to UTF-8 so non-ASCII text in the annotations does not raise
# UnicodeDecodeError. Python 3 has neither the ``reload`` builtin nor
# ``sys.setdefaultencoding``, so the workaround is skipped there.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')

# Input annotation text files, the matching images, and the XML output folder.
ann_path = "/home/ubuntu/Downloads/txt_9000/"
img_path = "/home/ubuntu/Downloads/image_9000/"
xml_path = "/home/ubuntu/Downloads/label_9000/"

# makedirs (rather than mkdir) also creates missing parent directories.
if not os.path.exists(xml_path):
    os.makedirs(xml_path)


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    element.appendChild(doc.createTextNode(text))
    return element


def writeXml(tmp, imgname, w, h, objbud, wxml):
    """Write a PASCAL VOC style annotation XML file for one image.

    The file is written UTF-8 encoded, tab-indented, and without the leading
    ``<?xml ...?>`` declaration line.

    Args:
        tmp: Unused; kept so existing callers keep working. Earlier versions
            staged the document in ``tmp + "test.xml"`` only to strip the
            XML declaration, which is now done in memory.
        imgname: Text stored in the ``<filename>`` element.
        w: Image width; stored as ``str(w)``.
        h: Image height; stored as ``str(h)``.
        objbud: Flat sequence of strings, five consecutive entries per
            object: name, xmin, ymin, xmax, ymax. A trailing incomplete
            group is ignored.
        wxml: Path of the XML file to create (overwritten if it exists).

    Returns:
        None.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', "VOC2007")
    _append_text_element(doc, annotation, 'filename', imgname)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', "The VOC2007 Database")
    _append_text_element(doc, source, 'annotation', "PASCAL VOC2007 ")
    _append_text_element(doc, source, 'image', "flickr")

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', str(w))
    _append_text_element(doc, size, 'height', str(h))
    _append_text_element(doc, size, 'depth', "3")

    _append_text_element(doc, annotation, 'segmented', "0")

    # Floor division keeps the object count an int on Python 3 as well.
    for i in range(len(objbud) // 5):
        base = i * 5
        object_new = doc.createElement("object")
        annotation.appendChild(object_new)

        _append_text_element(doc, object_new, 'name', objbud[base])
        _append_text_element(doc, object_new, 'pose', "Unspecified")
        _append_text_element(doc, object_new, 'truncated', "0")
        _append_text_element(doc, object_new, 'difficult', "0")

        bndbox = doc.createElement('bndbox')
        object_new.appendChild(bndbox)
        _append_text_element(doc, bndbox, 'xmin', objbud[base + 1])
        _append_text_element(doc, bndbox, 'ymin', objbud[base + 2])
        _append_text_element(doc, bndbox, 'xmax', objbud[base + 3])
        _append_text_element(doc, bndbox, 'ymax', objbud[base + 4])

    # With an explicit encoding, toprettyxml returns encoded bytes whose
    # first line is the XML declaration; keep everything after that line.
    xml_bytes = doc.toprettyxml(indent="\t", newl="\n", encoding="utf-8")
    _, _, body = xml_bytes.partition(b"\n")

    # Binary mode: the payload is already encoded, and the context manager
    # closes the file even if the write fails.
    with open(wxml, "wb") as out:
        out.write(body)


# Convert every annotation text file found under ann_path into a VOC XML
# file in xml_path, reading the image size from the matching .jpg in img_path.
for dirpath, _dirnames, filenames in os.walk(ann_path):
    # Scratch directory handed to writeXml; removed again after each folder.
    temp = "/home/ubuntu/temp/"
    if not os.path.exists(temp):
        os.mkdir(temp)
    for txt_name in filenames:
        # Parenthesised single argument prints identically on Python 2 and 3.
        print(txt_name + "-->start!")
        stem = os.path.splitext(txt_name)[0]
        img_name = stem + '.jpg'

        # Open the image through an explicit file object so the handle is
        # closed deterministically; only the size is needed.
        with open(img_path + img_name, "rb") as img_file:
            width, height = Image.open(img_file).size

        # Join with dirpath so files in sub-directories yielded by os.walk
        # are found. Strip whitespace (e.g. '\r') and skip blank lines so a
        # missing trailing newline no longer drops the last value.
        with open(os.path.join(dirpath, txt_name), "r") as filelabel:
            obj = [entry.strip()
                   for entry in filelabel.read().splitlines()
                   if entry.strip()]

        xml_name = xml_path + stem + '.xml'
        writeXml(temp, img_name, int(width), int(height), obj, xml_name)
    os.rmdir(temp)