xml dom minidom

最新推荐文章于 2025-03-03 08:15:00 发布

转载最新推荐文章于 2025-03-03 08:15:00 发布 · 212 阅读

1 ·

CC 4.0 BY-SA版权

原文链接：http://www.cnblogs.com/hupeng1234/p/6684667.html

文章标签：

#python

本文介绍了XML的基本概念、术语及其解析和创建方法。详细演示了如何使用Python的xml.dom.minidom模块进行XML文件的读取、解析及生成操作，并提供了一个简单的类封装示例。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

一. xml相关术语:

1.Document(文档): 对应一个xml文件

2.Declaration(声明):

<?xml version="1.0" encoding="utf-8"?>

version指定了版本,encoding指定了文件编码

3.Comment（注释），同html中的注释

<!--just a comment about book_store-->

4.Element（元素）:指的是从（且包括）开始标签直到
（且包括）结束标签的部分，如<book_store></book_store>

<book_store name="newhua" website="https://www.amazon.cn/b?node=1876097071">
    <book1>
        <name>hamlet</name>
        <author>William Shakespeare</author>
    </book1>
</book_store>

5.Tag(标签): 用于表示元素的起始与结束，如book1,name,author等

6.Attribute(属性),如上面的name,website

7.Text(文本),如hamlet

二.解析xml

有三种方法

from xml.dom.minidom import parse,parseString

# Way 1: give parse() a file name and let it open the file itself.
dom1 = parse('test.xml')

# Way 2: give parse() an already-open file object. The file is opened in
# binary mode so the parser decodes it according to the XML declaration
# instead of the platform default encoding, and the with-block guarantees
# the handle is closed once parsing has finished.
with open('test.xml', 'rb') as data:
    dom2 = parse(data)

# Way 3: parse XML that is held in a string.
note = """
<note>
<to>Peter</to>
<from>Jane</from>
<heading>Reminder</heading>
<body>Don't be late for the meeting</body>
</note>
"""
dom3 = parseString(note)

2.得到根元素

doc = parse('test.xml')  # parse the XML file by name
root = doc.documentElement  # root element of the parsed document

三.创建xml

from xml.dom.minidom import Document
doc = Document()  # create an empty document


from xml.dom.minidom import getDOMImplementation
impl = getDOMImplementation()
# Create a document and add the root element book_store in the same call
doc = impl.createDocument(None,"book_store",None)
print(doc.documentElement.tagName)  # prints: book_store

# doc2 is equivalent to the empty document made with doc = Document() above
doc2 = impl.createDocument(None,None,None)

四.类及层次结构

可以发现：Element,Text,Comment,Attribute的创建工作全部由Document完成，然后通过appendChild或insertBefore方法将新的对象插入到Document中。

五.具体操作

1.解析xml文件

movies.xml

<collection shelf="New Arrivals">
<movie title="Enemy Behind">
   <type>War, Thriller</type>
   <format>DVD</format>
   <year>2003</year>
   <rating>PG</rating>
   <stars>10</stars>
   <description>Talk about a US-Japan war</description>
</movie>
<movie title="Transformers">
   <type>Anime, Science Fiction</type>
   <format>DVD</format>
   <year>1989</year>
   <rating>R</rating>
   <stars>8</stars>
   <description>A scientific fiction</description>
</movie>
   <movie title="Trigun">
   <type>Anime, Action</type>
   <format>DVD</format>
   <episodes>4</episodes>
   <rating>PG</rating>
   <stars>10</stars>
   <description>Vash the Stampede!</description>
</movie>
<movie title="Ishtar">
   <type>Comedy</type>
   <format>VHS</format>
   <rating>PG</rating>
   <stars>2</stars>
   <description>Viewable boredom</description>
</movie>
</collection>

代码:

import xml.dom.minidom
from xml.dom.minidom import parse


def _child_text(element, tag_name):
    """Return the text of the first ``<tag_name>`` element under *element*.

    Returns an empty string when no such element exists or when it has no
    child node, so a movie that lacks a field does not abort the listing.
    """
    matches = element.getElementsByTagName(tag_name)
    if not matches or matches[0].firstChild is None:
        return ''
    return matches[0].firstChild.data


# Child elements printed for every movie, in output order.
MOVIE_FIELDS = ('type', 'format', 'rating', 'stars', 'description')

# Open the XML document with the minidom parser; the result is an
# xml.dom.minidom.Document instance.
tree = xml.dom.minidom.parse('movies.xml')
collection = tree.documentElement  # root element of the document
if collection.hasAttribute('shelf'):
    print("root element attribute:",collection.getAttribute("shelf"))

movie_list = collection.getElementsByTagName('movie')
movie_info_list = []  # one dict per movie: optional 'title' plus MOVIE_FIELDS
for movie in movie_list:
    print('******Movie*****')
    movie_info = {}
    if movie.hasAttribute("title"):
        movie_info['title'] = movie.getAttribute('title')
        print('Title', movie_info['title'])
    # Field values are kept in a dict rather than in variables named
    # ``type`` / ``format``, which would shadow the builtins.
    for field in MOVIE_FIELDS:
        movie_info[field] = _child_text(movie, field)
        print(field + ": ", movie_info[field])
    movie_info_list.append(movie_info)

2.写xml文件

效果:

<?xml version="1.0" encoding="utf-8"?>
<!--just a comment about book_store-->
<book_store name="amazon" website="https://www.amazon.cn/b?node=1876097071">
    <book1>
        <name>hamlet</name>
        <author>William Shakespeare</author>
    </book1>
</book_store>

代码:

from xml.dom.minidom import Document
doc = Document()

# Add a comment node in front of the root element.
comment = doc.createComment('just a comment about book_store')
doc.appendChild(comment)

# An empty document can also be obtained through the DOM implementation:
# getDOMImplementation().createDocument(None, None, None)

book_store = doc.createElement('book_store')  # create the root element
book_store.setAttribute('name', 'amazon')  # set attributes
book_store.setAttribute('website', 'https://www.amazon.cn/b?node=1876097071')
doc.appendChild(book_store)  # attach the root element to the document

book1 = doc.createElement('book1')  # create element book1
book1_name = doc.createElement('name')
book1_name_value = doc.createTextNode('hamlet')  # create a text node
book1_name.appendChild(book1_name_value)
book1_author = doc.createElement('author')
book1_author_value = doc.createTextNode('William Shakespeare')
book1_author.appendChild(book1_author_value)
book1.appendChild(book1_name)
book1.appendChild(book1_author)
book_store.appendChild(book1)

# toprettyxml() returns bytes when an encoding is given, so decode to print.
# The same bytes could instead be written to a file opened in 'wb' mode.
print(doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8').decode('utf-8'))

# writexml() emits str, so the file is opened in text mode; its encoding is
# set explicitly so the bytes on disk agree with the encoding named in the
# XML declaration. ``addindent`` (not ``indent``) is the per-level indentation:
# ``indent`` would only shift the root element and leave children unindented.
with open('test_store.xml', 'w', encoding='utf-8') as f:
    doc.writexml(f, addindent='\t', newl='\n', encoding='utf-8')

3.简单封装

class MyXMLGenerator:
    """Small convenience wrapper around an ``xml.dom.minidom.Document``.

    Nodes are created through the wrapped document (``self.doc``), attached
    with :meth:`appendChild`, and the finished tree is written to the file
    named by ``xml_name`` with :meth:`genXML`.
    """

    def __init__(self,xml_name):
        """Create an empty document that will be saved to *xml_name*."""
        self.xml_name = xml_name
        self.doc = xml.dom.minidom.Document()

    def createComment(self,value):
        """Create a comment containing *value* and append it to the document."""
        c = self.doc.createComment(value)
        self.doc.appendChild(c)

    def setNodeAttribute(self,node,attname,value):
        """Set attribute *attname* of *node* to *value*.

        Non-string values (e.g. numbers) are converted with ``str()``;
        minidom stores the value unchecked and would otherwise fail later,
        when the document is serialized.
        """
        node.setAttribute(attname,str(value))

    def createElement(self,tagName):
        """Create and return a new, unattached element named *tagName*."""
        ele = self.doc.createElement(tagName)
        return ele

    def appendChild(self,node,parent_node=None):
        """Append *node* to *parent_node*, or to the document when it is None."""
        if parent_node is not None:
            parent_node.appendChild(node)
        else:
            self.doc.appendChild(node)

    def setNodeValue(self,node,value):
        """Append a text node holding *value* to *node*.

        Non-string values are converted with ``str()`` because
        ``createTextNode`` only accepts strings.
        """
        text_node = self.doc.createTextNode(str(value))
        node.appendChild(text_node)

    def genXML(self):
        """Write the document, pretty-printed and UTF-8 encoded, to ``xml_name``."""
        # toprettyxml() returns bytes when an encoding is given, hence 'wb'.
        with open(self.xml_name,'wb') as f:
            data = self.doc.toprettyxml(indent='\t',newl='\n',encoding='utf-8')
            f.write(data)