Python代码:
# -*- coding: UTF-8 -*-
'''
Created on 2016年3月20日
@author: Administrator
'''
from xml.dom import minidom
import os
def listFiles(path):
    """Return the entry names of directory ``path`` in sorted order.

    ``os.listdir`` yields entries in arbitrary order, so the result is
    sorted to make the processing order reproducible between runs.  The
    names are bare entry names (not joined with ``path``) and include
    every kind of directory entry, not only regular files.
    """
    return sorted(os.listdir(path))
def readXML(xmlPath, valueList=None):
    """Collect the distinct ``name`` attributes of the books in an XML file.

    Only the first ``<bookList>`` element found below the document root
    (in document order) is examined; every ``<book>`` element underneath
    it contributes the value of its ``name`` attribute.  A name that is
    already in the target list is skipped, so the list keeps first-seen
    order without duplicates.

    Args:
        xmlPath: Path of the XML file to parse.
        valueList: List the names are appended to.  When omitted, the
            module-level ``attributeValueList`` is used; that name must
            then exist at module level (the script entry point creates it).

    Returns:
        The list that was appended to.
    """
    if valueList is None:
        valueList = attributeValueList
    doc = minidom.parse(xmlPath)
    root = doc.documentElement
    bookLists = root.getElementsByTagName("bookList")
    if not bookLists:
        # A document without any <bookList> has nothing to contribute;
        # indexing [0] here would raise IndexError.
        return valueList
    for node in bookLists[0].getElementsByTagName("book"):
        book_name = node.getAttribute("name")
        if book_name not in valueList:
            valueList.append(book_name)
    return valueList
def compareXML(xmlPath):
    """Run ``readXML`` on every entry of the directory ``xmlPath``.

    Each name returned by ``listFiles(xmlPath)`` is joined with the
    directory and handed to ``readXML``; no filtering by file type or
    extension is done here.

    Args:
        xmlPath: Directory holding the XML files, with or without a
            trailing path separator.
    """
    for tfile in listFiles(xmlPath):
        # os.path.join adds the separator only when it is missing; plain
        # string concatenation produced a wrong path for a directory given
        # without a trailing separator.
        readXML(os.path.join(xmlPath, tfile))
def printList(tlist):
    """Print each element of ``tlist`` on its own line."""
    for tl in tlist:
        # The parenthesised single-argument form gives the same output as
        # a Python 2 print statement and as the Python 3 print function.
        print(tl)
def writeTxt(txtPath, argList):
    """Write the strings in ``argList`` to ``txtPath``, sorted, one per line.

    The file is opened in ``"w"`` mode, so any existing content is
    replaced.  ``argList`` itself is not modified; a sorted copy is
    written.

    Args:
        txtPath: Path of the text file to create or overwrite.
        argList: Iterable of strings to write.
    """
    # The context manager closes the file even if a write raises.
    with open(txtPath, "w") as f:
        f.writelines(arg + "\n" for arg in sorted(argList))
if __name__ == '__main__':
    # Module-level accumulator: readXML appends the book names it finds
    # to this list, so it has to exist before compareXML is called.
    attributeValueList = []
    # Directory that is scanned, and the text file that receives the
    # sorted result.
    xmlPath = "D:\\test\\"
    txtPath = "D:\\out\\out_file.txt"
    compareXML(xmlPath)
    printList(attributeValueList)
    writeTxt(txtPath, attributeValueList)
test.xml:
<?xml version="1.0" encoding="UTF-8"?> <mybook id="mb001"> <bookList> <book name="bookA" id="A">AAA</book> <book name="bookB" id="B">BBB</book> <book name="bookD" id="D">DDD</book> <book name="bookE" id="E">EEE</book> </bookList> <bc> <bookList> <book name="bcbookA" id="A">AAA</book> <book name="bcbookB" id="B">BBB</book> </bookList> </bc> </mybook>
test2.xml:
<?xml version="1.0" encoding="UTF-8"?> <mybook id="mb001"> <bookList> <book name="bookA" id="A">AAA</book> <book name="bookB" id="B">BBB</book> <book name="bookC" id="C">CCC</book> </bookList> <bc> <bookList> <book name="bcbookA" id="A">AAA</book> <book name="bcbookB" id="B">BBB</book> </bookList> </bc> <bcc> <bookList> <book name="bcbookA" id="A">AAA</book> <book name="bcbookB" id="B">BBB</book> </bookList> </bcc> </mybook>