读取xml文件
在pascal_voc里面就常见到这种,整理一下。
比如一个原文件是这样的
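<annotation>
	<folder>VOC2007</folder>
	<filename>008973.jpg</filename>
	<source>
		<database>The VOC2007 Database</database>
		<annotation>PASCAL VOC2007</annotation>
		<image>flickr</image>
		<flickrid>335707085</flickrid>
	</source>
	<owner>
		<flickrid>kjmurray</flickrid>
		<name>Katherine Murray</name>
	</owner>
	<size>
		<width>500</width>
		<height>333</height>
		<depth>3</depth>
	</size>
	<segmented>1</segmented>
	<object>
		<name>cow</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>271</xmin>
			<ymin>43</ymin>
			<xmax>444</xmax>
			<ymax>279</ymax>
		</bndbox>
	</object>
</annotation>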
那读的时候可以这样读
Python 3.6.5 |Anaconda, Inc.| (default, Apr 29 2018, 16:14:56)
[GCC 7.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import xml.etree.ElementTree as ET
>>> tree = ET.parse("008973.xml")
>>> objs = tree.findall("object")
>>> objs
[<Element 'object' at 0x7f...>]
>>> imgpath = tree.find("filename").text.strip()
>>> imgpath
'008973.jpg'
>>> for idx, obj in enumerate(objs):
...     bbox = obj.find("bndbox")
...     x1 = float(bbox.find("xmin").text)-1
...     print(x1)
...
270.0
>>>
xml 的写入
from xml.dom.minidom import Document
def main():
    """Build a small ``orderlist`` DOM document and write it to ``xml.xml``.

    The document has one ``<order>`` element per key of a demo dict, each
    containing a ``<customer>`` element whose text is that key. The result
    is pretty-printed with tab indentation and UTF-8 encoded, then written
    to ``xml.xml`` in the current working directory (overwriting it).
    """
    # Create the DOM document.
    doc = Document()
    # Create the root node and attach it to the DOM tree.
    orderlist = doc.createElement("orderlist")
    doc.appendChild(orderlist)

    # Demo data: zip() stops at the shorter range, so the keys are 0, 1, 2.
    mydict = dict(zip(range(5), range(0, 5, 2)))
    for k in mydict:
        # Each record first gets its own <order> node under the root.
        order = doc.createElement("order")
        orderlist.appendChild(order)
        # Child nodes follow the same pattern: create, fill, then attach
        # to the parent.
        customer = doc.createElement("customer")
        customer.appendChild(doc.createTextNode(str(k)))
        order.appendChild(customer)

    # toprettyxml() returns bytes when an encoding is given, so the file
    # must be opened in binary mode. The indent must be a real tab ("\t");
    # a plain "t" would insert the letter t into the document.
    with open("xml.xml", "wb") as w:
        w.write(doc.toprettyxml(indent="\t", encoding="utf-8"))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
写完后的结果是
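<?xml version="1.0" encoding="utf-8"?>
<orderlist>
	<order>
		<customer>0</customer>
	</order>
	<order>
		<customer>1</customer>
	</order>
	<order>
		<customer>2</customer>
	</order>
</orderlist>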
~