xml模块
描述:XML是一种用于在不同语言或程序之间交换数据的格式,作用与JSON类似,但JSON使用起来更简单。在JSON诞生之前,XML已经被使用了很久,至今很多传统公司(如金融行业)的系统接口仍然主要使用XML
存储数据的格式:一个个的标签组成
[root@python3 xml]# cat test.xml
<?xml version="1.0"?>
<data>
<country name="Liechtenstein"> #国家,country是标签,name是它的属性
<rank updated="yes">2</rank> #排名
<year>2008</year> #年份
<gdppc>141100</gdppc> #gdp
<neighbor name="Austria" direction="E"/> #邻国
<neighbor name="Switzerland" direction="W"/> #邻国,这种写法是自闭合的写法
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
xml在各个语言中都支持,以下是在python中的使用
[root@python3 xml]# cat py_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET
# Overview script: load test.xml and show three ways of reading it.
tree = ET.parse("test.xml")  # parse the XML file into an ElementTree
root = tree.getroot()  # outermost element of the document (<data>)
print(root.tag)

# Walk the whole document: iterating an element yields its direct children.
for child in root:
    print(child.tag, child.attrib)  # tag name and attribute dict of each child
    for i in child:
        print(i.tag, i.text)  # tag name and text of each grandchild

# Visit only the <year> elements, wherever they appear below root.
for node in root.iter('year'):
    print(node.tag, node.text)
root的内容
[root@python3 xml]# cat py_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET
# Print the tag of the document's outermost element.
# The sample document in these notes is test.xml; the previous name
# "xmltest.xml" does not match it and would raise FileNotFoundError.
tree = ET.parse("test.xml")
root = tree.getroot()
print(root.tag)
[root@python3 xml]# python3 py_xml.py
data
打印国家
[root@python3 xml]# cat py_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET
# List every direct child of the root element with its attributes.
tree = ET.parse("test.xml")
root = tree.getroot()
print(root.tag)
# Iterating an element yields its direct children, in document order.
for child in root:
    print(child.tag, child.attrib)  # tag name and attribute dict
[root@python3 xml]# python3 py_xml.py
data
country {'name': 'Liechtenstein'} #country: child.tag, {'name': 'Liechtenstein'}是child.attrib
country {'name': 'Singapore'}
country {'name': 'Panama'}
打印文本
[root@python3 xml]# cat py_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET
# Print each child of the root, followed by the tag and text of its own children.
tree = ET.parse("test.xml")
root = tree.getroot()
print(root.tag)
for child in root:
    print(child.tag, child.attrib)
    # Second level: .text is None for elements that carry no text content.
    for i in child:
        print(i.tag, i.text)
[root@python3 xml]# python3 py_xml.py
data
country {'name': 'Liechtenstein'} #国家标签及其属性,下面几行是它的子标签和文本
rank 2 #rank: i.tag 2: i.text
year 2008
gdppc 141100
neighbor None #neighbor是自闭合标签,没有自己的文本内容,所以text为None
neighbor None
country {'name': 'Singapore'}
rank 5
year 2011
gdppc 59900
neighbor None
country {'name': 'Panama'}
rank 69
year 2011
gdppc 13600
neighbor None
neighbor None
xml的修改
[root@python3 xml]# cat py_mod_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET
# Increment every <year> value by one, tag it as updated, and save a copy.
tree = ET.parse("test.xml")
root = tree.getroot()
for node in root.iter('year'):  # every <year> element below root
    new_year = int(node.text) + 1  # element text is a string, e.g. "2011" -> 2012
    node.text = str(new_year)  # text must be stored back as a string
    node.set("updated", "yes")  # add (or overwrite) the "updated" attribute
# All changes live in the in-memory tree; write it out once after the loop.
tree.write("test1.xml")
[root@python3 xml]# python3 py_mod_xml.py
[root@python3 xml]# cat test1.xml
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year updated="yes">2009</year> #修改了,重新加一个属性
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year updated="yes">2012</year> ###
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year updated="yes">2012</year> ####
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
<neighbor direction="E" name="Colombia" />
</country>
</data>
xml的删除操作
[root@python3 xml]# cat py_del_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET
# Remove every <country> whose rank is greater than 50 and save the result.
tree = ET.parse("test.xml")
root = tree.getroot()
# findall() returns a list, so removing from root inside the loop is safe.
for country in root.findall('country'):
    rank = int(country.find('rank').text)  # text of the <rank> child as an int
    if rank > 50:
        root.remove(country)
tree.write('test2.xml')
[root@python3 xml]# python3 py_del_xml.py
[root@python3 xml]# cat test2.xml
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
</data>[root@python3 xml]#
自创建xml
[root@python3 xml]# vim py_create_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET
# Build a small document from scratch, starting with the root <namelist>.
new_xml = ET.Element("namelist")

# First <name> entry: an <age> carrying an attribute and text, then an empty <sex>.
enrolled_entry = ET.SubElement(new_xml, "name", {"enrolled": "yes"})
first_age = ET.SubElement(enrolled_entry, "age", {"checked": "no"})
first_age.text = '33'
empty_sex = ET.SubElement(enrolled_entry, "sex")

# Second <name> entry: only an <age> with text.
other_entry = ET.SubElement(new_xml, "name", {"enrolled": "no"})
second_age = ET.SubElement(other_entry, "age")
second_age.text = '19'

# Wrap the root in an ElementTree so it can be written to a file.
et = ET.ElementTree(new_xml)
et.write("test3.xml", encoding="utf-8", xml_declaration=True)

# Also print the generated markup to stdout.
ET.dump(new_xml)
[root@python3 xml]# python3 py_create_xml.py
<namelist><name enrolled="yes"><age checked="no">33</age><sex /></name><name enrolled="no"><age>19</age></name></namelist>