1.countries.xml文件
<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
2.python代码
使用 Python 标准库中的 xml.etree.ElementTree 模块处理(下文简写为 ET)。
2.1 查找和遍历
查找可以使用支持部分 XPath 语法的 find* 系列方法(find、findall、findtext、iterfind)来完成。
# Walk every <country> element and print its direct children, then use the
# limited XPath support of findall() to select countries with a western
# neighbor.  ET is xml.etree.ElementTree.
#
# Element.getiterator() / Element.getchildren() were removed in Python 3.9;
# iter() and plain iteration over the element are the supported equivalents.
root = ET.parse('countries.xml')
country_node = root.iter('country')
for node in country_node:
    print("=" * 30)
    if 'name' in node.attrib:
        print(node.tag, '=> name: ', node.attrib['name'])
    else:
        print(node.tag)
    print("-" * 30)
    # list(element) yields the direct children, in document order.
    country_node_children = list(node)
    for country_node_child in country_node_children:
        print(country_node_child.tag, '=> ', country_node_child.text)

print("~" * 30)
print("Find all countries which have neighbors on the west as ['direction']=='W'")
# Match <neighbor direction="W"> and step back up to its parent <country>.
countries = root.findall("./country/neighbor[@direction='W']/..")
for country in countries:
    print("-" * 30)
    if 'name' in country.attrib:
        print(country.tag, '=> name: ', country.attrib['name'])
    else:
        print(country.tag)
    print("-" * 30)
    country_infos = list(country)
    for country_info in country_infos:
        # Print whichever of text / attributes the child actually carries.
        if country_info.attrib and country_info.text:
            print(country_info.tag, '=> ', country_info.text, country_info.attrib)
        elif country_info.attrib:
            print(country_info.tag, '=> ', country_info.attrib)
        elif country_info.text:
            print(country_info.tag, '=> ', country_info.text)
        else:
            print(country_info.tag)
2.2 删除
必须通过要删除节点的父节点调用 remove() 来删除。不能在迭代器(iterator)上删除:那样只是去掉了迭代结果中的引用,节点本身仍然留在 ElementTree 中。
print("~" * 30)
print("Remove all countries has neighbor on west as ['direction']=='W'")
# Select each <country> that owns a <neighbor direction="W"> child.
west_neighbor_path = "./country/neighbor[@direction='W']/.."
find_countries = tree.findall(west_neighbor_path)
for country in find_countries:
    print("Remove...")
    show_country(country)
    # Removal has to go through the parent element (the <data> root here).
    # Dropping an item from an iteration result would only discard that
    # reference; the element would still be part of the ElementTree.
    root_data.remove(country)
2.3 添加
将节点加入到父节点来实现节点加入。
print("~" * 30)
print("Add one country...")
# Build the new <country> subtree detached from the document first;
# attributes are supplied as keyword arguments.
one_country = ET.Element("country", name="Panama")

one_country_rank = ET.SubElement(one_country, "rank", updated="yes")
one_country_rank.text = "69"

one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"

one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"

one_country_neighbor = ET.SubElement(
    one_country, "neighbor", name="Costa Rica", direction="W"
)
one_country_neighbor = ET.SubElement(
    one_country, "neighbor", name="Colombia", direction="E"
)

# Show the finished subtree, then attach it under the root element.
ET.dump(one_country)
root_data.append(one_country)
2.4 完整代码
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
def show_country(acountry):
    """Print a human-readable summary of one element and its direct children.

    A header line shows the element's tag and, when present, its ``name``
    attribute.  Each direct child is then printed on its own line with its
    text and/or attribute dict, depending on which of the two it carries.

    Args:
        acountry: An ``xml.etree.ElementTree.Element`` (e.g. a ``<country>``).
    """
    print("=" * 30)
    if 'name' in acountry.attrib:
        print(acountry.tag, '=> name: ', acountry.attrib['name'])
    else:
        print(acountry.tag)
    print("-" * 30)
    # Element.getchildren() was removed in Python 3.9; iterating the element
    # itself yields the same direct children in document order.
    for _country_info in acountry:
        if _country_info.attrib and _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text, _country_info.attrib)
        elif _country_info.attrib:
            print(_country_info.tag, '=> ', _country_info.attrib)
        elif _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text)
        else:
            print(_country_info.tag)
def show_countries(country_list):
    """Call show_country() on each item of *country_list*, in order."""
    for entry in country_list:
        show_country(entry)
# Demo script: list, query, remove and re-add <country> elements.
#
# ElementTree.getiterator() was removed in Python 3.9; iter() is the
# supported replacement and is used throughout.
tree = ET.parse('countries.xml')
root_data = tree.getroot()  # the top-level <data> element
all_countries = tree.iter('country')
show_countries(all_countries)

print("~" * 30)
print("Find all countries has neighbor on west as ['direction']=='W'")
# Match <neighbor direction="W"> and step back up to its parent <country>.
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
show_countries(find_countries)

print("~" * 30)
print("Remove all countries has neighbor on west as ['direction']=='W'")
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
for country in find_countries:
    print("Remove...")
    show_country(country)
    # Removal has to go through the parent element.  Dropping an item from
    # an iteration result would only discard that reference; the element
    # would still be part of the ElementTree.
    root_data.remove(country)

print("~" * 30)
print("Remain countries...")
countries = tree.iter('country')
show_countries(countries)

print("~" * 30)
print("Add one country...")
# Build the new <country> subtree detached, then append it to the root.
one_country = ET.Element("country", {"name": "Panama"})
one_country_rank = ET.SubElement(one_country, "rank", {"updated": "yes"})
one_country_rank.text = "69"
one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"
one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Costa Rica", "direction": "W"})
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Colombia", "direction": "E"})
ET.dump(one_country)
root_data.append(one_country)

countries = tree.iter('country')
show_countries(countries)