Python3 XML操作实例

本文介绍如何使用Python标准库中的xml.etree模块解析和操作XML文件。通过实例演示了查找、遍历、删除及添加XML元素的方法。

1.countries.xml文件

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

2.python代码

使用python标准库xml中的etree处理。

2.1 查找和遍历

查找可以使用支持XPath表达式的find*系列方法(find、findall、findtext)来完成。

# Parse the document. ET.parse() returns an ElementTree that wraps the
# <data> root element.
root = ET.parse('countries.xml')
# ElementTree.getiterator() was removed in Python 3.9; iter() is the
# supported replacement. list() keeps the list result getiterator() gave.
country_node = list(root.iter('country'))
for node in country_node:
    print("=" * 30)
    if 'name' in node.attrib:
        print(node.tag, '=> name: ', node.attrib['name'])
    else:
        print(node.tag)

    print("-" * 30)
    # Element.getchildren() was removed in Python 3.9; list(element) yields
    # the same direct children.
    country_node_children = list(node)
    for country_node_child in country_node_children:
        print(country_node_child.tag, '=> ', country_node_child.text)

print("~" * 30)
print("Find all countries which have neighbors on the west as ['direction']=='W'")
# Match <neighbor direction="W"> elements, then step back up to the parent
# <country> with the trailing "/..".
countries = root.findall("./country/neighbor[@direction='W']/..")
for country in countries:
    print("-" * 30)
    if 'name' in country.attrib:
        print(country.tag, '=> name: ', country.attrib['name'])
    else:
        print(country.tag)

    print("-" * 30)
    country_infos = list(country)
    for country_info in country_infos:
        # Print whichever of text / attributes the child actually carries.
        if country_info.attrib and country_info.text:
            print(country_info.tag, '=> ', country_info.text, country_info.attrib)
        elif country_info.attrib:
            print(country_info.tag, '=> ', country_info.attrib)
        elif country_info.text:
            print(country_info.tag, '=> ', country_info.text)
        else:
            print(country_info.tag)

2.2 删除

通过待删除节点的父节点调用remove()来删除。不能通过迭代器来删除:那样只会去掉迭代结果中的引用,并不会把节点从ElementTree中真正移除。

print("~" * 30)
print("Remove all countries has neighbor on west as ['direction']=='W'")
# Select every <country> that has a <neighbor direction="W"> child; the
# trailing "/.." steps from the matching neighbor back up to its parent.
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
for country in find_countries:
    print("Remove...")
    show_country(country)
    # Remove through the parent element. Do not use the all_countries
    # iterator for removal: that only drops the reference held by the
    # iterator and does not detach the node from the ElementTree.
    root_data.remove(country)

2.3 添加

先用ET.Element()和ET.SubElement()构造新节点,再调用父节点的append()方法把它加入到父节点下。

print("~" * 30)
print("Add one country...")
# Build a detached <country name="Panama"> element first.
one_country = ET.Element("country", {"name": "Panama"})
# SubElement() creates the child and appends it to one_country in one step.
one_country_rank = ET.SubElement(one_country, "rank", {"updated": "yes"})
one_country_rank.text = "69"
one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"
one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"
# Both neighbors are attached to one_country by SubElement(); the variable is
# simply rebound and is not read again in this snippet.
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Costa Rica", "direction": "W"})
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Colombia", "direction": "E"})
# Write the new element to stdout for inspection.
ET.dump(one_country)
# Attach the finished element under the <data> root.
root_data.append(one_country)

2.4 完整代码

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


def show_country(acountry):
    """Print one country element and its direct children to stdout.

    The header line shows the element tag and, when present, its ``name``
    attribute. Each direct child is then printed with its text and/or
    attribute dict, depending on which of the two it carries.

    Args:
        acountry: An ``xml.etree.ElementTree.Element`` (normally a
            ``<country>`` node).
    """
    print("=" * 30)
    if 'name' in acountry.attrib:
        print(acountry.tag, '=> name: ', acountry.attrib['name'])
    else:
        print(acountry.tag)

    print("-" * 30)
    # Element.getchildren() was removed in Python 3.9; iterating the element
    # itself yields the same direct children.
    for _country_info in acountry:
        if _country_info.attrib and _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text, _country_info.attrib)
        elif _country_info.attrib:
            print(_country_info.tag, '=> ', _country_info.attrib)
        elif _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text)
        else:
            print(_country_info.tag)


def show_countries(country_list):
    """Print each element of *country_list*, in order, via show_country()."""
    for entry in country_list:
        show_country(entry)


# Load the document and keep a handle on both the tree and its root element.
tree = ET.parse('countries.xml')
root_data = tree.getroot()  # point to xml node: <data...
# ElementTree.getiterator() was removed in Python 3.9; iter() replaces it.
# list() keeps the list result the old call produced.
all_countries = list(tree.iter('country'))
show_countries(all_countries)

print("~" * 30)
print("Find all countries has neighbor on west as ['direction']=='W'")
# Match <neighbor direction="W">, then step up to the parent <country>.
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
show_countries(find_countries)

print("~" * 30)
print("Remove all countries has neighbor on west as ['direction']=='W'")
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
for country in find_countries:
    print("Remove...")
    show_country(country)
    # Remove through the parent element. Dropping an item from the
    # all_countries collection would only discard that reference and would
    # not detach the node from the ElementTree.
    root_data.remove(country)

print("~" * 30)
print("Remain countries...")
countries = list(tree.iter('country'))
show_countries(countries)

print("~" * 30)
print("Add one country...")
# Build the new <country> as a detached element, then attach it to the root.
one_country = ET.Element("country", {"name": "Panama"})
one_country_rank = ET.SubElement(one_country, "rank", {"updated": "yes"})
one_country_rank.text = "69"
one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"
one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Costa Rica", "direction": "W"})
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Colombia", "direction": "E"})
ET.dump(one_country)
root_data.append(one_country)

# Show the tree again so the appended country is visible.
countries = list(tree.iter('country'))
show_countries(countries)

转载于:https://my.oschina.net/smallfan/blog/1545156

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值