from lxml import etree
# Demonstration of lxml's etree.tostring() serialisation options.
# tostring() returns bytes unless encoding='unicode' is requested, so under
# Python 3 print() shows the results as bytes literals (b'...').

# --- Basic XML serialisation -------------------------------------------------
root = etree.XML('<root><a><b/></a></root>')

# Default call: no extra options.
print(etree.tostring(root))
# Output: b'<root><a><b/></a></root>'

# Explicitly request an XML declaration.
print(etree.tostring(root, xml_declaration=True))
# Serialised content (printed as a bytes literal, line break shown as \n):
#   <?xml version='1.0' encoding='ASCII'?>
#   <root><a><b/></a></root>

# Request a specific byte encoding.
print(etree.tostring(root, encoding='iso-8859-1'))
# Serialised content (printed as a bytes literal, line break shown as \n):
#   <?xml version='1.0' encoding='iso-8859-1'?>
#   <root><a><b/></a></root>

# Pretty-printing puts each element on its own, indented line.
print(etree.tostring(root, pretty_print=True))
# Serialised content (printed as a bytes literal, line breaks shown as \n):
#   <root>
#     <a>
#       <b/>
#     </a>
#   </root>

# --- Serialisation methods: xml / html / text --------------------------------
root = etree.XML('<html><head/><body><p>Hello<br/>World</p></body></html>')

print(etree.tostring(root))
# Output: b'<html><head/><body><p>Hello<br/>World</p></body></html>'

# method='xml' gives the same result as the call without a method argument.
print(etree.tostring(root, method='xml'))
# Output: b'<html><head/><body><p>Hello<br/>World</p></body></html>'

# method='html' writes <head></head> and <br> instead of self-closing tags.
print(etree.tostring(root, method='html'))
# Output: b'<html><head></head><body><p>Hello<br>World</p></body></html>'

print(etree.tostring(root, method='html', pretty_print=True))
# Serialised content (printed as a bytes literal, line breaks shown as \n):
#   <html>
#   <head></head>
#   <body><p>Hello<br>World</p></body>
#   </html>

# method='text' keeps only the text content and drops all tags.
print(etree.tostring(root, method='text'))
# Output: b'HelloWorld'

# --- Non-ASCII text ----------------------------------------------------------
# Put a non-ASCII character into the text that follows the <br/> element.
br = next(root.iter('br'))
br.tail = u'W\xf6rld'

# The default output encoding is ASCII, so serialising the non-ASCII text
# fails.  Catch the error so the demonstration can continue instead of
# aborting the whole script before the working example below.
try:
    etree.tostring(root, method='text')
except UnicodeEncodeError as exc:
    print(exc)
# Output: the UnicodeEncodeError message ('ascii' codec cannot encode '\xf6')

# encoding='unicode' returns a text string instead of bytes, so no byte
# encoding step is involved and the character survives.
print(etree.tostring(root, encoding='unicode', method='text'))
# Output: HelloWörld
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
本文介绍使用Python的lxml库处理XML文档的方法,包括不同格式化选项、编码方式及文本提取技巧。通过实例展示了如何生成整洁易读的XML输出,并处理特殊字符。
395

被折叠的 条评论
为什么被折叠?



