一、lxml 主要用于解析 HTML 数据，并配合 XPath 从中提取内容
from lxml import etree
# Sample HTML document that the XPath queries below run against.
html = '''
<html>
<body>
<div>
<h1>Hello World</h1>
<p>This is a paragraph.</p>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</body>
</html>
'''

# Parse the markup; the returned root element is what the queries run on.
selector = etree.HTML(html)

# Queries are evaluated in order and each raw result list is printed as-is.
xpath_queries = (
    # Text of the <a> child of every <li> that carries a class attribute.
    '//li[@class]/a/text()',
    # The <li> elements themselves whose class attribute equals "item-0".
    '//li[@class="item-0"]',
)
for query in xpath_queries:
    # `items` is rebound on every pass, so it ends up holding the result
    # of the last query.
    items = selector.xpath(query)
    print(items)
texts = selector