from lxml import html
import requests
# Target page URL.
url = "https://www.4399.com/flash/"

# Fetch the page. requests applies no timeout by default, so set one to keep
# the script from blocking indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)

# Only parse the body when the request succeeded.
if response.status_code == 200:
    # Parse the raw response bytes into an lxml element tree.
    tree = html.fromstring(response.content)
    print(response.status_code)

    # Absolute XPath: text of every <b> nested under the <a> of the first
    # <li> in the <ul> directly inside the 8th <div> of <body>.
    # NOTE(review): a positional path like this breaks as soon as the page
    # layout changes -- confirm it still matches the live page.
    paragraphs = tree.xpath('/html/body/div[8]/ul/li[1]/a//b/text()')

    # Print each matched text node on its own line.
    for paragraph in paragraphs:
        print(paragraph)
else:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
# Output:
# 200
# 寻新年礼物