# Fetch web page information with Python requests and lxml etree XPath
import re
from urllib.parse import urljoin

import requests
from lxml import etree
# Site root used to turn the hrefs found on the page into absolute URLs.
baseurl = "https://desk.zol.com.cn/"
# Listing page to scrape.
url = "https://desk.zol.com.cn/dongman/"

# Fetch the listing page. A timeout keeps the script from hanging forever on
# an unresponsive server, and raise_for_status() stops us from parsing an
# HTTP error page as if it were the real listing.
data = requests.get(url, timeout=10)
data.raise_for_status()
data.encoding = "gbk"  # make data.text decode the response body as GBK
et = etree.HTML(data.text)  # parse the HTML text into an lxml element tree

# Collect the href attribute of every <a> that is a direct child of an <li>
# inside the <ul> whose class attribute is exactly "pic-list2 clearfix".
list_urls = et.xpath('//ul[@class="pic-list2 clearfix"]/li/a/@href')

for item in list_urls:
    # urljoin avoids the double slash that plain concatenation produces for
    # root-relative hrefs ("/x" -> "https://desk.zol.com.cn//x") and leaves
    # hrefs that are already absolute untouched.
    urls = urljoin(baseurl, item)
    # List each resolved link.
    print(urls)