# 易中天品三国音频爬取
import requests
from lxml import etree
from urllib import parse,request
# Entry page of the scrape; the pagination links are read from this page.
base_url = 'https://www.ximalaya.com/lishi/13396678/'
# Send a desktop-browser User-Agent instead of the default one used by requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}
# Bound the wait so a stalled connection cannot hang the script, and fail
# loudly on an HTTP error status instead of parsing an error page.
response = requests.get(base_url, headers=headers, timeout=10)
response.raise_for_status()
html_ele = etree.HTML(response.text)

# href of the first <li> in the nav list.
re_p1 = '//*[@id="root"]/main/section/div/div[2]/div[1]/div[2]/div[2]/div/nav/ul/li[1]/a/@href'
# hrefs of the <li> entries whose class attribute is exactly "Yetd page-item".
re_p = '//*[@id="root"]/main/section/div/div[2]/div[1]/div[2]/div[2]/div/nav/ul/li[@class="Yetd page-item"]/a/@href'
href_list1 = html_ele.xpath(re_p1)
href_list2 = html_ele.xpath(re_p)
href_list = href_list1 + href_list2

# Resolve every href against the unchanged entry URL. The earlier version
# rebound base_url inside the loop, so each later href was joined onto the
# previous page's URL and only the last result survived.
# parse.urljoin is the documented API; request.urljoin merely re-exported it.
page_urls = [parse.urljoin(base_url, href) for href in href_list]
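# Source article: "Crawler day 5 - scraping the audio of Yi Zhongtian's Pin San Guo"
# (blog page footer: latest recommended article published 2022-01-08 13:28:31)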