采用面向对象的方式实现，代码如下:
import requests
from lxml import etree
from urllib import request
import time
class WangYiYun:
def __init__(self, base_url):
    """Fetch ``base_url`` and immediately parse the resulting page.

    Args:
        base_url: URL handed to ``request_url``.
    """
    # Keep the value returned by request_url on self.html, which is what
    # parse_html queries with xpath.
    parsed_tree = self.request_url(base_url)
    self.html = parsed_tree
    self.parse_html()
def request_url(self, base_url, timeout=10):
    """Download ``base_url`` and build an lxml HTML tree from the response.

    The decoded page text is stored on ``self.html`` and the parsed tree on
    ``self.tree``; the tree is also returned to the caller.

    Args:
        base_url: URL of the page to download.
        timeout: Timeout in seconds forwarded to ``requests.get``. Without
            one, an unresponsive server would block this call indefinitely.

    Returns:
        The tree produced by ``etree.HTML`` for the downloaded page.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Send a desktop Chrome user-agent string with the request.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    response = requests.get(url=base_url, headers=headers, timeout=timeout)
    # Decode the raw bytes explicitly as UTF-8 rather than relying on the
    # encoding requests would guess for ``response.text``.
    self.html = response.content.decode('utf-8')
    self.tree = etree.HTML(self.html)
    return self.tree
def parse_html(self):
# 解析各类歌手url
group_list = self.html.xpath('//div[@class = "blk"]')
for group in group_list:
# 连接分类
href_list = group.xpath('.//a/@href')
# 歌手分类
group_name_list = group.xpath('.//a/text()')
# print(group_name_list)
for href, gr