"""Download images from the sc.chinaz.com "qinglvtupian" listing pages.

Techniques shown in this script:
1. Parsing HTML with XPath (via lxml).
2. Building the output file name by concatenating path pieces.
"""
import os
import re
import urllib.parse
import urllib.request

# Folder the images are written to ("camp" is a user-chosen folder name).
_SAVE_DIR = './camp/'

# Base used to turn the scraped image references into absolute URLs.
_BASE_URL = 'https://sc.chinaz.com/tupian/'

# Characters that cannot appear in a file name on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(name):
    """Return *name* with path separators and reserved characters replaced by '_'."""
    return _UNSAFE_FILENAME_CHARS.sub('_', name)


def _absolute_url(src):
    """Resolve a scraped image reference against the listing URL.

    A protocol-relative reference (``//host/path``) becomes
    ``https://host/path``; a reference that already carries a scheme is
    returned unchanged.
    """
    return urllib.parse.urljoin(_BASE_URL, src)


def get_request(page):
    """Build the request object for one listing page.

    Args:
        page: Page number. Page 1 has no numeric suffix in its URL; any
            other page is addressed as ``qinglvtupian_<page>.html``.

    Returns:
        A ``urllib.request.Request`` carrying a browser-like User-Agent
        header.
    """
    if page == 1:
        url = 'https://sc.chinaz.com/tupian/qinglvtupian.html'
    else:
        url = 'https://sc.chinaz.com/tupian/qinglvtupian_' + str(page) + '.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42'
    }
    return urllib.request.Request(url=url, headers=headers)


def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        The decoded response body.
    """
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')


def down_load(content):
    """Parse one listing page and save each image it references.

    Image names come from the ``alt`` attribute of ``img.lazy`` elements and
    image locations from the ``data-original`` attribute of ``div > img``
    elements. Each image is written to ``./camp/<name>.jpg``.

    Args:
        content: HTML source of a listing page.
    """
    # Imported here because only this function needs the third-party parser;
    # the rest of the module stays importable without lxml installed.
    from lxml import etree

    # Parse the document, then select the data with XPath.
    tree = etree.HTML(content)
    name_list = tree.xpath('//img[@class="lazy"]/@alt')
    src_list = tree.xpath('//div/img/@data-original')

    # urlretrieve does not create missing directories.
    os.makedirs(_SAVE_DIR, exist_ok=True)

    # The two lists come from separate queries and may differ in length;
    # zip pairs them by position and stops at the shorter one instead of
    # raising IndexError.
    for name, src in zip(name_list, src_list):
        url = _absolute_url(src)
        filename = os.path.join(_SAVE_DIR, _safe_filename(name) + '.jpg')
        urllib.request.urlretrieve(url=url, filename=filename)


def main():
    """Prompt for a page range and download the images of every page in it."""
    start_page = int(input('请输入起始页'))
    end_page = int(input('请输入终止页'))
    for page in range(start_page, end_page + 1):
        # Build the customised request object.
        request = get_request(page)
        # Send the request with browser-like headers and read the page.
        content = get_content(request)
        # Download the images listed on the page.
        down_load(content)


# Program entry point.
if __name__ == '__main__':
    main()