# Import required modules
import requests
from lxml import etree
import os
# Site root; image paths scraped from the listing pages are appended to it.
BASE_URL = 'https://pic.netbian.com'
# Listing-page URL pattern; %d is replaced by the page number.
LIST_URL_TEMPLATE = BASE_URL + '/4kmeinv/index_%d.html'
# Listing pages to crawl: 2 through 9 inclusive.
# NOTE(review): page 1 presumably lives at a URL without the "_N" suffix - confirm.
PAGE_NUMBERS = range(2, 10)
# Directory the downloaded images are written to.
SAVE_DIR = './pic'
# Seconds to wait on a request before giving up, so a stalled server
# cannot hang the whole crawl.
REQUEST_TIMEOUT = 10
# Browser-like request headers sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}
# Path separators plus the characters Windows forbids in file names.
INVALID_FILENAME_CHARS = '\\/:*?"<>|'


def safe_filename(title):
    """Return *title* made safe for use as a single file-name component.

    Path separators, Windows-reserved characters and control characters are
    replaced by ``_`` so a scraped title can neither escape the target
    directory nor make ``open()`` fail. Surrounding whitespace is stripped;
    an empty result becomes ``'untitled'``.
    """
    cleaned = ''.join(
        '_' if ch in INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in title
    ).strip()
    return cleaned or 'untitled'


def fetch_listing(session, page_url):
    """Fetch one listing page and return its ``<li>`` image entries.

    Raises ``requests.RequestException`` on network failure, timeout or a
    non-2xx HTTP status. Returns an empty list when the page has no
    parseable content.
    """
    response = session.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The pages are decoded as GBK so the Chinese titles are read correctly.
    response.encoding = 'gbk'
    page_text = response.text
    if not page_text.strip():
        return []
    tree = etree.HTML(page_text)
    if tree is None:
        return []
    return tree.xpath('//*[@id="main"]/div[3]/ul/li')


def download_image(session, item):
    """Download the image referenced by one listing ``<li>`` element.

    Returns the file name written under ``SAVE_DIR``, or ``None`` when the
    element carries no ``a/img`` node with both ``src`` and ``alt``.
    Raises ``requests.RequestException`` or ``OSError`` on download or
    write failure.
    """
    sources = item.xpath('./a/img/@src')
    titles = item.xpath('./a/img/@alt')
    if not sources or not titles:
        # Entry without the expected image markup: nothing to download.
        return None
    image_url = BASE_URL + sources[0]
    name = safe_filename(titles[0]) + '.jpg'
    response = session.get(image_url, timeout=REQUEST_TIMEOUT)
    # Fail here rather than saving an HTML error page as a .jpg file.
    response.raise_for_status()
    with open(os.path.join(SAVE_DIR, name), 'wb') as f:
        f.write(response.content)
    return name


def main():
    """Crawl the configured listing pages and save every image found.

    A page or image that fails to download is reported and skipped so one
    bad request does not abort the rest of the crawl.
    """
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(SAVE_DIR, exist_ok=True)
    # One session reuses the underlying connection across all requests.
    with requests.Session() as session:
        session.headers.update(HEADERS)
        for page_number in PAGE_NUMBERS:
            page_url = LIST_URL_TEMPLATE % page_number
            try:
                items = fetch_listing(session, page_url)
            except requests.RequestException as exc:
                print('Failed to fetch page', page_url, '-', exc)
                continue
            for item in items:
                try:
                    name = download_image(session, item)
                except (requests.RequestException, OSError) as exc:
                    print('Failed to download image from', page_url, '-', exc)
                    continue
                if name is not None:
                    print(name,'爬取成功!!!')


if __name__ == '__main__':
    main()