# Scrape every image from the 216-page listing section. The scraped content is a bit out there — honestly, this site's wallpapers are fairly risqué...
import requests
from bs4 import BeautifulSoup
import os
# Output directory for downloaded images.
directory = 'pictures'
# exist_ok=True avoids the racy "check then create" pattern.
os.makedirs(directory, exist_ok=True)

# Global image counter: kept OUTSIDE the page loop so filenames never
# collide across pages (resetting it per page overwrote earlier files).
index = 0

# Pages 1..216. Page 1 is the bare listing URL; pages 2+ use index_N.htm.
# (The original `if i == 0 / elif i >= 2` left page-index 1 unhandled and
# re-fetched page 1 with a stale URL, and stopped at index_215.)
for page in range(1, 217):
    if page == 1:
        url = 'https://www.umei.cc/meinvtupian/meinvxiezhen/'
    else:
        url = 'https://www.umei.cc/meinvtupian/meinvxiezhen/' + 'index_{}'.format(page) + '.htm'

    # Fetch and parse the listing page.
    resp = requests.get(url, timeout=10)
    resp.encoding = 'utf-8'
    main_page = BeautifulSoup(resp.text, "html.parser")

    # Each gallery tile on the listing page.
    items = main_page.find("div", class_="listlbc_cont_l").find_all("div", class_="item masonry_brick")

    for item in items:
        # First <a> in the tile links to the gallery detail page.
        anchor = item.find_all("a")[0]
        url_new = 'https://www.umei.cc' + anchor.get("href")

        # Fetch and parse the detail page.
        resp1 = requests.get(url_new, timeout=10)
        resp1.encoding = 'utf-8'
        next_page = BeautifulSoup(resp1.text, 'html.parser')

        # Full-size image URL inside the "big-pic" container.
        img_src = next_page.find("div", class_="wrapper").find("div", class_="big-pic").find("a").find("img").get("src")
        print(img_src)

        # Download first, then write — so a failed request doesn't leave
        # an empty .jpg behind.
        img_data = requests.get(img_src, timeout=10).content
        with open(os.path.join(directory, f"{index}.jpg"), mode='wb') as f:
            f.write(img_data)
        index += 1