# Scrape high-resolution images
# 1. Open the main page and extract its hyperlinks
# 2. Follow each link to fetch the high-resolution image
import requests
from bs4 import BeautifulSoup
import time
# Request headers passed to every requests.get() call in this file; only a
# desktop-browser User-Agent string is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/108.0.0.0 Safari/537.36"
    ),
}
# Module-level set (not a dict) that accumulates the hrefs collected by
# query_url(); it starts out holding the placeholder string '0'.
# NOTE: this name shadows the builtin `list` for the whole module.
list = set()
list.add('0')
# Collect the link hrefs found on the listing pages.
def query_url(mainUrl, max_pages=4, timeout=10):
    """Add the href of every link on the listing pages to the global ``list``.

    Page 1 is ``mainUrl`` itself; page ``k`` (k >= 2) is
    ``mainUrl + 'index_<k>.htm'``, so ``mainUrl`` is expected to end with
    a ``/``.  On each page, every ``<a>`` inside the
    ``<div class="item_list infinite_scroll">`` container contributes its
    ``href`` to the module-level set named ``list``.

    Args:
        mainUrl: URL of the first listing page.
        max_pages: Number of listing pages to visit, starting at page 1.
        timeout: Seconds to wait for each HTTP response before requests
            raises a timeout error instead of blocking indefinitely.

    Returns:
        None.  The result is the side effect on the module-level ``list``.
    """
    for page in range(1, max_pages + 1):
        # Each page is requested exactly once; only pages after the first
        # carry the 'index_<k>.htm' suffix.
        if page == 1:
            page_url = mainUrl
        else:
            page_url = mainUrl + 'index_' + str(page) + '.htm'
        resp = requests.get(url=page_url, headers=headers, timeout=timeout)
        resp.encoding = 'utf-8'
        # Hand the page source to BeautifulSoup.
        main_page = BeautifulSoup(resp.text, "html.parser")
        container = main_page.find("div", class_="item_list infinite_scroll")
        if container is None:
            # find() returns None when the container is absent (for example
            # a page index that does not exist); skip instead of crashing.
            continue
        for anchor in container.find_all("a"):
            href = anchor.get('href')
            # Anchors without an href would otherwise put None into the set.
            if href:
                list.add(href)
# Open a detail page and download the high-resolution images behind it.
def get_inner_img(url, child_url, timeout=10):
    """Read a detail page's pagination and pass its page count to get_photo.

    The page at ``url + child_url`` is fetched and the last ``<li>`` of its
    ``<div class="pages">`` block is inspected: the number between the final
    ``_`` and the following ``.`` of that link's href is taken as the highest
    page number.  ``get_photo`` is then called with the full page URL and
    that number.

    When the pagination block, its last link, or the number in the link is
    missing, the page count falls back to 1 instead of raising
    (presumably such a detail page has only a single page -- confirm
    against the target site).

    Args:
        url: Site root, e.g. scheme and host, without the page path.
        child_url: Path of the detail page, appended to ``url`` as-is.
        timeout: Seconds to wait for the HTTP response.
    """
    url = url + child_url
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    resp.encoding = 'utf-8'
    main_page = BeautifulSoup(resp.text, "html.parser")

    max_page = 1
    pager = main_page.find("div", class_="pages")
    # Every lookup below may legitimately come back empty/None, so each step
    # is guarded rather than chained.
    items = pager.find_all('li') if pager is not None else []
    last_link = items[-1].find('a') if items else None
    last_href = last_link.get('href') if last_link is not None else None
    if last_href:
        # Highest page number: the text between the last '_' and the next '.'.
        try:
            max_page = int(last_href.split('_')[-1].split('.')[0])
        except ValueError:
            # The href carries no '_<number>' suffix; keep the fallback.
            max_page = 1
    get_photo(url, max_page)
# Visit each page of a detail URL and save the image it shows.
def get_photo(url, max_int, timeout=10):
    """Download the ``big-pic`` image from pages 1..``max_int`` of ``url``.

    Page 1 is ``url`` itself.  Page ``k`` (k >= 2) is derived from it by
    inserting ``_<k>`` before the extension of the last path segment, e.g.
    ``.../123.htm`` -> ``.../123_2.htm``.  On every page the ``src`` of the
    ``<img>`` inside ``<div class="big-pic">`` is downloaded and written to
    ``touxiang/<image file name>.jpg``.  Pages lacking that image are
    skipped.

    Args:
        url: URL of the first page.
        max_int: Highest page number; pages 1 through ``max_int`` inclusive
            are visited.
        timeout: Seconds to wait for each HTTP response.
    """
    import os  # local import: the file's import block does not provide os

    # open() does not create directories, so make sure the target exists.
    os.makedirs("touxiang", exist_ok=True)

    # Split off the last path segment once; rebuilding the URL from these
    # parts touches only the file name, never an earlier part of the URL.
    prefix, sep, first_name = url.rpartition('/')
    stem = first_name.split('.')[0]

    # range() excludes its stop value, hence max_int + 1 to reach the last page.
    for page in range(1, max_int + 1):
        if page == 1:
            page_url = url
        else:
            page_url = prefix + sep + stem + "_" + str(page) + ".htm"
        resp = requests.get(url=page_url, headers=headers, timeout=timeout)
        resp.encoding = 'utf-8'
        main_page = BeautifulSoup(resp.text, "html.parser")
        holder = main_page.find("div", class_="big-pic")
        img = holder.find("img") if holder is not None else None
        src = img.get('src') if img is not None else None
        if not src:
            # No image on this page; move on instead of raising.
            continue
        img_resource = requests.get(src, headers=headers, timeout=timeout)
        img_name = src.split("/")[-1]
        # NOTE(review): '.jpg' is appended unconditionally, even if img_name
        # already ends with an extension -- confirm this naming is intended.
        with open("touxiang/" + img_name + '.jpg', mode="wb") as f:
            f.write(img_resource.content)  # write the image bytes to disk
        print("over", img_name)
        # Short pause between downloads.
        time.sleep(0.1)
# Script entry point: collect the links, then download the images behind each.
if __name__ == '__main__':
    # NOTE: both URL literals below contain the text '更换成你的路径'
    # ("replace with your path"); presumably it must be edited out before the
    # script can reach a real host,
    # e.g. mainUrl = 'https://www.umei.cc/meinvtupian/meinvzipai/'
    mainUrl = 'https://更换成你的路径www.umei.cc/meinvtupian/meinvzipai/'
    # Fill the module-level set `list` with the hrefs from the listing pages.
    query_url(mainUrl)
    # Process every collected href.
    for child in list:
        # The set is seeded with the string '0' (not the integer 0), so the
        # placeholder has to be compared as a string; empty or missing hrefs
        # are skipped as well.
        if not child or child == '0':
            continue
        get_inner_img('https://更换成你的路径www.umei.cc', child)
# Work required collecting some portrait photos -- so here is the code.
# First published on 2022-12-28 23:46:14