# Source listing: https://www.umei.cc/bizhitupian/meinvbizhi/index_2.htm
# This script is for personal study/learning purposes only.
import re

import requests
from bs4 import BeautifulSoup

# Browser-like request headers so the site serves the normal HTML page.
# NOTE: the 'cookie' header is deliberately omitted — sending a stale cached
# cookie made the server answer 304 (Not Modified) with an empty body.
headers = {
    'authority': 'www.umei.cc',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'referer': 'https://www.umei.cc/bizhitupian/',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
}

BASE_URL = "https://www.umei.cc"


def main():
    """Download every wallpaper linked from the gallery index page.

    Walks the listing page, follows each photo's detail page, extracts the
    full-size image URL and saves it as <title>.jpg in the working directory.
    """
    # One Session reuses the TCP connection and carries the headers for
    # every request (index page, detail pages, and image downloads).
    session = requests.Session()
    session.headers.update(headers)

    # Fetch the gallery index page.
    response = session.get(f"{BASE_URL}/bizhitupian/meinvbizhi/")
    response.raise_for_status()
    # Declare the encoding explicitly; without it the decoded text was mojibake.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "lxml")

    # Each <div class="title"><a href="..."> links to one photo detail page.
    for link in soup.select("div.title a"):
        # href looks like "/bizhitupian/meinvbizhi/314119.htm"; keep the part
        # after any "=" (mirrors the original parsing) and build a full URL.
        detail_path = link["href"].split("=")[-1]
        detail_url = BASE_URL + "/" + detail_path.lstrip("/")
        name = link.get_text(strip=True)
        print(detail_url, name)

        # Detail page markup (the element we want):
        #   <div class="big-pic"><a href="..."><img src="https://.../x.jpg"></a></div>
        detail = session.get(detail_url)
        detail.raise_for_status()
        detail.encoding = "utf-8"
        detail_soup = BeautifulSoup(detail.text, "lxml")
        images = detail_soup.select("div.big-pic a img")
        if not images:
            # Page layout changed or the entry has no big picture — skip it
            # instead of crashing the whole run on an IndexError.
            print(f"no image found on {detail_url}, skipping")
            continue
        image_url = images[-1]["src"]

        # Replace characters that are illegal in (Windows) file names so the
        # page title can be used directly as the output file name.
        safe_name = re.sub(r'[\\/:*?"<>|]', "_", name) or "untitled"
        image = session.get(image_url)
        image.raise_for_status()
        with open(f"{safe_name}.jpg", "wb") as f:
            f.write(image.content)


if __name__ == "__main__":
    main()
# --- Residue from the scraped article page (not part of the script) ---
# 03-25
# 1378
# 1378
# 被折叠的 条评论  (folded comments)
# 为什么被折叠？  (why were they folded?)