# Source listing: https://www.umei.cc/bizhitupian/meinvbizhi/index_2.htm
# This script is for personal study/learning purposes only.
import re

import requests
from bs4 import BeautifulSoup

# Browser-like request headers so the site serves the normal HTML page.
# NOTE: the 'cookie' header is deliberately omitted — sending a stale cached
# cookie made the server answer 304 (Not Modified) with an empty body.
headers = {
    'authority': 'www.umei.cc',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'referer': 'https://www.umei.cc/bizhitupian/',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
}

BASE_URL = "https://www.umei.cc"


def main():
    """Download every wallpaper linked from the gallery index page.

    Walks the listing page, follows each photo's detail page, extracts the
    full-size image URL and saves it as <title>.jpg in the working directory.
    """
    # One Session reuses the TCP connection and carries the headers for
    # every request (index page, detail pages, and image downloads).
    session = requests.Session()
    session.headers.update(headers)

    # Fetch the gallery index page.
    response = session.get(f"{BASE_URL}/bizhitupian/meinvbizhi/")
    response.raise_for_status()
    # Declare the encoding explicitly; without it the decoded text was mojibake.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "lxml")

    # Each <div class="title"><a href="..."> links to one photo detail page.
    for link in soup.select("div.title a"):
        # href looks like "/bizhitupian/meinvbizhi/314119.htm"; keep the part
        # after any "=" (mirrors the original parsing) and build a full URL.
        detail_path = link["href"].split("=")[-1]
        detail_url = BASE_URL + "/" + detail_path.lstrip("/")
        name = link.get_text(strip=True)
        print(detail_url, name)

        # Detail page markup (the element we want):
        #   <div class="big-pic"><a href="..."><img src="https://.../x.jpg"></a></div>
        detail = session.get(detail_url)
        detail.raise_for_status()
        detail.encoding = "utf-8"
        detail_soup = BeautifulSoup(detail.text, "lxml")
        images = detail_soup.select("div.big-pic a img")
        if not images:
            # Page layout changed or the entry has no big picture — skip it
            # instead of crashing the whole run on an IndexError.
            print(f"no image found on {detail_url}, skipping")
            continue
        image_url = images[-1]["src"]

        # Replace characters that are illegal in (Windows) file names so the
        # page title can be used directly as the output file name.
        safe_name = re.sub(r'[\\/:*?"<>|]', "_", name) or "untitled"
        image = session.get(image_url)
        image.raise_for_status()
        with open(f"{safe_name}.jpg", "wb") as f:
            f.write(image.content)


if __name__ == "__main__":
    main()
# --- Residue from the scraped article page (not part of the script) ---
# 03-25
# 1378
# 1378
# 被折叠的 条评论  (folded comments)
# 为什么被折叠？  (why were they folded?)