爬美女图的脚本分享, 灯灯灯噔....

 大批量下美女图的,  enjoy yourself....

#encoding=utf-8
import os
import time
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup

# Numeric ids kept for reference; no code visible in this file reads them.
gid = [21036, 27886, 28030, 22204]


# Browser-like HTTP headers passed to every request made through ``requests``.
# NOTE(review): "br" is advertised in Accept-Encoding -- confirm the HTTP
# client in use is able to decode Brotli-compressed responses.
heads = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Cookie": (
        "__guid=186729645.3689942829764010500.1604539727900.4944; "
        "monitor_count=42"
    ),
    # The Host header is intentionally left disabled:
    # "Host": "t1.onvshen.com:85",
    "Pragma": "no-cache",
    "Referer": (
        "https://m.nvshens.org/galleryimg.aspx"
        "?img=https://t1.onvshen.com:85/gallery/22204/34185/0.jpg"
    ),
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    ),
}


def saveImg(dir, fn, url, timeout=30):
    """Download ``url`` and write the response body to ``dir``/``fn``.

    Args:
        dir: Existing directory that receives the file. (The name shadows the
            builtin; it is kept so existing callers keep working.)
        fn: Name of the file to create inside ``dir``.
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx status, so the caller can record the failure instead of
            ending up with an error page stored as an image.
        OSError: If the target file cannot be written.
    """
    # The session and the response are both context managers; using them that
    # way releases the underlying connection even when an error is raised.
    with requests.Session() as session:
        session.max_redirects = 3
        # Redirects are not followed (allow_redirects=False), so a 3xx answer
        # is returned as-is and its body is what gets written.
        with session.get(
            url, headers=heads, allow_redirects=False, timeout=timeout
        ) as response:
            response.raise_for_status()
            payload = response.content

    # Open the file only after a successful download so a failed request does
    # not leave an empty file behind; ``with`` guarantees the handle is closed.
    with open(os.path.join(dir, fn), "wb") as out:
        out.write(payload)

def visPage(url):
    """Fetch ``url`` with the shared ``heads`` headers and return the body text.

    The response encoding is set to UTF-8 before the text is read, so the
    body is decoded as UTF-8 whatever charset the server reported.
    """
    response = requests.get(url, headers=heads)
    response.encoding = "utf-8"
    page_text = response.text
    return page_text

# Inert string literal holding an earlier experiment: it fetched the album
# page and printed the ``src`` of every ``.hezi img`` element. Because it is
# only a string expression, none of the code inside it is ever executed.
'''
r = requests.get("https://m.nvshens.org/girl/22204/album/#")
r.encoding = 'utf-8'
soup =  BeautifulSoup(r.content,"lxml")
imgs = soup.select(".hezi img")

for img in imgs:
    print(img['src'])
    #saveImg(img['src'])
'''

# Parse the album-list page and collect the photo count of every album.
url = "https://m.nvshens.org/girl/22204/album/#"

# Maps an album key (sliced out of the album link) to its number of photos.
pks = {}
album_list_soup = BeautifulSoup(visPage(url), "lxml")
for anchor in album_list_soup.select(".ck-item a.ck-link"):
    album_href = anchor["href"]

    # Parse the photo-set page; the first "#ddinfo span" holds the count label.
    album_soup = BeautifulSoup(visPage(urljoin(url, album_href)), "lxml")
    count_label = album_soup.select("#ddinfo span")[0].get_text()
    # The label's last three characters are dropped before parsing the number.
    photo_total = int(count_label[:-3])
    print("当前 套图 URL %s, 有 %d 张照片..."%(album_href, photo_total) )

    # Strip the first three characters and the last one to form the key.
    pks[album_href[3:-1]] = photo_total

# Download every photo of every collected album into one flat directory,
# remembering which (album key, index) pairs failed.
fail_pk_ones = []
rdir = "meitu/"
# Create the target directory once, before the loop; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(rdir, exist_ok=True)

for pk, lnks in pks.items():
    for pi in range(lnks):
        # NOTE(review): the Referer in ``heads`` points at ".../0.jpg", while
        # index 0 is requested here as "000.jpg" -- confirm the real file name
        # of the first image on the server.
        seq = str(pi).zfill(3)
        furl = "https://t1.onvshen.com:85/gallery/%s/%s/%s.jpg"%("22204", pk, seq)
        print("save img - " + furl)
        try:
            saveImg(rdir, "%s_%s.jpg"%(pk, seq), furl)
        except Exception as e:
            # Best effort: report the error, note the failed photo, and carry
            # on with the next one rather than aborting the whole run.
            print(e)
            fail_pk_ones.append({pk: pi})

print("fail ones ... "+ str(fail_pk_ones))
print("img download finish....")

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值