使用BeautifulSoup实现的图片爬虫

主要使用 BeautifulSoup 获取图片链接,然后使用 urllib.request.urlretrieve() 函数把图片下载到对应的文件夹。

from  bs4 import BeautifulSoup
from urllib.request import urlopen
import urllib
import re
import os

def get_web_bsobj(target_url):
    """Download a web page and parse it into a BeautifulSoup tree.

    Args:
        target_url: URL of the page to fetch.

    Returns:
        The BeautifulSoup object built from the response body using the
        ``html5lib`` parser.

    Any exception raised by ``urllib.request.urlopen`` (for example
    ``urllib.error.URLError``) propagates to the caller.
    """
    req = urllib.request.Request(target_url)
    # The response is consumed while the parser is constructed, so it can be
    # closed right afterwards instead of leaking the connection.
    with urllib.request.urlopen(req) as h:
        bsobj = BeautifulSoup(h, "html5lib")
    print(bsobj.title)
    return bsobj

def get_file_bsobj(file_url="file:c:/mzt.html"):
    """Parse a locally saved HTML page into a BeautifulSoup tree.

    Args:
        file_url: ``file:`` URL of the saved page. Defaults to the location
            this script has always used, so existing no-argument calls keep
            working.

    Returns:
        The BeautifulSoup object built with the ``html5lib`` parser.
    """
    # Close the file handle as soon as the parser has read it.
    with urlopen(file_url) as h:
        bsobj = BeautifulSoup(h, 'html5lib')
    return bsobj


def download_img(target_url, file_path):
    """Download every image linked from the comments of one page.

    The page is fetched with ``get_web_bsobj``. Every ``<li>`` whose id
    matches ``comment-<digits>`` is scanned for ``<a class="view_img_link">``
    anchors, and each anchor's ``href`` is saved as ``<file_path><n>.jpg``,
    with ``n`` counting from 1.

    Args:
        target_url: URL of the page to scan; also used as the base for
            resolving relative or protocol-relative image links.
        file_path: Prefix prepended to each file name (the caller passes a
            directory path ending in ``/``).

    Exceptions from fetching the page or from ``urlretrieve`` propagate.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    bsobj = get_web_bsobj(target_url)
    # A page without a <title> must not abort the download.
    title = bsobj.title.text if bsobj.title is not None else ""
    print(str(title) + "解析就绪")
    content = bsobj.find_all('li', {"id": re.compile("comment-[0-9]+")})
    my_img_url = []
    for item in content:
        for link in item.find_all('a', {'class': 'view_img_link'}):
            href = link.get('href')
            if not href:
                # Anchor without a usable target: nothing to download.
                continue
            print(href)
            # Links may be protocol-relative ("//host/x.jpg") or relative;
            # urlretrieve needs an absolute URL with a scheme.
            my_img_url.append(urljoin(target_url, href))
        print("###########")
    print("^^^^^^^^^^^the next^^^^^^^^^^^^^")
    for index, img_url in enumerate(my_img_url, start=1):
        print(img_url)
        fn = file_path + str(index) + ".jpg"
        urllib.request.urlretrieve(img_url, fn)
        # Report the number of the file just written (previously this
        # printed the next counter value, i.e. one too high).
        print("saved %s" % index)


def run():
    """Interactive entry point: ask for a page range and download each page.

    Prompts for a start page number and a page count, then for every page
    creates ``d:/mzt/<page>/`` and calls ``download_img`` on
    ``http://jandan.net/ooxx/page-<page>``.

    Raises:
        ValueError: if either prompt is answered with a non-integer.
    """
    print("""
    #########################################
    #       煎蛋网妹子图片下载器        #####
    #########################################
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    运行之前请先在D盘下面新建mzt文件夹 摸摸蛋
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    """)
    start_page_num = int(input("输入页码:"))
    total_page_num = int(input("输入要获取的页数"))

    s_url = "http://jandan.net/ooxx/page-"
    file_path = "d:/mzt/"

    for page_num in range(start_page_num, start_page_num + total_page_num):
        temp_dir = file_path + str(page_num) + "/"
        print(temp_dir)
        # makedirs + exist_ok: works when the parent folder is missing and
        # when the script is re-run for a page that was already downloaded.
        os.makedirs(temp_dir, exist_ok=True)
        url = s_url + str(page_num)
        print(url)
        download_img(url, temp_dir)
        print("##################" + url + "下载完毕" + "################")


# Start the interactive downloader only when executed as a script,
# not when this module is imported.
if "__main__" == __name__:
    run()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值