Python: scraping images and saving each one into its own folder


The folders have to exist before anything can be saved into them, so first define a folder-creation function:

import os

def make_dir(path):
    try:
        # Check whether the folder already exists
        if not os.path.exists(path):
            # It doesn't exist, so create it (including missing parents)
            os.makedirs(path)
            print(path + ' folder created!')
        else:
            print(path + ' folder already exists!')
    except Exception as e:
        print(e)
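If the printed status messages aren't needed, os.makedirs can handle the existence check by itself; a minimal alternative sketch:

def make_dir(path):
    # exist_ok=True turns makedirs into a no-op when the folder
    # already exists, replacing the manual os.path.exists() check
    os.makedirs(path, exist_ok=True)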

Next, the function that scrapes each page and saves its images:

from lxml import etree
import requests
import time

def pic_download(paths, url):
    # Request header "pool" (a single User-Agent here; see the rotating-pool sketch below)
    headers = {
        'user-agent': "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10"
    }

    web = requests.get(url, headers=headers).content.decode()
    data = etree.HTML(web)
    lists = data.xpath('//div[@class="text_left text_lefts"]/div[@id="container"]/div')
    for lis in lists:
        new_url = 'https:' + lis.xpath('./p/a/@href')[0]
        name = lis.xpath('./p/a/@alt')[0]
        # Create the folder for this image
        make_dir(paths + '/' + name)
        new_web = requests.get(new_url, headers=headers).content.decode()
        new_data = etree.HTML(new_web)
        final_url = 'https:' + new_data.xpath('//div[@class="imga"]/a/@href')[0]
        # Write the image to disk in binary mode
        try:
            r = requests.get(final_url, headers=headers).content
            with open(paths + '/' + name + '/' + name + '.jpg', 'wb') as f:
                f.write(r)
                print('%s downloaded' % name)
                time.sleep(0.5)
        except Exception:
            print('%s download failed' % name)
            time.sleep(0.5)
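The headers comment mentions a pool, but only one User-Agent is actually defined. A minimal sketch of a real rotating pool, assuming you fill USER_AGENTS with genuine browser UA strings (the second entry below is just an illustrative example):

import random

# Example User-Agent strings; extend with any real browser UAs
USER_AGENTS = [
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
]

def random_headers():
    # Pick a different UA per call so successive requests look less uniform
    return {'user-agent': random.choice(USER_AGENTS)}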

Calling the functions:

if __name__ == '__main__':
    path = 'D:/crawler_downloads'
    # Scrape pages 5 through 7
    for i in range(5, 8):
        print('========== Starting page {} =========='.format(i))
        url = 'https://sc.chinaz.com/tupian/renwutupian_{}.html'.format(i)
        pic_download(path + '/' + str(i), url)
        print('---------- Finished page {} ----------'.format(i))
        time.sleep(1)
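Note that .content.decode() assumes the pages are UTF-8. If a target page uses another encoding (GBK is still common on Chinese sites), a sketch that lets requests sniff the charset from the body instead:

resp = requests.get(url, headers=headers)
# apparent_encoding is detected from the response body, which is
# more reliable than blindly decoding the raw bytes as UTF-8
resp.encoding = resp.apparent_encoding
web = resp.text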

Full code:

from lxml import etree
import time
import requests
import os

# Create a folder
def make_dir(path):
    try:
        # Check whether the folder already exists
        if not os.path.exists(path):
            # It doesn't exist, so create it (including missing parents)
            os.makedirs(path)
            print(path + ' folder created!')
        else:
            print(path + ' folder already exists!')
    except Exception as e:
        print(e)


# Fetch and save the images
def pic_download(paths, url):
    # Request header "pool" (a single User-Agent here)
    headers = {
        'user-agent': "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10"
    }

    web = requests.get(url, headers=headers).content.decode()
    data = etree.HTML(web)
    lists = data.xpath('//div[@class="text_left text_lefts"]/div[@id="container"]/div')
    for lis in lists:
        new_url = 'https:' + lis.xpath('./p/a/@href')[0]
        name = lis.xpath('./p/a/@alt')[0]
        # Create the folder for this image
        make_dir(paths + '/' + name)
        new_web = requests.get(new_url, headers=headers).content.decode()
        new_data = etree.HTML(new_web)
        final_url = 'https:' + new_data.xpath('//div[@class="imga"]/a/@href')[0]
        # Write the image to disk in binary mode
        try:
            r = requests.get(final_url, headers=headers).content
            with open(paths + '/' + name + '/' + name + '.jpg', 'wb') as f:
                f.write(r)
                print('%s downloaded' % name)
                time.sleep(0.5)
        except Exception:
            print('%s download failed' % name)
            time.sleep(0.5)

if __name__ == '__main__':
    path = 'D:/crawler_downloads'
    # Scrape pages 5 through 7
    for i in range(5, 8):
        print('========== Starting page {} =========='.format(i))
        url = 'https://sc.chinaz.com/tupian/renwutupian_{}.html'.format(i)
        pic_download(path + '/' + str(i), url)
        print('---------- Finished page {} ----------'.format(i))
        time.sleep(1)
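Finally, .content buffers each image fully in memory before writing. For larger files, a sketch of a chunked download using requests' streaming mode (final_url and file_path stand in for the values built inside pic_download):

resp = requests.get(final_url, headers=headers, stream=True, timeout=10)
with open(file_path, 'wb') as f:
    # iter_content yields the body piece by piece instead of
    # holding the whole image in memory at once
    for chunk in resp.iter_content(chunk_size=8192):
        f.write(chunk)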
