First, set up the folders. Define a folder-creation function:
def makdir(path):
    try:
        # Check whether the folder already exists
        isExists = os.path.exists(path)
        if not isExists:
            # Create it if it does not exist
            os.makedirs(path)
            print(path + ' folder created!')
        else:
            print(path + ' folder already exists!')
    except Exception as e:
        print(e)
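A side note: since Python 3.2 the explicit exists check can be collapsed into a single call with the exist_ok flag of os.makedirs. A minimal alternative sketch (equivalent behavior, not used in the code below):

import os

def makdir(path):
    # exist_ok=True turns the call into a no-op when the folder already exists
    os.makedirs(path, exist_ok=True)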
Next, the function that scrapes and saves the images:
def pic_download(paths, url):
    # Request headers (a single User-Agent; a real rotating pool is sketched after this function)
    headers = {
        'user-agent': "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10"
    }
    web = requests.get(url, headers=headers).content.decode()
    data = etree.HTML(web)
    lists = data.xpath('//div[@class="text_left text_lefts"]/div[@id="container"]/div')
    for lis in lists:
        new_url = 'https:' + lis.xpath('./p/a/@href')[0]
        name = lis.xpath('./p/a/@alt')[0]
        # Create a folder for this picture
        makdir(paths + '/' + name)
        new_web = requests.get(new_url, headers=headers).content.decode()
        new_data = etree.HTML(new_web)
        final_url = 'https:' + new_data.xpath('//div[@class="imga"]/a/@href')[0]
        # Write the image to disk in binary mode
        try:
            r = requests.get(final_url, headers=headers).content
            with open(paths + '/' + name + '/' + name + '.jpg', 'wb') as f:
                f.write(r)
            print('%s downloaded' % name)
            time.sleep(0.5)
        except Exception:
            print('%s download failed' % name)
            time.sleep(0.5)
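The comment above says "header pool", but only one User-Agent is actually defined. If you want a real pool, a minimal sketch follows; the UA strings are illustrative placeholders and pick_headers is a hypothetical helper, not part of the original code:

import random

# Illustrative User-Agent strings; substitute any real browser UAs
UA_POOL = [
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
]

def pick_headers():
    # Rotate the UA per request so consecutive requests look less uniform
    return {'user-agent': random.choice(UA_POOL)}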
Call the functions:
if __name__ == '__main__':
    path = 'D:/spider_downloads'
    # Scrape pages 5 through 7
    for i in range(5, 8):
        print('========== Starting page {} =========='.format(i))
        url = 'https://sc.chinaz.com/tupian/renwutupian_{}.html'.format(i)
        pic_download(path + '/' + str(i), url)
        print('---------- Finished page {} ----------'.format(i))
        time.sleep(1)
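The bare try/except in pic_download treats every failure the same way. If the failures turn out to be transient network errors, adding a timeout and a simple retry helps; a sketch under that assumption (fetch_bytes is a hypothetical helper, and the retry/timeout values are arbitrary):

import time
import requests

def fetch_bytes(url, headers, retries=3, timeout=10):
    # requests.exceptions.RequestException covers timeouts, connection
    # errors, and (via raise_for_status) non-2xx HTTP responses
    for attempt in range(retries):
        try:
            r = requests.get(url, headers=headers, timeout=timeout)
            r.raise_for_status()
            return r.content
        except requests.exceptions.RequestException:
            time.sleep(1 + attempt)  # back off a little more each retry
    return None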
Full code:
from lxml import etree
import time
import requests
import os
# Create a folder
def makdir(path):
    try:
        # Check whether the folder already exists
        isExists = os.path.exists(path)
        if not isExists:
            # Create it if it does not exist
            os.makedirs(path)
            print(path + ' folder created!')
        else:
            print(path + ' folder already exists!')
    except Exception as e:
        print(e)
# Scrape and save the images
def pic_download(paths, url):
    # Request headers (a single User-Agent; see the pool sketch above)
    headers = {
        'user-agent': "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10"
    }
    web = requests.get(url, headers=headers).content.decode()
    data = etree.HTML(web)
    lists = data.xpath('//div[@class="text_left text_lefts"]/div[@id="container"]/div')
    for lis in lists:
        new_url = 'https:' + lis.xpath('./p/a/@href')[0]
        name = lis.xpath('./p/a/@alt')[0]
        # Create a folder for this picture
        makdir(paths + '/' + name)
        new_web = requests.get(new_url, headers=headers).content.decode()
        new_data = etree.HTML(new_web)
        final_url = 'https:' + new_data.xpath('//div[@class="imga"]/a/@href')[0]
        # Write the image to disk in binary mode
        try:
            r = requests.get(final_url, headers=headers).content
            with open(paths + '/' + name + '/' + name + '.jpg', 'wb') as f:
                f.write(r)
            print('%s downloaded' % name)
            time.sleep(0.5)
        except Exception:
            print('%s download failed' % name)
            time.sleep(0.5)
if __name__ == '__main__':
    path = 'D:/spider_downloads'
    # Scrape pages 5 through 7
    for i in range(5, 8):
        print('========== Starting page {} =========='.format(i))
        url = 'https://sc.chinaz.com/tupian/renwutupian_{}.html'.format(i)
        pic_download(path + '/' + str(i), url)
        print('---------- Finished page {} ----------'.format(i))
        time.sleep(1)
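One last polish the code above skips: paths are built by string concatenation (paths + '/' + name + ...), which works here but is easy to get wrong. os.path.join is the portable spelling; a small sketch with hypothetical values:

import os

paths = 'D:/spider_downloads/5'   # per-page folder, as in the main loop
name = 'example'                  # hypothetical picture name
folder = os.path.join(paths, name)            # folder for this picture
target = os.path.join(folder, name + '.jpg')  # file path for the saved image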