import requests
import os
import zipfile
'''
爬取百度图片
'''
# Global counter of images saved so far; used by save_imgs() to build
# unique, monotonically numbered file names across pages.
imgnum = 0
def get_url(starpagenum, keyword):
    """Query the Baidu image-search JSON API and yield thumbnail records.

    Args:
        starpagenum: zero-based page index; each page holds 30 results.
        keyword: search term, sent as both 'queryWord' and 'word'.

    Yields:
        dict with keys:
            'url'  -- thumbnail URL (may be None if Baidu omits it),
            'name' -- characters 1..9 of the source-page title ('' if absent).
    """
    params = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '0',  # BUG FIX: original value ' 0' carried a stray leading space
        'queryWord': keyword,
        'word': keyword,
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        # BUG FIX: the result offset is page_index * page_size. The original
        # 'starpagenum + 30' made every page in range(0, 10) fetch offsets
        # 30..39, i.e. (almost) the same 30 images ten times over.
        'pn': starpagenum * 30,
        'rn': '30',
    }
    url = 'https://image.baidu.com/search/acjson'
    response = requests.get(url, params=params, timeout=10)
    response.encoding = "utf-8"
    data = response.json().get('data')
    if not data:  # robustness: 'data' can be missing/None on API errors
        return
    for item in data:
        if not item:  # Baidu appends a trailing empty dict as a sentinel
            return
        # robustness: 'fromPageTitleEnc' may be absent; original code
        # crashed slicing None
        title = item.get('fromPageTitleEnc') or ''
        yield {
            'url': item.get('thumbURL'),
            'name': title[1:10],
        }
def save_imgs(imgs):
    """Download every image record in *imgs* to the local image directory.

    Args:
        imgs: iterable of dicts with 'url' and 'name' keys (see get_url()).

    Side effects:
        Increments the module-level counter ``imgnum`` and writes one
        numbered .jpg file per image under *filepath*.
    """
    filepath = r"E:\python\pythonzong\爬虫\request\imgbaidutu\\"
    # Robustness: original crashed with FileNotFoundError when the target
    # directory did not exist yet.
    os.makedirs(filepath, exist_ok=True)
    global imgnum
    for item in imgs:
        url = item.get('url')
        if not url:  # robustness: get_url may yield entries without a thumbURL
            continue
        # Strip whitespace and '*' -- illegal/awkward in Windows file names.
        name = ''.join(item.get('name').split()).replace('*', '')
        imgnum += 1
        filename = filepath + str(imgnum) + name + ".jpg"
        print(filename)
        response = requests.get(url, timeout=10)
        with open(filename, 'wb') as file:
            file.write(response.content)
def save_zip(newpath, path):
    """Recursively compress every file under *path* into a zip at *newpath*.

    Args:
        newpath: destination .zip file path.
        path: root directory whose files are archived.
    """
    print("开始压缩文件")
    with zipfile.ZipFile(newpath, 'w', zipfile.ZIP_DEFLATED) as zp:
        # os.walk yields (dirpath, dirnames, filenames). The original loop
        # named the first element 'path', shadowing the parameter.
        for dirpath, _dirnames, filenames in os.walk(path):
            for name in filenames:
                fullname = os.path.join(dirpath, name)
                # BUG FIX: without an explicit arcname the archive embedded
                # the absolute E:\... directory structure; store entries
                # relative to the root directory instead.
                zp.write(fullname, arcname=os.path.relpath(fullname, path))
    print("压缩文件成功")
if __name__ == '__main__':
    # The keyword and the number of pages below can be changed as needed.
    # (This note was a bare free-text line at the end of the file, which
    # was a SyntaxError; it is preserved here as a comment.)
    keyword = "美女"
    for num in range(0, 10):  # fetch 10 pages of 30 images each
        save_imgs(get_url(num, keyword))
    path = r"E:\python\pythonzong\爬虫\request\imgbaidutu"
    zippath = "E:\\python\\pythonzong\\爬虫\\request\\" + keyword + ".zip"
    save_zip(zippath, path)