爬取狗狗图片-CSDN博客

本文介绍了一种使用Python的requests和re模块从百度图片搜索中抓取特定品种狗狗图片的方法，并将其保存到本地指定路径。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import os
import re
from urllib.parse import urlparse

import requests

# Baidu image-search URL for huskies (swap it in to scrape that breed instead):
# urls = 'https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%B9%FE%CA%BF%C6%E6&fr=ala&ala=1&alatpl=adress&pos=0&hs=2&xthttps=111111'
# Baidu image-search URL for German shepherds.
urls = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1553158259919_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&hs=2&word=德国牧羊犬'
# Bound the wait so a stalled connection cannot hang the script forever.
response = requests.get(urls, timeout=10)
# Fail loudly on an HTTP error rather than searching an error page for links.
response.raise_for_status()
# Force UTF-8 decoding (per the original author, the charset seen in the page
# source); without it the Chinese text in the page comes out garbled.
response.encoding = 'utf-8'
html = response.text

# Collect every thumbnail address from the "thumbURL":"..." fields in the page.
scrap_urls = re.findall(r'"thumbURL":"(.*?)"', html)

# Save the images, numbered by their index in the result list
path_name = r'F:\jupyternb\keras\zhengzhengclass\scrapy\scrap_img'
d_type = ['哈士奇', '德国牧羊犬', '拉布拉多', '萨摩耶犬']

# Each breed gets its own sub-folder; index 1 is the German shepherd entry.
save_dir = os.path.join(path_name, d_type[1])
# open(..., 'wb') does not create missing folders, so make sure it exists.
os.makedirs(save_dir, exist_ok=True)

# Download at most this many thumbnails. Slicing up front avoids fetching an
# extra image only to throw it away once the limit is reached.
max_images = 28
for index, scrap_url in enumerate(scrap_urls[:max_images]):
    # Take the extension from the URL *path* only, so query strings or URLs
    # without a suffix cannot inject '/', '?' or '&' into the file name.
    ext = os.path.splitext(urlparse(scrap_url).path)[1].lstrip('.') or 'jpg'
    try:
        response = requests.get(scrap_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as err:
        # One unreachable thumbnail should not abort the rest of the batch,
        # and an HTTP error body must not be saved as if it were an image.
        print('skip %s: %s' % (scrap_url, err))
        continue
    # Files are named 0_<index>.<ext> inside the breed folder.
    with open(os.path.join(save_dir, '0_%s.%s' % (index, ext)), 'wb') as f:
        f.write(response.content)