import requests
import urllib.request
import os
from pyquery import PyQuery as pq
import re
from requests_html import HTMLSession
# Fetch the source page and collect the `src` attribute of every <img> element.
session = HTMLSession()
url1 = 'XXX'  # source page URL (placeholder; replace with the real address)
r = session.get(url1)
pic1 = r.html.xpath('//img/@src')
print('图片的数量: ', len(pic1))

# Download each image that has an absolute http(s) URL into the current
# working directory, naming the file after the last path segment of the URL.
for src in pic1:
    print('获取图片的网址:', src)
    if not src.startswith(('http://', 'https://')):
        # Relative or protocol-relative links cannot be requested as-is;
        # a plain substring test would also let through paths that merely
        # contain "http" somewhere, which requests.get would then reject.
        print('不完整的http: ', src)
        continue
    y = requests.get(src)
    # os.path.join supplies the directory separator; plain concatenation
    # glued the file name onto the directory name and wrote the file into
    # the parent directory instead.
    path = os.path.join(os.getcwd(), src.split("/")[-1])
    print(path)
    with open(path, "wb") as f:
        f.write(y.content)
****************************************************************************
# Alternative approach: extract image URLs from the raw page text with a
# regular expression instead of XPath.
# - raw string, so `\s` reaches the regex engine instead of being treated as
#   an (invalid) string escape;
# - `https?` so that https links are matched as well as http ones;
# - "jpeg" replaces the misspelled "jpge" extension.
pattern = r"((https?):[^\s]*?(jpeg|jpg|png|PNG|JPG))"
t = re.findall(pattern, r.text)
print('匹配出的图片数量:', len(t), "\n匹配出的图片连接:", t)
for match in t:
    # The pattern has capture groups, so findall yields one tuple per match;
    # element 0 is the complete URL. Backslashes are stripped — presumably
    # to undo escaped slashes ("\/") in script-embedded URLs.
    picture = match[0].replace('\\', '')
    print('picture的值: ', picture)
    y = requests.get(picture)
    # os.path.join supplies the directory separator that plain string
    # concatenation with os.getcwd() left out.
    path = os.path.join(os.getcwd(), picture.split("/")[-1])
    with open(path, "wb") as f:
        f.write(y.content)
本文介绍了一种使用 Python 爬虫技术抓取网页上图片的方法,包括利用 requests 库下载图片,以及通过正则表达式匹配图片链接。文章详细展示了如何遍历图片链接、检查链接是否完整,并将图片保存到本地。
2393

被折叠的 条评论
为什么被折叠?



