import urllib.request
import re
import os
def _extract_image_urls(html):
    """Return the image URLs found inside ``<div class="thumb">`` blocks of *html*."""
    # Compiled once here instead of once per thumbnail block.
    thumb_pattern = re.compile(r'<div class="thumb">.*?</div>', re.S)
    src_pattern = re.compile(r'img src="(.*?)" alt', re.S)
    # 'http:' is prepended to every captured src; presumably the page emits
    # protocol-relative sources ("//host/path") -- verify against the live markup.
    return [
        'http:' + src
        for thumb in thumb_pattern.findall(html)
        for src in src_pattern.findall(thumb)
    ]


def handle_content(request):
    """Fetch the page behind *request* and download every thumbnail image on it.

    Images are written to a ``qiubai`` directory located next to this file;
    the directory is created on demand. Each file is named after the last
    path component of its URL, and a confirmation line is printed per file.

    Args:
        request: Anything accepted by ``urllib.request.urlopen`` (a URL
            string or a ``urllib.request.Request``).

    Returns:
        The list of image URLs that were downloaded, in page order
        (empty when the page contains no thumbnails).

    Raises:
        urllib.error.URLError: If the page or one of the images cannot be fetched.
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    # Context manager guarantees the connection is released even on decode errors.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')

    img_list = _extract_image_urls(html)
    if not img_list:
        return img_list

    # Build the path from components rather than a hard-coded backslash so it
    # is valid on every platform, and make sure the directory exists.
    save_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'qiubai')
    os.makedirs(save_dir, exist_ok=True)

    for img_path in img_list:
        file_name = os.path.basename(img_path)
        save_path = os.path.join(save_dir, file_name)
        with urllib.request.urlopen(img_path) as remote, open(save_path, 'wb') as local:
            local.write(remote.read())
        print(file_name + '下载成功')
    return img_list
def handle_url(url, page):
    """Build and return the request object for one listing page.

    Args:
        url: Base URL; the page number is appended to it verbatim.
        page: Page number (any value convertible with ``str``).

    Returns:
        A ``urllib.request.Request`` for ``url + str(page)`` carrying a
        desktop-browser ``User-Agent`` header.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
    }
    return urllib.request.Request(f'{url}{page}', headers=headers)
def main():
    """Prompt for a page range and download the images of every page in it.

    Reads the first and last page numbers from standard input (both
    inclusive). For each page a request is built with ``handle_url`` and
    processed with ``handle_content``. If the last page is smaller than the
    first, no page is processed.

    Raises:
        ValueError: If either input cannot be parsed as an integer.
    """
    url = 'https://www.qiushibaike.com/pic/page/'
    start_page = int(input('请输入抓取的起始页:'))
    end_page = int(input('请输入抓取的结束页:'))
    for page in range(start_page, end_page + 1):
        # Append the page number to the base URL and build the request.
        request = handle_url(url, page)
        # Fetch the page and download the images it references.
        handle_content(request)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# Python web scraping: batch-downloading images with urllib
# (blog page residue: "latest recommended article" published 2025-05-15 12:04:46)