code:
import re

import requests


def craw(url, page):
    # Example listing URL: "https://list.jd.com/list.html?cat=1713,3258,3317"
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/66.0.3359.181 Mobile Safari/537.36",
    }
    response = requests.get(url, headers=headers, timeout=10)
    html = response.text
    # Capture the protocol-relative image URL inside each product card.
    urls = re.findall(
        r'<div class="p-img".*?img width="200" height="200" data-img="1" '
        r'src="//(img.*?\.360buyimg\.com/.*?\.jpg).*?</a>',
        html, re.S)
    x = 1
    for b_url in urls:
        p_url = "https://" + b_url
        print(p_url)
        filename = "C:/Users/XIEG2/Desktop/ll/" + str(page) + "--" + str(x) + ".jpg"
        req = requests.get(p_url, headers=headers, timeout=10)
        # Binary write: req.content holds the raw JPEG bytes.
        with open(filename, "wb") as fb:
            fb.write(req.content)
        x += 1


# Choose how many listing pages to crawl.
def craw_page(page):
    for i in range(1, page + 1):
        url = "https://list.jd.com/list.html?cat=1713,3258,3317&page=" + str(i)
        craw(url, i)


craw_page(10)
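The regular expression does the heavy lifting: it anchors on the "p-img" product-card wrapper and captures the protocol-relative 360buyimg.com image path. The snippet below checks the pattern against a small hand-written fragment modeled on JD's listing markup (the fragment is illustrative, not captured from the live site):

import re

# Hypothetical fragment shaped like one JD product card; not real page data.
sample_html = '''
<div class="p-img">
<a href="//item.jd.com/12345.html">
<img width="200" height="200" data-img="1" src="//img14.360buyimg.com/n7/jfs/t1/demo.jpg">
</a>
</div>
'''

pattern = (r'<div class="p-img".*?img width="200" height="200" data-img="1" '
           r'src="//(img.*?\.360buyimg\.com/.*?\.jpg).*?</a>')
for path in re.findall(pattern, sample_html, re.S):
    print("https://" + path)  # https://img14.360buyimg.com/n7/jfs/t1/demo.jpg

If re.findall returns an empty list on a real page, the markup served to this User-Agent likely differs from what the pattern expects; printing html is the quickest way to see what actually came back.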
This article shows how to use the requests library to crawl images of history books from the JD.com listing pages and save them to the local disk; the crawler implementation is walked through with example code.
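Because the loop above saves each image with a bare requests.get and no error handling, one failed download aborts the whole run. A more defensive helper might look like the sketch below; the save_image name and the streaming, chunked write are my additions, not part of the original article:

import requests

def save_image(p_url, filename, timeout=10):
    """Download one image to disk; return True on success (sketch only)."""
    try:
        # stream=True avoids holding the whole image in memory at once.
        with requests.get(p_url, stream=True, timeout=timeout) as req:
            req.raise_for_status()  # bail out on 4xx/5xx responses
            with open(filename, "wb") as fb:
                for chunk in req.iter_content(chunk_size=8192):
                    fb.write(chunk)
        return True
    except (requests.RequestException, OSError) as exc:
        print("failed to save " + p_url + ": " + str(exc))
        return False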