import requests
import json
from openpyxl import Workbook, load_workbook
# Fetch the recommended-book list for one book tag from ptpress.com.cn.
# The tag ID is sent both as a query parameter and as a request header,
# exactly as the original request did; it is now written only once.
BOOK_TAG_ID = "2725fe7b-b2c2-4769-8f6f-c95f04c70275"
url = "https://www.ptpress.com.cn/recommendBook/getRecommendBookListForPortal"
headers = {
    "bookTagId": BOOK_TAG_ID,
}
# A timeout keeps a stalled connection from hanging the script forever.
resp = requests.get(
    url,
    params={"bookTagId": BOOK_TAG_ID},
    headers=headers,
    timeout=10,
)
# Fail loudly on an HTTP error instead of trying to parse an error page.
resp.raise_for_status()
# Force UTF-8 so the Chinese book titles decode correctly.
resp.encoding = 'utf-8'
res = resp.json()
# List of book records; each one is read for bookName, bookId and picPath.
results = res['data']
# Create the two in-memory xlsx workbooks and write their header rows.

# Workbook 1: basic information for every book (name, ID, cover path).
basic_info_header = ['bookName(书名)', 'bookId(书本ID)', "picPath"]
workbook = Workbook()
worksheets = workbook.worksheets[0]  # first sheet of the new workbook
worksheets.append(basic_info_header)

# Workbook 2: price and stock for every book.
price_stock_header = ['bookName(书名)', 'discountPrice(价格)', "num(库存)"]
workbook2 = Workbook()
worksheets2 = workbook2.worksheets[0]  # first sheet of the new workbook
worksheets2.append(price_stock_header)
# For every book in `results`: record its basic info in the first sheet,
# then query the detail endpoint (price, ISBN) and the stock endpoint and
# record name / price / stock in the second sheet.
DETAIL_URL = "https://www.ptpress.com.cn/bookinfo/getBookDetailsById"
STOCK_URL = "https://www.ptpress.com.cn/order/getBookSaleStock"
# Seconds to wait on each per-book request so one stalled call cannot
# hang the whole run.
REQUEST_TIMEOUT = 10

try:
    for book in results:
        name = book['bookName']
        book_id = book['bookId']
        worksheets.append([name, book_id, book['picPath']])

        # Detail record: provides the discount price and the ISBN.
        detail_resp = requests.post(
            DETAIL_URL, data={'bookId': book_id}, timeout=REQUEST_TIMEOUT
        )
        detail_resp.raise_for_status()
        detail = detail_resp.json()['data']

        # The stock endpoint is keyed by ISBN, which is read from the
        # first entry of the detail record's photo list.
        isbn = detail['photos'][0]['isbn']
        stock_resp = requests.post(
            STOCK_URL, data={'isbn': isbn}, timeout=REQUEST_TIMEOUT
        )
        stock_resp.raise_for_status()
        stock = stock_resp.json()['data']

        worksheets2.append([name, detail['discountPrice'], stock])
finally:
    # Write each file once instead of rewriting it on every iteration.
    # Doing it in `finally` still persists the rows collected before a
    # failed request, as the per-iteration saves used to.
    workbook.save("计算机新书基本信息.xlsx")
    workbook2.save('计算机类新书价格库存信息表.xlsx')
利用Requests爬取图书信息
最新推荐文章于 2023-01-12 09:28:24 发布
本文介绍了一种使用 Python 的 requests 库从指定网站爬取图书基本信息、价格及库存信息的方法,并将这些信息分别保存到两个 Excel 文件中。首先通过 GET 请求获取图书列表,然后对每本书依次发送 POST 请求,获取其详细信息(价格)和库存。
1199

被折叠的 条评论
为什么被折叠?



