一、酒店数据爬取
1.京东的酒店搜索如下图所示,可以看到搜索框中有城市、时间、酒店关键字等选项:
2.我们按下F12(开发者工具)来查看网络请求
经过查看response发现这个api请求就是拿到酒店数据的接口。
3.使用Python requests库模拟这个请求,其中cookies中的flash参数需要换成自己的,它是登录后生成的账号凭证。
通过修改data中的cityId、checkInDate、checkOutDate、keyword等参数(等价于修改搜索框中的数据),可以获得不同的酒店数据。
import requests
# Request configuration for the hotel-search call. All values below were
# copied from a request captured in the browser's developer tools.

# Login credential cookie. The value is a placeholder and must be replaced
# with the 'flash' cookie from your own logged-in session.
cookies = {
'flash': '换成自己的',
}
# Minimal browser-like headers sent with the request.
headers = {
'origin': 'https://hotel.jd.com',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
}
# Query-string parameters.
# NOTE(review): 'functionId' is 'hotel_getHotelRoomType', the same value the
# room-type request later in this file uses, while this script reads a hotel
# list from body.list -- confirm the function id of the search endpoint.
# NOTE(review): 't' and 'h5st' look like time-bound values from a capture made
# on 2025-07-18; presumably they have to be refreshed from a new capture --
# verify.
params = {
'appid': 'hotel',
'functionId': 'hotel_getHotelRoomType',
'loginType': '3',
't': '1752807857575',
'client': 'pc',
'clientVersion': '6.0.0',
'uuid': '17525460275561396589123',
'h5st': '20250718110423581;gwdp93p932p0wwp7;6c8bf;tk03waf271ce118nuAysZfoIR2YTmmIf4H_J4YH869RLmGciGd85kKeWncExRRe8sLcn2oncGkBHttwvZdPS8kudFcch;fce5ff1f5a34cef46350de8c47661d57;5.1;1752807857581;smePkm8h_R3R_uHi7KrS7KrSKR3VMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmodLi_xYUHJIdMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_Mm-FJbel7a26ceX5KmOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_Mh9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj1qrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm45ISYxKa8R4VjRqcPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q;3fb372aa5157c0ac8546f41aea8b58b3;tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW',
'eid': 'TG2IIMFKEJ3GPKTMEKNS735STGHYDUV3LGX56QOKVI4P6ODDBOBVAIF2NYSDTJO7PFRRM4Q5CDJVKKEIC625XDCKKY',
}
# Form payload: a single 'body' field holding a JSON string with the query
# criteria (hotelId, cityId, check-in/check-out dates, ...).
# NOTE(review): 'eKey' / 'eData' are opaque captured values; presumably they
# are tied to the capture session -- verify before reusing.
data = {
'body': '{"hotelId":"10013","cityId":"36","checkInDate":"2025-07-19","checkOutDate":"2025-07-20","eKey":"QQ1gDpyrE+hZKGKqlNKhKi1RFOoGifLMNSPe4OOPJG0o7nNqGvRbMLb7M1/K+q9pabWW20jE3Ucgl6PGNBzFvWi0dNNCSBv0tqveTrHVQE8OoU3MjNZ+skWomAVdYFJeiKRq5+bzT1aeux5QZWZpum7WgRPMnv3VNA/Twq6RSCw=","eData":"ReGqK1Cj2w5zzLjw0vptBQ==","cuid":"","channel":1010}',
}
# Replay the captured search request. The timeout keeps the script from
# hanging indefinitely when the server never answers.
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
# Fail loudly on an HTTP error instead of parsing an error page as JSON.
response.raise_for_status()

# Fall back to an empty list when "body" or "list" is missing or null, so the
# script prints two empty lists rather than crashing on an error payload.
hotel_list = (response.json().get("body") or {}).get("list") or []

# Parallel lists: index i of both lists refers to the same hotel.
hotel_name_list = [hotel.get("name") for hotel in hotel_list]
hotel_id_list = [hotel.get("hotelId") for hotel in hotel_list]

print(hotel_name_list)
print(hotel_id_list)
二、房间详细数据爬取
1.通过上述代码获取到酒店id,就可以进一步使用酒店id来获取该酒店中所有房间的价格数据,这里就直接给出代码吧(同样需要替换cookies中的flash参数):
同理,修改data中的hotelId等参数,可以获取不同酒店的房间数据。
import json
import requests
# Request configuration for the room-type call. All values below were copied
# from a request captured in the browser's developer tools.

# Login credential cookie. The value is a placeholder and must be replaced
# with the 'flash' cookie from your own logged-in session.
cookies = {
'flash': '换成自己的',
}
# Full set of browser headers from the captured request.
headers = {
'accept': 'application/json, text/javascript, */*; q=0.01',
'accept-language': 'zh-CN,zh;q=0.9',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://hotel.jd.com',
'priority': 'u=1, i',
'referer': 'https://hotel.jd.com/',
'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-site',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
'x-referer-page': 'https://hotel.jd.com/detail',
'x-rp-client': 'h5_1.0.0',
}
# Query-string parameters; 'functionId' selects the room-type endpoint.
# NOTE(review): 't' and 'h5st' look like time-bound values from a capture made
# on 2025-07-18; presumably they have to be refreshed from a new capture --
# verify.
params = {
'appid': 'hotel',
'functionId': 'hotel_getHotelRoomType',
'loginType': '3',
't': '1752830704826',
'client': 'pc',
'clientVersion': '6.0.0',
'uuid': '17525460275561396589123',
'h5st': '20250718172510832;gwdp93p932p0wwp7;6c8bf;tk03wb48a1be418ntRg9JLSgxggpvfBFcFwh-p6aiy9TBOumYw7ZNNGLTT0M1R9GwD4M2WGbtIaePgJn-E9HkDN-Ynut;10ff2d225d5b79b9f5c826e2fcf6b374;5.1;1752830704832;smePkm8h_R3R_uHi7KrS7KrSKR3VMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmrpJiiVIeDBoZMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_MmFNYd4mLZllbhnRKmOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_Mh9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj1qrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm2laatNnQj1aR4xKZPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q;1511a30ddc21c06f82c45cd8817d5fc7;tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW',
'eid': 'TG2IIMFKEJ3GPKTMEKNS735STGHYDUV3LGX56QOKVI4P6ODDBOBVAIF2NYSDTJO7PFRRM4Q5CDJVKKEIC625XDCKKY',
}
# Form payload: a single 'body' field holding a JSON string with the hotel id,
# city id and check-in/check-out dates of the query.
# NOTE(review): 'eKey' / 'eData' are opaque captured values; presumably they
# are tied to the capture session -- verify before reusing.
data = {
'body': '{"hotelId":"10013","cityId":"36","checkInDate":"2025-07-19","checkOutDate":"2025-07-20","eKey":"h3rXh3hRfRdvRM1ohEtMkfRNC3YlkHqVSuNW6S7iLpfsbK2n6CEyCMum3UDBH5JnsrZSWHlVL2FetLwkemSdk1B8IuFZtTJVHrS9Vxzfb64FY175ALJoIXkxrooTHEGX73kIGs0tUMvr3d+772Oc5ykk2Zgfed0hJL/od3XS7yg=","eData":"TQFTcJnh8LiJZDfNg1NCyA==","cuid":"","channel":1010}',
}
# Replay the captured room-type request. The timeout keeps the script from
# hanging indefinitely when the server never answers.
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
# Fail loudly on an HTTP error instead of parsing an error page as JSON.
response.raise_for_status()

# Treat a missing or null "body" as "no rooms" so an error payload produces an
# empty result instead of a KeyError.
body = response.json().get("body") or {}

# Result mapping: "room_<roomTypeId>" -> {ratePlanId: rate details}.
hotel_dict = {}

# Walk every room type returned for the hotel.
for room in body.get("roomTypeRatePlanList") or []:
    room_id = room["roomTypeId"]
    room_name = room["roomTypeName"]

    # All rate plans offered for the current room type, keyed by plan id.
    room_rates = {}

    for rate in room.get("ratePlanPriceList") or []:
        rate_id = rate["ratePlanId"]

        # `or 0` also covers a price key that is present but null, which
        # float() would otherwise reject with a TypeError.
        rate_info = {
            "room_name": room_name,
            "marking_price": float(rate.get("avgManPrice") or 0),  # listed price
            "actual_price": float(rate.get("avgPrice") or 0),  # actual price
            "total_price": float(rate.get("totalPrice") or 0),  # order total
            "has_room": rate.get("hasRoom", 0) == 1,  # whether rooms are available
            "remaining_rooms": rate.get("roomLimit", 0),  # rooms still bookable
            "has_breakfast": rate.get("breakfastInfo", 0) == 1,  # breakfast included
            "breakfast_desc": rate.get("breakfastDesc", ""),  # breakfast description
        }
        room_rates[rate_id] = rate_info

    hotel_dict[f"room_{room_id}"] = room_rates

# ensure_ascii=False keeps Chinese room names readable in the output.
print(json.dumps(hotel_dict, indent=2, ensure_ascii=False))