一、酒店数据爬取
1.京东的酒店搜索页面如下图所示,可以看到搜索框中有城市、时间、酒店关键字等选项:

2.我们按下F12(开发者工具)来查看网络请求
经过查看response发现这个api请求就是拿到酒店数据的接口。

3.使用Python requests库模拟这个请求,其中cookies中的flash参数需要换成自己的,它是登录后生成的账号凭证。
通过修改data中的cityId、checkInDate、checkOutDate、keyword等参数(等价于修改搜索框中的条件),可以获得不同的酒店数据。
import requests
# Login credential sent with the request. Replace the placeholder value of
# `flash` with the one from your own logged-in browser session.
cookies = {
    'flash': '换成自己的',
}

# Browser-like request headers for the hotel list page.
# The commented-out raw `cookie` header that used to sit at the end of this
# dict has been removed: its tail had wrapped onto a separate, uncommented
# line (a syntax error), and it embedded what appear to be real session
# cookies. Credentials are supplied through the `cookies` dict instead.
headers = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://hotel.jd.com',
    'priority': 'u=1, i',
    'referer': 'https://hotel.jd.com/',
    'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge";v="138"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0',
    'x-referer-page': 'https://hotel.jd.com/list.html',
    'x-rp-client': 'h5_1.0.0',
}
# Query-string parameters for the hotel list request.
# NOTE(review): these values look like they were copied from one captured
# browser request; `t` and `h5st` carry timestamp-like numbers, so they may
# need to be re-captured for later runs -- confirm against the live API.
params = {
    "appid": "hotel",
    "functionId": "hotel_getHotelList",
    "loginType": "3",
    "t": "1753078987705",
    "client": "pc",
    "clientVersion": "6.0.0",
    "uuid": "1979618262",
    # Opaque `;`-separated token, written one field per line for readability.
    # Adjacent string literals are concatenated, so the value is unchanged.
    "h5st": (
        "20250721142313711;"
        "azaapwid9q00jww1;"
        "6c8bf;"
        "tk03w6fae1acd18nWVxhDJE3zek9L3sg01Cz6O9gK12K7dS2kzwQbf1H1eJD8GYnxY1piJLyE1CJR3L75X8LrBcakKzk;"
        "a8e902c6adbd4d03af2adc0fe5558d4f;"
        "5.1;"
        "1753078987711;"
        "smePkmci3RHU_ubS1eYU3tXWNFXWMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmfJJinJ7hApZiMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_MmNZIaDlbe5m5a4iLmOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_sg9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj8mrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm45ISYxKa8R4VjRqcPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q;"
        "cd58d4239fb74971ddd1e49bb924e509;"
        "tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW"
    ),
    "eid": "ZZYMC5EILHGA7TP67A4MXYLL7XDEMPNPEDU462POPSR7ICGITWX6ZZRMPTQSXIFJ6O5LEEUGZIF2BQO4737353XC3Q",
}
# Form payload: a single `body` field holding the search criteria as a compact
# JSON string. The string is split across adjacent literals for readability;
# Python joins them into exactly the same one-line value.
data = {
    "body": (
        '{"keyword":"","cityId":"36",'
        '"checkInDate":"2025-08-22","checkOutDate":"2025-08-23",'
        '"minPrice":"","maxPrice":"","stars":"",'
        '"pageSize":30,"pageNum":1,'
        '"bedType":"","amenities":"","promotions":"","themes":"",'
        '"breakfast":"","order":"",'
        '"agreementHotel":"0","payMode":"",'
        '"poiType":"72","poiCode":"1","channel":1010}'
    ),
}
# Send the hotel search request. The timeout stops the script from hanging
# forever on a stalled connection, and raise_for_status() reports HTTP-level
# failures directly instead of letting them surface as parse errors below.
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
response.raise_for_status()

# `body` or `list` may be absent or null in the JSON reply; fall back to an
# empty list so the script prints two empty lists instead of crashing.
hotel_list = (response.json().get("body") or {}).get("list") or []

# Collect the name and id of every hotel, in the order the API returned them.
hotel_name_list = [hotel.get("name") for hotel in hotel_list]
hotel_id_list = [hotel.get("hotelId") for hotel in hotel_list]

print(hotel_name_list)
print(hotel_id_list)
4.运行上述代码,结果如下,两个列表分别是搜索出来的酒店的名称列表和id列表:

二、房间详细数据爬取
1.我们随便点击一个酒店的查看详情进入酒店的详情链接。

2.我们按下F12(开发者工具)来查看网络请求
经过查看response发现这个api请求就是拿到房间详细数据的接口。

3.使用Python requests库模拟这个请求,其中cookies中的flash参数需要换成自己的,它是登录后生成的账号凭证。
通过修改data中的cityId、checkInDate、checkOutDate、hotelId等参数,可以爬取不同酒店的房间数据。
import requests
import json
# Login credential sent with the request. Replace the placeholder value of
# `flash` with the one from your own logged-in browser session.
cookies = {
    'flash': '换成自己的',
}

# Browser-like request headers for the hotel detail page.
# The commented-out raw `cookie` header that used to sit at the end of this
# dict has been removed: its tail had wrapped onto a separate, uncommented
# line (a syntax error), and it embedded what appear to be real session
# cookies. Credentials are supplied through the `cookies` dict instead.
headers = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://hotel.jd.com',
    'priority': 'u=1, i',
    'referer': 'https://hotel.jd.com/',
    'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge";v="138"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0',
    'x-referer-page': 'https://hotel.jd.com/detail',
    'x-rp-client': 'h5_1.0.0',
}
# Query-string parameters for the room-type request.
# NOTE(review): these values look like they were copied from one captured
# browser request; `t` and `h5st` carry timestamp-like numbers, so they may
# need to be re-captured for later runs -- confirm against the live API.
params = {
    "appid": "hotel",
    "functionId": "hotel_getHotelRoomType",
    "loginType": "3",
    "t": "1753079682910",
    "client": "pc",
    "clientVersion": "6.0.0",
    "uuid": "1979618262",
    # Opaque `;`-separated token, written one field per line for readability.
    # Adjacent string literals are concatenated, so the value is unchanged.
    "h5st": (
        "20250721143448914;"
        "azaapwid9q00jww1;"
        "6c8bf;"
        "tk03w6fae1acd18nWVxhDJE3zek9L3sg01Cz6O9gK12K7dS2kzwQbf1H1eJD8GYnxY1piJLyE1CJR3L75X8LrBcakKzk;"
        "32094aec00d4625fae5d28cf4e065c5a;"
        "5.1;"
        "1753079682914;"
        "smePkmci3RHU_ubS1eYU3tXWNFXWMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmcBKi_S6W-ZKVMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_MmdFpdBlLUBlrbiZImOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_Mh9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj1qrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm45ISYxKa8R4VjRqcPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q;"
        "f6aa75e945947e622fbdb65bd9db5ac6;"
        "tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW"
    ),
    "eid": "ZZYMC5EILHGA7TP67A4MXYLL7XDEMPNPEDU462POPSR7ICGITWX6ZZRMPTQSXIFJ6O5LEEUGZIF2BQO4737353XC3Q",
}
# Form payload: a single `body` field holding the room query as a compact JSON
# string. The string is split across adjacent literals for readability; Python
# joins them into exactly the same one-line value.
data = {
    "body": (
        '{"hotelId":"3016387","cityId":"36",'
        '"checkInDate":"2025-07-22","checkOutDate":"2025-07-23",'
        '"eKey":"en+vs+TN5YKgRViuXe3uN34pzwc0V2gNoMSVB0O7nJeccq8moMbX3Z4wZDk6D/5D8BL85Die61UalcWCuEKHfNVp9UOu0yd+EAo1CvU7hHkphxQ5mGOYFFJpBI3NNmBvbIMmbVydxx5EzXB+C2wZWYnXTtJcGicLgycM6hwxS/A=",'
        '"eData":"bABwAYzTmaX7TmTCgQGKdg==",'
        '"cuid":"","channel":1010}'
    ),
}
# Send the room-type request. The timeout stops the script from hanging
# forever on a stalled connection, and raise_for_status() reports HTTP-level
# failures directly instead of letting them surface as parse errors below.
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
response.raise_for_status()

# `body` or `roomTypeRatePlanList` may be absent or null in the JSON reply;
# fall back to an empty list so the script prints `{}` instead of raising
# KeyError/TypeError. This mirrors the tolerant lookup of the hotel-list script.
room_types = (response.json().get("body") or {}).get("roomTypeRatePlanList") or []

# Result layout: {"room_<roomTypeId>": {<ratePlanId>: {...rate fields...}}}
hotel_dict = {}

# Walk every room type.
for room in room_types:
    room_id = room["roomTypeId"]
    room_name = room["roomTypeName"]

    # All rate plans of the current room type, keyed by rate plan id.
    room_rates = {}
    for rate in room.get("ratePlanPriceList") or []:
        rate_id = rate["ratePlanId"]
        # `or 0` covers price fields that are present but null/empty, which
        # would otherwise make float() raise.
        room_rates[rate_id] = {
            "room_name": room_name,
            "marking_price": float(rate.get("avgManPrice") or 0),  # listed price
            "actual_price": float(rate.get("avgPrice") or 0),  # actual price
            "total_price": float(rate.get("totalPrice") or 0),  # order total
            "has_room": rate.get("hasRoom", 0) == 1,  # whether a room is available
            "remaining_rooms": rate.get("roomLimit", 0),  # remaining bookable count
            "has_breakfast": rate.get("breakfastInfo", 0) == 1,  # breakfast included
            "breakfast_desc": rate.get("breakfastDesc", ""),  # breakfast description
        }

    # Attach this room type's rate plans to the final result.
    hotel_dict[f"room_{room_id}"] = room_rates

# Print the result; ensure_ascii=False keeps Chinese text readable.
print(json.dumps(hotel_dict, indent=2, ensure_ascii=False))
4.运行结果如下:

三、总结
将上述两个步骤合在一起就可以实现爬取某个酒店的房间详细数据,具体实现可以自行组合,如有需求也可以私信联系UP主。
2617

被折叠的 条评论
为什么被折叠?



