京东酒店价格数据爬取 Python

一、酒店数据爬取

链接地址:https://hotel.jd.com/ (酒店预订,酒店查询,网上订酒店【京东酒店】)

1.京东的酒店搜索如下图所示,可以看到搜索框中的选项有:城市、时间、酒店关键字等:

2.我们按下F12(开发者工具)来查看网络请求

经过查看response发现这个api请求就是拿到酒店数据的接口。

3.使用Python requests库模拟这个请求,其中cookies中的flash参数需要换成自己的,它是登录后生成的账号凭证。

修改data中的cityId、checkInDate、checkOutDate、keyword等参数,等价于修改搜索框中的数据,可以获得不同的酒店数据。

import requests

# Login cookie sent with the request. According to the accompanying article,
# `flash` is the account credential generated after logging in; replace the
# placeholder with your own value and never publish a real one.
cookies = {
    'flash': '换成自己的'
}

# Browser-like request headers captured from the hotel list page
# (the sec-ch-ua / user-agent values identify Edge 138 on Windows).
headers = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://hotel.jd.com',
    'priority': 'u=1, i',
    'referer': 'https://hotel.jd.com/',
    'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge";v="138"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0',
    'x-referer-page': 'https://hotel.jd.com/list.html',
    'x-rp-client': 'h5_1.0.0',
    # NOTE: the raw browser 'cookie' header that used to be pasted here as a
    # comment was removed. It had wrapped onto a second, uncommented line
    # (a SyntaxError) and it exposed session tokens. Cookies are supplied
    # through the `cookies` dict instead.
}

# The `h5st` query value is a ';'-separated token captured from the browser
# request. It is kept verbatim and only split into its fields for readability.
# NOTE(review): the first and seventh fields look like timestamps taken at
# capture time, so the token presumably expires - confirm before reuse.
h5st_fields = [
    '20250721142313711',
    'azaapwid9q00jww1',
    '6c8bf',
    'tk03w6fae1acd18nWVxhDJE3zek9L3sg01Cz6O9gK12K7dS2kzwQbf1H1eJD8GYnxY1piJLyE1CJR3L75X8LrBcakKzk',
    'a8e902c6adbd4d03af2adc0fe5558d4f',
    '5.1',
    '1753078987711',
    'smePkmci3RHU_ubS1eYU3tXWNFXWMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmfJJinJ7hApZiMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_MmNZIaDlbe5m5a4iLmOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_sg9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj8mrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm45ISYxKa8R4VjRqcPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q',
    'cd58d4239fb74971ddd1e49bb924e509',
    'tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW',
]

# Query-string parameters selecting the hotel list API function.
params = {
    'appid': 'hotel',
    'functionId': 'hotel_getHotelList',
    'loginType': '3',
    't': '1753078987705',  # looks like a millisecond epoch timestamp - TODO confirm
    'client': 'pc',
    'clientVersion': '6.0.0',
    'uuid': '1979618262',
    'h5st': ';'.join(h5st_fields),
    'eid': 'ZZYMC5EILHGA7TP67A4MXYLL7XDEMPNPEDU462POPSR7ICGITWX6ZZRMPTQSXIFJ6O5LEEUGZIF2BQO4737353XC3Q',
}

# Form payload: one `body` field holding the search criteria as compact JSON.
# Per the article, cityId / checkInDate / checkOutDate / keyword correspond to
# the fields of the search box on the page.
search_body = (
    '{"keyword":"","cityId":"36",'
    '"checkInDate":"2025-08-22","checkOutDate":"2025-08-23",'
    '"minPrice":"","maxPrice":"","stars":"",'
    '"pageSize":30,"pageNum":1,'
    '"bedType":"","amenities":"","promotions":"","themes":"","breakfast":"",'
    '"order":"","agreementHotel":"0","payMode":"",'
    '"poiType":"72","poiCode":"1","channel":1010}'
)
data = {'body': search_body}

# Send the hotel search request. requests applies no timeout by default, so an
# explicit one keeps the script from hanging if the server never answers;
# raise_for_status() reports HTTP errors instead of failing later in .json().
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
response.raise_for_status()

# `body` or `list` may be missing or null in the reply (presumably when the
# login cookie or signature is rejected - TODO confirm), so fall back to
# empty containers instead of raising AttributeError on None.
payload = response.json()
hotel_list = (payload.get("body") or {}).get("list") or []

# Parallel lists: hotel names and their ids, in response order.
hotel_name_list = [hotel.get("name") for hotel in hotel_list]
hotel_id_list = [hotel.get("hotelId") for hotel in hotel_list]

print(hotel_name_list)
print(hotel_id_list)

4.运行上述代码结果如下,两个列表分别是搜索出来的酒店的名称列表和id列表:

二、房间详细数据爬取

1.我们随便点击一个酒店的查看详情进入酒店的详情链接。

2.我们按下F12(开发者工具)来查看网络请求

经过查看response发现这个api请求就是拿到房间详细数据的接口。

3.使用Python requests库模拟这个请求,其中cookies中的flash参数需要换成自己的,它是登录后生成的账号凭证。

修改data中的cityId、checkInDate、checkOutDate、hotelId等参数,可以实现爬取不同酒店的房间数据。

import requests
import json

# Login cookie sent with the request. According to the accompanying article,
# `flash` is the account credential generated after logging in; replace the
# placeholder with your own value and never publish a real one.
cookies = {
    'flash': '换成自己的'
}

# Browser-like request headers captured from the hotel detail page
# (the sec-ch-ua / user-agent values identify Edge 138 on Windows).
headers = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://hotel.jd.com',
    'priority': 'u=1, i',
    'referer': 'https://hotel.jd.com/',
    'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge";v="138"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0',
    'x-referer-page': 'https://hotel.jd.com/detail',
    'x-rp-client': 'h5_1.0.0',
    # NOTE: the raw browser 'cookie' header that used to be pasted here as a
    # comment was removed. It had wrapped onto a second, uncommented line
    # (a SyntaxError) and it exposed session tokens. Cookies are supplied
    # through the `cookies` dict instead.
}

# The `h5st` query value is a ';'-separated token captured from the browser
# request. It is kept verbatim and only split into its fields for readability.
# NOTE(review): the first and seventh fields look like timestamps taken at
# capture time, so the token presumably expires - confirm before reuse.
h5st_fields = [
    '20250721143448914',
    'azaapwid9q00jww1',
    '6c8bf',
    'tk03w6fae1acd18nWVxhDJE3zek9L3sg01Cz6O9gK12K7dS2kzwQbf1H1eJD8GYnxY1piJLyE1CJR3L75X8LrBcakKzk',
    '32094aec00d4625fae5d28cf4e065c5a',
    '5.1',
    '1753079682914',
    'smePkmci3RHU_ubS1eYU3tXWNFXWMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmcBKi_S6W-ZKVMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_MmdFpdBlLUBlrbiZImOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_Mh9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj1qrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm45ISYxKa8R4VjRqcPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q',
    'f6aa75e945947e622fbdb65bd9db5ac6',
    'tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW',
]

# Query-string parameters selecting the room-type API function.
params = {
    'appid': 'hotel',
    'functionId': 'hotel_getHotelRoomType',
    'loginType': '3',
    't': '1753079682910',  # looks like a millisecond epoch timestamp - TODO confirm
    'client': 'pc',
    'clientVersion': '6.0.0',
    'uuid': '1979618262',
    'h5st': ';'.join(h5st_fields),
    'eid': 'ZZYMC5EILHGA7TP67A4MXYLL7XDEMPNPEDU462POPSR7ICGITWX6ZZRMPTQSXIFJ6O5LEEUGZIF2BQO4737353XC3Q',
}

# Form payload: one `body` field holding the room query as compact JSON.
# Per the article, hotelId / cityId / checkInDate / checkOutDate choose which
# hotel and stay are queried. eKey and eData are opaque values captured from
# the browser request and are kept verbatim.
room_query_body = (
    '{"hotelId":"3016387","cityId":"36",'
    '"checkInDate":"2025-07-22","checkOutDate":"2025-07-23",'
    '"eKey":"en+vs+TN5YKgRViuXe3uN34pzwc0V2gNoMSVB0O7nJeccq8moMbX3Z4wZDk6D/5D8BL85Die61UalcWCuEKHfNVp9UOu0yd+EAo1CvU7hHkphxQ5mGOYFFJpBI3NNmBvbIMmbVydxx5EzXB+C2wZWYnXTtJcGicLgycM6hwxS/A=",'
    '"eData":"bABwAYzTmaX7TmTCgQGKdg==",'
    '"cuid":"","channel":1010}'
)
data = {'body': room_query_body}

# Request the room types and rate plans for the hotel selected in `data`.
# requests applies no timeout by default, so an explicit one keeps the script
# from hanging; raise_for_status() reports HTTP errors instead of failing
# later in .json().
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
response.raise_for_status()

# `body` or `roomTypeRatePlanList` may be missing or null in the reply
# (presumably when the login cookie or signature is rejected - TODO confirm),
# so fall back to empty containers instead of raising KeyError / TypeError.
payload = response.json()
room_types = (payload.get("body") or {}).get("roomTypeRatePlanList") or []

# Result mapping: "room_<roomTypeId>" -> {ratePlanId: rate details}.
hotel_dict = {}
for room in room_types:
    room_id = room["roomTypeId"]
    room_name = room["roomTypeName"]

    # All rate plans of the current room type, keyed by rate plan id.
    room_rates = {}
    for rate in room.get("ratePlanPriceList") or []:
        rate_id = rate["ratePlanId"]

        # `or 0` also covers price fields that are present but null/empty,
        # which would otherwise make float() raise.
        room_rates[rate_id] = {
            "room_name": room_name,
            "marking_price": float(rate.get("avgManPrice") or 0),  # listed price
            "actual_price": float(rate.get("avgPrice") or 0),  # actual price
            "total_price": float(rate.get("totalPrice") or 0),  # order total
            "has_room": rate.get("hasRoom", 0) == 1,  # whether rooms are available
            "remaining_rooms": rate.get("roomLimit", 0),  # remaining bookable rooms
            "has_breakfast": rate.get("breakfastInfo", 0) == 1,  # breakfast included
            "breakfast_desc": rate.get("breakfastDesc", ""),  # breakfast description
        }

    hotel_dict[f"room_{room_id}"] = room_rates

# Pretty-print the result; ensure_ascii=False keeps Chinese text readable.
print(json.dumps(hotel_dict, indent=2, ensure_ascii=False))

4.运行结果如下:

三、总结

将上述两个步骤合在一起就可以实现爬取某个酒店的房间详细数据,具体实现可以自行组合,如有需求也可以私信联系UP主。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值