京东酒店价格数据爬取 Python

一、酒店数据爬取

链接地址:https://hotel.jd.com (酒店预订,酒店查询,网上订酒店【京东酒店】)

1.京东的酒店搜索如下图所示,可以看到搜索框中有城市、时间、酒店关键字等选项:

2.我们按下F12(开发者工具)来查看网络请求

经过查看response发现这个api请求就是拿到酒店数据的接口。

3.使用Python requests库模拟这个请求,其中cookies中的flash参数需要换成自己的,它是登录后生成的账号凭证。

修改data中的cityId、checkInDate、checkOutDate、keyword等参数,等价于修改搜索框中的内容,可以获得不同的酒店数据。

import requests

# Cookies sent with the request. The `flash` value below is a placeholder and
# must be replaced with your own before running; per the accompanying article
# it is the account credential generated after logging in.
cookies = {
    'flash': '换成自己的',
}

# Browser-like request headers (origin of the hotel site plus a desktop
# Chrome user agent).
headers = {
    'origin': 'https://hotel.jd.com',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
}

# Query-string parameters; they are passed as `params=` to requests.post.
# NOTE(review): `functionId` here and the `hotelId` in the body match the
# room-type request used later in this article, yet the code that consumes
# this response reads `body.list` -- verify that this is really the captured
# hotel-list call and not a copy of the room-type one.
# `t` looks like a millisecond Unix timestamp -- TODO confirm.
# `h5st` and `eid` are opaque values captured from a browser session;
# presumably a request signature / device id that may expire -- TODO confirm.
params = {
    'appid': 'hotel',
    'functionId': 'hotel_getHotelRoomType',
    'loginType': '3',
    't': '1752807857575',
    'client': 'pc',
    'clientVersion': '6.0.0',
    'uuid': '17525460275561396589123',
    'h5st': '20250718110423581;gwdp93p932p0wwp7;6c8bf;tk03waf271ce118nuAysZfoIR2YTmmIf4H_J4YH869RLmGciGd85kKeWncExRRe8sLcn2oncGkBHttwvZdPS8kudFcch;fce5ff1f5a34cef46350de8c47661d57;5.1;1752807857581;smePkm8h_R3R_uHi7KrS7KrSKR3VMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmodLi_xYUHJIdMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_Mm-FJbel7a26ceX5KmOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_Mh9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj1qrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm45ISYxKa8R4VjRqcPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q;3fb372aa5157c0ac8546f41aea8b58b3;tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW',
    'eid': 'TG2IIMFKEJ3GPKTMEKNS735STGHYDUV3LGX56QOKVI4P6ODDBOBVAIF2NYSDTJO7PFRRM4Q5CDJVKKEIC625XDCKKY',
}

# Form payload: a single `body` field whose value is a JSON-encoded string
# carrying the hotel id, city id and check-in/check-out dates.
# `eKey` / `eData` are opaque values captured from the browser -- presumably
# an encrypted payload; TODO confirm whether they must be regenerated.
data = {
    'body': '{"hotelId":"10013","cityId":"36","checkInDate":"2025-07-19","checkOutDate":"2025-07-20","eKey":"QQ1gDpyrE+hZKGKqlNKhKi1RFOoGifLMNSPe4OOPJG0o7nNqGvRbMLb7M1/K+q9pabWW20jE3Ucgl6PGNBzFvWi0dNNCSBv0tqveTrHVQE8OoU3MjNZ+skWomAVdYFJeiKRq5+bzT1aeux5QZWZpum7WgRPMnv3VNA/Twq6RSCw=","eData":"ReGqK1Cj2w5zzLjw0vptBQ==","cuid":"","channel":1010}',
}

# Send the POST request to the API endpoint.
# An explicit timeout is passed because requests applies none by default, so a
# stalled connection would otherwise block the script indefinitely.
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
# Fail loudly on an HTTP error status instead of trying to parse an error page
# as JSON further down.
response.raise_for_status()

# Decode the JSON reply once. `body` or `list` may be absent *or* explicitly
# null in the reply, so each level falls back to an empty container rather
# than raising AttributeError on None.
payload = response.json()
hotel_list = (payload.get("body") or {}).get("list") or []

# Collect hotel names and ids in the same order as they appear in the reply;
# a missing field yields None for that entry.
hotel_name_list = [hotel.get("name") for hotel in hotel_list]
hotel_id_list = [hotel.get("hotelId") for hotel in hotel_list]

print(hotel_name_list)
print(hotel_id_list)

二、房间详细数据爬取

1.通过上述代码获取到酒店id,就可以进一步使用酒店id来获取该酒店中所有房间的价格数据,这里就直接给出代码吧(同样需要替换cookies中的flash参数):

同理,修改data中的参数(比如hotelId等),可以获取不同酒店的房间数据。

import json

import requests

# Cookies sent with the request. The `flash` value below is a placeholder and
# must be replaced with your own before running; per the accompanying article
# it is the account credential generated after logging in.
cookies = {
    'flash': '换成自己的',
}

# Full set of request headers as captured from a desktop Chrome session on
# the hotel detail page.
headers = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-language': 'zh-CN,zh;q=0.9',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://hotel.jd.com',
    'priority': 'u=1, i',
    'referer': 'https://hotel.jd.com/',
    'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    'x-referer-page': 'https://hotel.jd.com/detail',
    'x-rp-client': 'h5_1.0.0',
}

# Query-string parameters; they are passed as `params=` to requests.post.
# `functionId` selects the room-type endpoint.
# `t` looks like a millisecond Unix timestamp -- TODO confirm.
# `h5st` and `eid` are opaque values captured from a browser session;
# presumably a request signature / device id that may expire -- TODO confirm.
params = {
    'appid': 'hotel',
    'functionId': 'hotel_getHotelRoomType',
    'loginType': '3',
    't': '1752830704826',
    'client': 'pc',
    'clientVersion': '6.0.0',
    'uuid': '17525460275561396589123',
    'h5st': '20250718172510832;gwdp93p932p0wwp7;6c8bf;tk03wb48a1be418ntRg9JLSgxggpvfBFcFwh-p6aiy9TBOumYw7ZNNGLTT0M1R9GwD4M2WGbtIaePgJn-E9HkDN-Ynut;10ff2d225d5b79b9f5c826e2fcf6b374;5.1;1752830704832;smePkm8h_R3R_uHi7KrS7KrSKR3VMuMgMuHVMusmk_sg9uMgM24WLlsmOGujMurV8arg4m4h8eYi8K7h_mIW6mLWLV4h9m4WLR7i8uLhMuMgMuHdCRIWJRHmOuMsCmsh5ubW8mYV7mYV9q4i2u7h8W7i5WYVJJLV4mrhIp4hJlsm0m8SNVHTNhImOuMsCmciBmsm0msh5lImOuMsCmsgAqLj5W3XJ9YUIxZhGlsm0mMRMusmk_MmrpJiiVIeDBoZMuMgM64TK1YW8lsmOGujMm7iAJ4ZMuMgMWoSMusmk_cPOuMs8uMgMqbi5lImOusmOGuj8qrm0msi9aHWMusmOuMsCObjOGLm8qbRMlsmOusmk_MmFNYd4mLZllbhnRKmOGLmBxoVApISMusmOuMsCurm0msg5lImOusmOGuj_uMgMSbRMlsmOusmk_Mh9uMgMWbRMlsmOusmk_siOGLm5aHWMusmOuMsCurm0msh5lImOusmOGuj1qrm0m8i5lImOusmOGujMeLj92siMuMgMqbRMlsmOusmk_siOGLmDRHmOusmOGuj5uMgMinTMusmOuMsCurm0msTMusmOuMsCurm0msV3lsmOusmkCnm0msVAZoR2ZImOuMsC6nmOGOm2laatNnQj1aR4xKZPdIUMuMgMmrSMusmOuMsztMgMunSMusmk_Mm6WrQOCrh42YUXt8g_2si9usZgt8S3xoVAJ4ZMuMgMqYR7lsmOG_Q;1511a30ddc21c06f82c45cd8817d5fc7;tenjKJKT-JoRL1YRI9MT-J4S8ZIZ61YVF94WCeHTJJoTL9cQKxIWCeYU_tXW',
    'eid': 'TG2IIMFKEJ3GPKTMEKNS735STGHYDUV3LGX56QOKVI4P6ODDBOBVAIF2NYSDTJO7PFRRM4Q5CDJVKKEIC625XDCKKY',
}

# Form payload: a single `body` field whose value is a JSON-encoded string
# carrying the hotel id, city id and check-in/check-out dates. Change
# `hotelId` (and the dates) to query a different hotel.
# `eKey` / `eData` are opaque values captured from the browser -- presumably
# an encrypted payload; TODO confirm whether they must be regenerated.
data = {
    'body': '{"hotelId":"10013","cityId":"36","checkInDate":"2025-07-19","checkOutDate":"2025-07-20","eKey":"h3rXh3hRfRdvRM1ohEtMkfRNC3YlkHqVSuNW6S7iLpfsbK2n6CEyCMum3UDBH5JnsrZSWHlVL2FetLwkemSdk1B8IuFZtTJVHrS9Vxzfb64FY175ALJoIXkxrooTHEGX73kIGs0tUMvr3d+772Oc5ykk2Zgfed0hJL/od3XS7yg=","eData":"TQFTcJnh8LiJZDfNg1NCyA==","cuid":"","channel":1010}',
}

# Send the POST request to the API endpoint.
# An explicit timeout is passed because requests applies none by default, so a
# stalled connection would otherwise block the script indefinitely.
response = requests.post(
    'https://api.m.jd.com/api',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=10,
)
# Fail loudly on an HTTP error status instead of trying to parse an error page
# as JSON further down.
response.raise_for_status()

# Decode the JSON reply once. `body` or `roomTypeRatePlanList` may be absent
# or null in the reply, so fall back to an empty list instead of crashing
# with KeyError/TypeError; the script then prints an empty result.
payload = response.json()
room_types = (payload.get("body") or {}).get("roomTypeRatePlanList") or []

# Result mapping: "room_<roomTypeId>" -> {ratePlanId: rate details}.
hotel_dict = {}
for room in room_types:
    # Room type id and name are required; a missing one still raises KeyError.
    room_id = room["roomTypeId"]
    room_name = room["roomTypeName"]

    # All rate plans of the current room type, keyed by rate plan id.
    room_rates = {}
    for rate in room.get("ratePlanPriceList") or []:
        # `value or 0` also covers an explicit null price, which float()
        # would otherwise reject with TypeError.
        room_rates[rate["ratePlanId"]] = {
            "room_name": room_name,
            "marking_price": float(rate.get("avgManPrice") or 0),  # listed price
            "actual_price": float(rate.get("avgPrice") or 0),  # actual price
            "total_price": float(rate.get("totalPrice") or 0),  # order total
            "has_room": rate.get("hasRoom", 0) == 1,  # whether a room is available
            "remaining_rooms": rate.get("roomLimit", 0),  # remaining bookable count
            "has_breakfast": rate.get("breakfastInfo", 0) == 1,  # breakfast included?
            "breakfast_desc": rate.get("breakfastDesc", ""),  # breakfast description
        }

    hotel_dict[f"room_{room_id}"] = room_rates

# Print the result as pretty-printed JSON, keeping non-ASCII text readable.
print(json.dumps(hotel_dict, indent=2, ensure_ascii=False))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值