20 Python: scraping Meituan data (unsuccessful)

This post describes an approach to scraping the shop IDs for a given district on Meituan with Python. It sends POST requests and parses the JSON responses to automatically collect each shop's name, category, ID, and location field, and saves the results to a CSV file. The post walks through the implementation, including request-header setup, payload construction, response parsing, and error handling.


# coding=utf-8
import csv
import time
import requests
import json


# Crawl shop id, ctPoi and cateName for one district; the argument is the district id
def crow_id(areaid):
    id_list = []
    url = 'https://meishi.meituan.com/i/api/channel/deal/list'
    head = {'Host': 'meishi.meituan.com',
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Referer': 'https://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
            'Cookie': 'XXXXXXXXXXXXXX'  # placeholder: paste a valid Cookie captured from a logged-in browser session
            }
    # Optional local debugging proxy (left disabled):
    # p = {'https': 'https://127.0.0.1:8080'}
    data = {"uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3", "platform": 3, "app": "",
            "partner": 126, "riskLevel": 1, "optimusCode": 10,
            "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
            "offset": 0, "limit": 15, "cateId": 1, "lineId": 0, "stationId": 0, "areaId": areaid, "sort": "default",
            "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "", "poi_attr_20043": "", "poi_attr_20033": ""}
    #r = requests.post(url, headers=head, data=data, proxies=p)
    r = requests.post(url, headers=head, data=data)
    result = json.loads(r.text)
    totalcount = result['data']['poiList']['totalCount']  # total shops in this district, used to work out how many pages to fetch
    datas = result['data']['poiList']['poiInfos']
    print(len(datas), totalcount)
    for d in datas:
        print(d['name'])
        # one row per shop: name, category, poi id, ctPoi (encoded location info)
        id_list.append([d['name'], d['cateName'], d['poiid'], d['ctPoi']])
    print('Page:1')
    # Save the first page of results to the local CSV
    with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
        write = csv.writer(f)
        for i in id_list:
            print(i)
            write.writerow(i)

    # Crawl page 2 through to the last page
    offset = 0
    if totalcount > 15:
        totalcount -= 15
        while offset < totalcount:
            id_list = []
            offset += 15
            m = offset // 15 + 1  # current page number
            print('Page:%d' % m)
            # Build the POST payload; paging is done by bumping offset (a helper that builds this payload once is sketched after the function)
            data2 = {"uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3", "platform": 3, "app": "",
                     "partner": 126, "riskLevel": 1, "optimusCode": 10,
                     "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
                     "offset": offset, "limit": 15, "cateId": 1, "lineId": 0, "stationId": 0, "areaId": areaid,
                     "sort": "default",
                     "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "", "poi_attr_20043": "",
                     "poi_attr_20033": ""}
            try:
             #   r = requests.post(url, headers=head, data=data2, proxies=p)
                r = requests.post(url, headers=head, data=data2)
                print(r.text)
                result = json.loads(r.text)
                datas = result['data']['poiList']['poiInfos']
                print(len(datas))
                for d in datas:
                    # same row layout as page 1: name, category, poi id, ctPoi
                    id_list.append([d['name'], d['cateName'], d['poiid'], d['ctPoi']])
                # Append this page to the local CSV
                with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
                    write = csv.writer(f)
                    for i in id_list:
                        write.writerow(i)
            except Exception as e:
                print(e)
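
The page-1 request and the paging loop build the same payload twice, differing only in offset, which makes it easy for the two copies to drift apart. A small helper along the lines below would keep them in sync; the name build_payload is mine, not part of the original script:

def build_payload(areaid, offset=0, limit=15):
    # Same fields as the original data/data2 dicts; only offset changes between pages.
    return {"uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3", "platform": 3, "app": "",
            "partner": 126, "riskLevel": 1, "optimusCode": 10,
            "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
            "offset": offset, "limit": limit, "cateId": 1, "lineId": 0, "stationId": 0, "areaId": areaid,
            "sort": "default", "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "",
            "poi_attr_20043": "", "poi_attr_20033": ""}

With this in place, both requests reduce to requests.post(url, headers=head, data=build_payload(areaid, offset)).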


if __name__ == '__main__':
    # Region info copied straight out of the page's HTML (the 南澳新区 data needs extra handling since it has no sub-districts); a sketch of extracting this areaObj from the page instead of hard-coding it follows the script
    a = {"areaObj": {
        "28": [{"id": 28, "name": "全部", "regionName": "福田区", "count": 4022},
               {"id": 7994, "name": "岗厦", "regionName": "岗厦", "count": 110},
               {"id": 7996, "name": "福田保税区", "regionName": "福田保税区", "count": 29}],
        "29": [{"id": 29, "name": "全部", "regionName": "罗湖区", "count": 2191},
               {"id": 14095, "name": "KK mall", "regionName": "KK mall", "count": 74}],
        "30": [{"id": 30, "name": "全部", "regionName": "南山区", "count": 3905},
               {"id": 25152, "name": "南山京基百纳", "regionName": "南山京基百纳", "count": 22},
               {"id": 36635, "name": "深圳湾", "regionName": "深圳湾", "count": 17}],
        "31": [{"id": 31, "name": "全部", "regionName": "盐田区", "count": 407},
               {"id": 754, "name": "大小梅沙", "regionName": "大小梅沙", "count": 36},
               {"id": 38055, "name": "溪涌", "regionName": "溪涌", "count": ""}],
        "32": [{"id": 32, "name": "全部", "regionName": "宝安区", "count": 6071},
               {"id": 37084, "name": "光明新区", "regionName": "光明新区", "count": 1}],
        "33": [{"id": 33, "name": "全部", "regionName": "龙岗区", "count": 5193},
               {"id": 36636, "name": "坪山高铁站", "regionName": "坪山高铁站", "count": 41},
               {"id": 37501, "name": "龙岗中心城", "regionName": "龙岗中心城", "count": 365}],
        "9553": [{"id": 9553, "name": "全部", "regionName": "龙华区", "count": 3080},
                 {"id": 37723, "name": "龙华新区", "regionName": "龙华新区", "count": 14}],
        "23420": [{"id": 23420, "name": "全部", "regionName": "坪山区", "count": 393},
                  {"id": 9535, "name": "南澳大鹏新区", "regionName": "南澳大鹏新区", "count": 91}]}}

    datas = a['areaObj']
    b = datas.values()
    area_list = []
    for data in b:
        for d in data[1:]:
            area_list.append(d)  # collect every sub-area into a flat list; each element is a dict
    l = 0
    old = time.time()
    for i in area_list:
        l += 1
        print('Crawling area %d:' % l, i['regionName'], 'total shops:', i['count'])
        try:
            crow_id(i['id'])
            now = time.time() - old
            print(i['name'], 'finished,', 'elapsed: %ds' % now)
        except Exception as e:
            print(e)
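
The region list above was copied by hand out of the page source. Assuming the listing page still embeds the same areaObj JSON literal the author copied it from (an assumption, not verified against the live site), it could be pulled out programmatically instead of being hard-coded; fetch_area_obj below is a hypothetical helper, not part of the original script:

def fetch_area_obj(list_page_url, headers):
    # Hypothetical: locate the embedded '"areaObj":' literal in the page source and
    # parse the object that follows it; raw_decode copes with the nested braces.
    html = requests.get(list_page_url, headers=headers).text
    marker = html.find('"areaObj":')
    if marker == -1:
        return None  # page layout changed or the request was blocked
    start = html.index('{', marker)
    obj, _ = json.JSONDecoder().raw_decode(html[start:])
    return obj

json and requests are already imported at the top of the script, so the helper can be dropped in as-is.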

 

# Suspected failure point: the cookies, though other factors may be involved as well
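
One way to narrow that down is to look at the raw response before handing it to json.loads. The sketch below is only a debugging aid, under the assumption that a rejected request shows up as a non-200 status, a non-JSON body (for example a verification or login page), or JSON without the expected data field; check_response is a made-up name, not part of Meituan's API or the original script:

def check_response(r):
    # Print enough of the response to tell a cookie/anti-crawler rejection apart
    # from a genuine parsing problem.
    print('status:', r.status_code)
    print('content-type:', r.headers.get('Content-Type'))
    try:
        body = r.json()
    except ValueError:
        print('Body is not JSON (possibly a login/verification page):')
        print(r.text[:300])
        return None
    if not body.get('data'):
        print('JSON returned but no usable data field:', body)
        return None
    return body

Calling check_response(r) right after each requests.post(...) in crow_id would show whether the Cookie is actually being accepted.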
