# coding=utf-8
"""Scrape shop listings per area from the Meituan deal-list API into a CSV.

For every sub-area listed in ``AREA_DATA`` the script pages through the
``/i/api/channel/deal/list`` endpoint and appends one CSV row per shop with
the columns: name, cateName, poiid, ctPoi.
"""
import csv
import json
import time

import requests

API_URL = 'https://meishi.meituan.com/i/api/channel/deal/list'
PAGE_SIZE = 15          # value sent as "limit"; also the offset step between pages
REQUEST_TIMEOUT = 10    # seconds; without a timeout a stalled connection hangs forever
OUTPUT_PATH = 'meituan_id.csv'

# NOTE(review): advertising 'br' allows the server to answer with Brotli, which
# requests only decodes when a brotli package is installed - confirm, since an
# undecoded body would make json.loads fail.
HEADERS = {
    'Host': 'meishi.meituan.com',
    'Accept': 'application/json',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Referer': 'https://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
    # Placeholder: must be replaced with a real session cookie before running.
    'Cookie': 'XXXXXXXXXXXXXX',
}

# Area information copied straight out of the page's HTML.  The first entry
# of every list is the district-wide "all" aggregate; the remaining entries
# are that district's sub-areas.
AREA_DATA = {"areaObj": {
    "28": [{"id": 28, "name": "全部", "regionName": "福田区", "count": 4022},
           {"id": 7994, "name": "岗厦", "regionName": "岗厦", "count": 110},
           {"id": 7996, "name": "福田保税区", "regionName": "福田保税区", "count": 29}],
    "29": [{"id": 29, "name": "全部", "regionName": "罗湖区", "count": 2191},
           {"id": 14095, "name": "KK mall", "regionName": "KK mall", "count": 74}],
    "30": [{"id": 30, "name": "全部", "regionName": "南山区", "count": 3905},
           {"id": 25152, "name": "南山京基百纳", "regionName": "南山京基百纳", "count": 22},
           {"id": 36635, "name": "深圳湾", "regionName": "深圳湾", "count": 17}],
    "31": [{"id": 31, "name": "全部", "regionName": "盐田区", "count": 407},
           {"id": 754, "name": "大小梅沙", "regionName": "大小梅沙", "count": 36},
           {"id": 38055, "name": "溪涌", "regionName": "溪涌", "count": ""}],
    "32": [{"id": 32, "name": "全部", "regionName": "宝安区", "count": 6071},
           {"id": 37084, "name": "光明新区", "regionName": "光明新区", "count": 1}],
    "33": [{"id": 33, "name": "全部", "regionName": "龙岗区", "count": 5193},
           {"id": 36636, "name": "坪山高铁站", "regionName": "坪山高铁站", "count": 41},
           {"id": 37501, "name": "龙岗中心城", "regionName": "龙岗中心城", "count": 365}],
    "9553": [{"id": 9553, "name": "全部", "regionName": "龙华区", "count": 3080},
             {"id": 37723, "name": "龙华新区", "regionName": "龙华新区", "count": 14}],
    "23420": [{"id": 23420, "name": "全部", "regionName": "坪山区", "count": 393},
              {"id": 9535, "name": "南澳大鹏新区", "regionName": "南澳大鹏新区", "count": 91}],
}}


def _build_payload(areaid, offset):
    """Return the POST form fields for one page; only areaId/offset vary."""
    return {
        "uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539",
        "version": "8.3.3",
        "platform": 3,
        "app": "",
        "partner": 126,
        "riskLevel": 1,
        "optimusCode": 10,
        "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
        "offset": offset,
        "limit": PAGE_SIZE,
        "cateId": 1,
        "lineId": 0,
        "stationId": 0,
        "areaId": areaid,
        "sort": "default",
        "deal_attr_23": "",
        "deal_attr_24": "",
        "deal_attr_25": "",
        "poi_attr_20043": "",
        "poi_attr_20033": "",
    }


def _fetch_page(areaid, offset):
    """POST one page request and return the response, raising on HTTP errors."""
    response = requests.post(
        API_URL,
        headers=HEADERS,
        data=_build_payload(areaid, offset),
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response


def _extract_rows(poi_infos):
    """Turn the API's poiInfos entries into [name, cateName, poiid, ctPoi] rows."""
    return [[d['name'], d['cateName'], d['poiid'], d['ctPoi']] for d in poi_infos]


def _append_rows(rows, out_path):
    """Append rows to the CSV at out_path (gb18030-encoded)."""
    with open(out_path, 'a', newline='', encoding='gb18030') as f:
        csv.writer(f).writerows(rows)


def crow_id(areaid, out_path=OUTPUT_PATH):
    """Scrape name, cateName, poiid and ctPoi of every shop in one area.

    The first page is fetched to learn the area's total shop count; the
    remaining pages are then requested by stepping ``offset`` in units of
    ``PAGE_SIZE``.  Every page is appended to the same CSV file.

    Args:
        areaid: Area id sent to the API as ``areaId``.
        out_path: CSV file that all rows are appended to.

    Raises:
        requests.RequestException: The first-page request failed.
        ValueError: The first-page body is not valid JSON.
        KeyError: The first-page JSON lacks the expected fields.
        OSError: The output file cannot be opened for the first page.

    Failures on later pages are printed and that page is skipped, so one bad
    page does not discard the rest of the area.
    """
    response = _fetch_page(areaid, 0)
    poi_list = json.loads(response.text)['data']['poiList']
    # Total number of shops in this area; determines how many pages to turn.
    totalcount = poi_list['totalCount']
    datas = poi_list['poiInfos']
    print(len(datas), totalcount)

    rows = _extract_rows(datas)
    for row in rows:
        print(row[0])
    print('Page:1')
    for row in rows:
        print(row)
    _append_rows(rows, out_path)

    # Pages 2..last: the first page covered offsets [0, PAGE_SIZE).
    for offset in range(PAGE_SIZE, totalcount, PAGE_SIZE):
        print('Page:%d' % (offset // PAGE_SIZE + 1))
        try:
            response = _fetch_page(areaid, offset)
            print(response.text)
            datas = json.loads(response.text)['data']['poiList']['poiInfos']
            print(len(datas))
            _append_rows(_extract_rows(datas), out_path)
        except Exception as e:
            # Best effort: report the failure and carry on with the next page.
            print(e)


def _sub_areas(area_obj):
    """Flatten the per-district lists, dropping each list's leading "all" entry."""
    return [area for areas in area_obj.values() for area in areas[1:]]


def main():
    """Scrape every sub-area in AREA_DATA, reporting progress on stdout."""
    area_list = _sub_areas(AREA_DATA['areaObj'])
    started = time.time()
    for index, area in enumerate(area_list, start=1):
        print('开始抓取第%d个区域:' % index, area['regionName'], '店铺总数:', area['count'])
        try:
            crow_id(area['id'])
            # Elapsed time is measured from the start of the whole run.
            elapsed = time.time() - started
            print(area['name'], '抓取完成!', '时间:%d' % elapsed)
        except Exception as e:
            # Keep going with the remaining areas if one of them fails.
            print(e)


if __name__ == '__main__':
    main()
# Suspected point of failure: the cookies, and possibly other aspects as well.