# coding=utf-8
import csv
import time
import requests
import json


# Scrape the shop id, ctPoi and cateName for one district; the argument is the district id
def crow_id(areaid):
    id_list = []
    url = 'https://meishi.meituan.com/i/api/channel/deal/list'
    head = {'Host': 'meishi.meituan.com',
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Referer': 'https://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
            'Cookie': 'XXXXXXXXXXXXXX'
            }
    # p = {'https': 'https://127.0.0.1:8080'}
    data = {"uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3", "platform": 3, "app": "",
            "partner": 126, "riskLevel": 1, "optimusCode": 10,
            "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
            "offset": 0, "limit": 15, "cateId": 1, "lineId": 0, "stationId": 0, "areaId": areaid, "sort": "default",
            "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "", "poi_attr_20043": "", "poi_attr_20033": ""}
    # r = requests.post(url, headers=head, data=data, proxies=p)
    r = requests.post(url, headers=head, data=data)
    result = json.loads(r.text)
    totalcount = result['data']['poiList']['totalCount']  # total shop count for the district, used to work out how many pages to fetch
    datas = result['data']['poiList']['poiInfos']
    print(len(datas), totalcount)
    for d in datas:
        print(d['name'])
        id_list.append([d['name'], d['cateName'], d['poiid'], d['ctPoi']])
    print('Page:1')
    # save the data to a local csv (the same file the later pages append to)
    with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
        write = csv.writer(f)
        for i in id_list:
            print(i)
            write.writerow(i)
    # fetch page 2 through the last page
    offset = 0
    if totalcount > 15:
        totalcount -= 15
        while offset < totalcount:
            id_list = []
            offset += 15
            m = offset // 15 + 1  # current page number
            print('Page:%d' % m)
            # build the POST payload; paging works by stepping offset
            data2 = {"uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3", "platform": 3, "app": "",
                     "partner": 126, "riskLevel": 1, "optimusCode": 10,
                     "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
                     "offset": offset, "limit": 15, "cateId": 1, "lineId": 0, "stationId": 0, "areaId": areaid,
                     "sort": "default",
                     "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "", "poi_attr_20043": "",
                     "poi_attr_20033": ""}
            try:
                # r = requests.post(url, headers=head, data=data2, proxies=p)
                r = requests.post(url, headers=head, data=data2)
                # print(r.text)  # uncomment to inspect the raw response
                result = json.loads(r.text)
                datas = result['data']['poiList']['poiInfos']
                print(len(datas))
                for d in datas:
                    id_list.append([d['name'], d['cateName'], d['poiid'], d['ctPoi']])
                # append this page to the local csv
                with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
                    write = csv.writer(f)
                    for i in id_list:
                        write.writerow(i)
            except Exception as e:
                print(e)


if __name__ == '__main__':
    # Area info copied straight out of the page's HTML. The Nan'ao New District data needs
    # extra handling because it has no sub-areas of its own.
    a = {"areaObj": {"28": [{"id": 28, "name": "全部", "regionName": "福田区", "count": 4022},
                            {"id": 7994, "name": "岗厦", "regionName": "岗厦", "count": 110},
                            {"id": 7996, "name": "福田保税区", "regionName": "福田保税区", "count": 29}],
                     "29": [{"id": 29, "name": "全部", "regionName": "罗湖区", "count": 2191},
                            {"id": 14095, "name": "KK mall", "regionName": "KK mall", "count": 74}],
                     "30": [{"id": 30, "name": "全部", "regionName": "南山区", "count": 3905},
                            {"id": 25152, "name": "南山京基百纳", "regionName": "南山京基百纳", "count": 22},
                            {"id": 36635, "name": "深圳湾", "regionName": "深圳湾", "count": 17}],
                     "31": [{"id": 31, "name": "全部", "regionName": "盐田区", "count": 407},
                            {"id": 754, "name": "大小梅沙", "regionName": "大小梅沙", "count": 36},
                            {"id": 38055, "name": "溪涌", "regionName": "溪涌", "count": ""}],
                     "32": [{"id": 32, "name": "全部", "regionName": "宝安区", "count": 6071},
                            {"id": 37084, "name": "光明新区", "regionName": "光明新区", "count": 1}],
                     "33": [{"id": 33, "name": "全部", "regionName": "龙岗区", "count": 5193},
                            {"id": 36636, "name": "坪山高铁站", "regionName": "坪山高铁站", "count": 41},
                            {"id": 37501, "name": "龙岗中心城", "regionName": "龙岗中心城", "count": 365}],
                     "9553": [{"id": 9553, "name": "全部", "regionName": "龙华区", "count": 3080},
                              {"id": 37723, "name": "龙华新区", "regionName": "龙华新区", "count": 14}],
                     "23420": [{"id": 23420, "name": "全部", "regionName": "坪山区", "count": 393},
                               {"id": 9535, "name": "南澳大鹏新区", "regionName": "南澳大鹏新区", "count": 91}]
                     }}
    datas = a['areaObj']
    area_list = []
    for district in datas.values():
        for d in district[1:]:  # skip each district's leading "全部" (all) entry
            area_list.append(d)  # each element is a dict describing one sub-area
    n = 0
    old = time.time()
    for i in area_list:
        n += 1
        print('Scraping area %d:' % n, i['regionName'], 'total shops:', i['count'])
        try:
            crow_id(i['id'])
            now = time.time() - old
            print(i['name'], 'done!', 'elapsed: %ds' % now)
        except Exception as e:
            print(e)
    # Suspected failure point: the cookies, though other causes are possible
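
If the cookies really are the weak spot, one fix worth trying is to stop hard-coding the raw Cookie header. A minimal sketch of that idea, assuming you copy fresh cookie values out of a logged-in browser session (the cookie names below are placeholders, not taken from the original request):

import requests

session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
})
# Placeholder values: paste the real ones from your browser's dev tools.
session.cookies.update({'uuid': 'XXXXXXXX', '_lxsdk_cuid': 'XXXXXXXX'})
# The session builds the Cookie header itself and carries any Set-Cookie
# updates from the server across all later paging requests.
r = session.post('https://meishi.meituan.com/i/api/channel/deal/list',
                 data={'offset': 0, 'limit': 15})
print(r.status_code)
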
This post shows one way to scrape shop IDs for specific districts of Meituan's food channel with Python. It sends POST requests, parses the JSON responses to pull out each shop's name, category, ID and ctPoi token, and appends the results to a CSV file. The code above walks through the whole implementation: request-header setup, payload construction, response parsing and error handling.
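
For reference, the paging in the script is plain offset/limit bookkeeping. A standalone illustration of the arithmetic, with a made-up totalCount:

import math

total_count = 47    # hypothetical value of data.poiList.totalCount
limit = 15
pages = math.ceil(total_count / limit)             # 4 requests in total
offsets = [page * limit for page in range(pages)]  # [0, 15, 30, 45]
print(pages, offsets)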