# 高德POI:鉴于关键字搜索最多只能返回900条结果、数据爬取不全,改用多边形搜索的矩形选框,按网格逐块爬取相关数据
import requests
import time
import csv
import json
import pandas as pd
#todo 最全的poi点爬取
# Column order of the output CSV; also written as its header row.
_POI_COLUMNS = ['id', 'parent', 'pname', 'cityname', 'name', 'alias', 'adcode', 'adname',
                'business_area', 'typ1', 'typ2', 'typ3', 'lon', 'lat', 'address',
                'entr_location', 'exit_location', 'rating', 'cost', 'tag']

# Polygon-search URL template: types, polygon, page number, API key.
_POLYGON_URL = ('https://restapi.amap.com/v3/place/polygon?types={}&polygon={}'
                '&offset=20&page={}&key={}&extensions=all')

# Seconds to wait for a single HTTP response before giving up on that page.
_REQUEST_TIMEOUT = 30


def _split_padded(text, sep, size):
    """Split *text* on *sep* into exactly *size* parts, padding with ''.

    Non-string input yields *size* empty strings, so a short or missing
    field never raises IndexError/AttributeError.
    """
    parts = text.split(sep) if isinstance(text, str) else []
    return (parts + [''] * size)[:size]


def _poi_to_row(poi):
    """Flatten one POI dict of the response into a row ordered like _POI_COLUMNS."""
    typ1, typ2, typ3 = _split_padded(poi.get('type'), ';', 3)
    lon, lat = _split_padded(poi.get('location'), ',', 2)
    # NOTE(review): biz_ext is presumably a dict with 'rating'/'cost'; guard
    # against any other shape instead of crashing the whole crawl.
    biz_ext = poi.get('biz_ext')
    if not isinstance(biz_ext, dict):
        biz_ext = {}
    return [poi.get('id', ''), poi.get('parent', ''), poi.get('pname', ''),
            poi.get('cityname', ''), poi.get('name', ''), poi.get('alias', ''),
            poi.get('adcode', ''), poi.get('adname', ''), poi.get('business_area', ''),
            typ1, typ2, typ3, lon, lat, poi.get('address', ''),
            poi.get('entr_location', ''), poi.get('exit_location', ''),
            biz_ext.get('rating', ''), biz_ext.get('cost', ''), poi.get('tag', '')]


def _write_page(writer, response, page, url):
    """Write the POIs of one HTTP response; return False when paging should stop."""
    if response.status_code != 200:
        # Same as before: a non-200 page is skipped and the next page is tried.
        return True
    payload = json.loads(response.text, strict=False)
    if 'count' not in payload:
        # Presumably an error payload (bad key, quota exceeded, ...) - TODO confirm.
        print('unexpected response:', payload.get('info'))
        return False
    if int(payload['count']) == 0:
        print('翻页完毕')
        return False
    print(page, str(url))
    pois = payload.get('pois') or []
    for poi in pois:
        writer.writerow(_poi_to_row(poi))
    # An empty page means there is nothing left to fetch for this rectangle.
    return bool(pois)


def _crawl_rectangle(writer, types, polygon, key, max_pages, pause):
    """Request pages 1..max_pages for one rectangle and write every POI found."""
    for page in range(1, max_pages + 1):
        url = _POLYGON_URL.format(str(types), polygon, str(page), str(key))
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # Best effort: report the failed page and carry on with the next one.
            print(page, 'request failed:', err)
            more_pages = True
        else:
            more_pages = _write_page(writer, response, page, url)
        # Pause after every request to stay gentle on the API.
        time.sleep(pause)
        if not more_pages:
            break


def pachong(path='C:/Users/mumu/PycharmProjects/zhongxing/API_gaode/data_poi/data_test',
            save_name='商务住宅_住宅区', key='c3ce3f2a3a3133ad098bafef7a79c0d6', types='120300',
            city='370102', baselng=116.970025, baselat=36.726465, widthlng=0.05, widthlat=0.04,
            rows=4, cols=5, max_pages=99, pause=2):
    """Crawl POIs of one type over a grid of rectangles and save them as CSV.

    The grid starts at (baselng, baselat) and extends `cols` cells to
    increasing longitude and `rows` cells to decreasing latitude. Each cell is
    queried page by page through the polygon-search URL, and every POI becomes
    one row of ``<path>/<city>_<types>_<save_name>.csv``.

    Args:
        path: Existing directory that receives the CSV file.
        save_name: Label used in the file name and printed when finished.
        key: API key placed in the request URL.
        types: POI type code placed in the request URL.
        city: Only used in the output file name; it is not sent to the API.
        baselng, baselat: Longitude/latitude of the grid's starting corner.
        widthlng, widthlat: Cell size in degrees of longitude/latitude.
        rows, cols: Number of grid cells along latitude/longitude.
        max_pages: Highest page number requested per cell.
        pause: Seconds to sleep after each request.
    """
    out_name = '%s/%s_%s_%s.csv' % (path, city, types, save_name)
    # The context manager guarantees the file is flushed and closed, even on errors.
    with open(out_name, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(_POI_COLUMNS)
        for num in range(rows):
            # Round to 6 decimals so float noise does not leak into the URL.
            startlat = round(baselat - num * widthlat, 6)
            endlat = round(startlat - widthlat, 6)
            for j in range(cols):
                startlng = round(baselng + j * widthlng, 6)
                endlng = round(startlng + widthlng, 6)
                # Rectangle given by two opposite corners: "lng,lat|lng,lat".
                polygon = str(startlng) + "," + str(startlat) + "|" + str(endlng) + "," + str(endlat)
                _crawl_rectangle(writer, types, polygon, key, max_pages, pause)
    print(save_name)
# Crawl every POI category of interest for district 370102, one CSV per
# category, using the same API key for all of them.
api_key = 'c3ce3f2a3a3133ad098bafef7a79c0d6'
categories = (
    ('商务住宅_住宅区', '120300'),
    ('医疗保健服务_综合医院', '090100'),
    ('交通设施服务_公交车站相关', '150700'),
    ('科教文化服务_学校', '141200'),
    ('购物服务_商场', '060100'),
)
for category_name, type_code in categories:
    pachong(save_name=category_name, key=api_key, types=type_code, city='370102')