矩形选框爬取POI点

高德POI:关键字搜索最多只能返回900条信息,存在爬取不全的弊端,因此改用多边形搜索中的矩形选框搜索,按网格逐块爬取相关数据。

import requests
import time
import csv
import json
import pandas as pd

# TODO: the most complete POI point crawler
def _poi_to_row(poi):
    """Flatten one POI dict from the API response into the CSV column order."""

    def field(name):
        # A missing key becomes an empty cell instead of raising KeyError.
        return poi.get(name, '')

    # 'type' is split on ';' into up to three levels; pad so that a value with
    # fewer than three parts (or a non-string value) does not raise.
    typ = field('type')
    type_parts = (typ.split(';') if isinstance(typ, str) else []) + ['', '', '']
    typ1, typ2, typ3 = type_parts[:3]

    # 'location' is "lon,lat"; pad for the same reason.
    location = field('location')
    lon_lat = (location.split(',') if isinstance(location, str) else []) + ['', '']
    lon, lat = lon_lat[:2]

    # NOTE(review): 'biz_ext' is presumably a dict, but the API appears to send
    # a non-dict placeholder (e.g. an empty list) when it has no data -- confirm.
    biz_ext = field('biz_ext')
    if not isinstance(biz_ext, dict):
        biz_ext = {}

    return [field('id'), field('parent'), field('pname'), field('cityname'),
            field('name'), field('alias'), field('adcode'), field('adname'),
            field('business_area'), typ1, typ2, typ3, lon, lat,
            field('address'), field('entr_location'), field('exit_location'),
            biz_ext.get('rating', ''), biz_ext.get('cost', ''), field('tag')]


def pachong(path='C:/Users/mumu/PycharmProjects/zhongxing/API_gaode/data_poi/data_test',
            save_name='商务住宅_住宅区',key='c3ce3f2a3a3133ad098bafef7a79c0d6',types='120300',city='370102',
            baselng=116.970025,baselat=36.726465,widthlng=0.05,widthlat=0.04,
            rows=4,cols=5,max_pages=99,timeout=10):
    """Crawl POIs of one type over a rectangular grid and save them to a CSV file.

    The area is cut into ``rows`` x ``cols`` rectangular cells. The first cell
    has its corner at (``baselng``, ``baselat``); each further row lowers the
    latitude by ``widthlat`` and each further column raises the longitude by
    ``widthlng``. For every cell the polygon-search endpoint is queried page by
    page (20 POIs per page) until a page reports no results, an error occurs,
    or ``max_pages`` pages have been fetched. Every POI becomes one CSV row.

    Args:
        path: Existing directory in which the CSV file is created.
        save_name: Label used as the last part of the file name; it is also
            printed when the crawl finishes.
        key: API key sent with every request.
            NOTE(review): a real-looking key is hard-coded as the default;
            consider loading it from configuration instead.
        types: POI type code sent as the ``types`` query parameter.
        city: Used only in the output file name; the search area itself is
            defined by the grid parameters.
        baselng: Longitude of the grid's starting corner.
        baselat: Latitude of the grid's starting corner.
        widthlng: Cell width in degrees of longitude.
        widthlat: Cell height in degrees of latitude.
        rows: Number of grid rows (default 4, the previously fixed value).
        cols: Number of grid columns (default 5, the previously fixed value).
        max_pages: Maximum number of result pages requested per cell
            (default 99, the previously fixed value).
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        None. The output is ``<path>/<city>_<types>_<save_name>.csv``.

    Raises:
        OSError: If the output file cannot be opened.
        Exceptions raised by ``requests.get`` (for example on a connection
        failure or timeout) propagate to the caller; the CSV file is closed
        before they do.
    """
    header = ['id','parent','pname','cityname','name','alias','adcode','adname','business_area','typ1',
              'typ2','typ3','lon','lat','address','entr_location','exit_location','rating','cost','tag']
    url_template = ('https://restapi.amap.com/v3/place/polygon?types={}&polygon={}'
                    '&offset=20&page={}&key={}&extensions=all')

    # The context manager guarantees the file is flushed and closed, also when
    # a request fails half-way through the crawl.
    with open('%s/%s_%s_%s.csv' % (path, city, types, save_name), 'w',
              encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for row_idx in range(rows):
            # Latitudes of the cell's two corners. Values are rounded to six
            # decimals (the original author notes that unrounded values may
            # cause unexplained errors).
            startlat = round(baselat - row_idx * widthlat, 6)
            endlat = round(startlat - widthlat, 6)
            for col_idx in range(cols):
                # Longitudes of the cell's two corners.
                startlng = round(baselng + col_idx * widthlng, 6)
                endlng = round(startlng + widthlng, 6)
                polygon = str(startlng) + "," + str(startlat) + "|" + str(endlng) + "," + str(endlat)

                # URLs are built one page at a time instead of all up front.
                for page in range(1, max_pages + 1):
                    url = url_template.format(str(types), polygon, str(page), str(key))
                    response = requests.get(url, timeout=timeout)
                    if response.status_code != 200:
                        # Previously a failed request re-used the last page's
                        # data (or raised NameError on the first page).
                        print('HTTP %s, skipping rest of cell: %s' % (response.status_code, url))
                        break
                    try:
                        payload = json.loads(response.text, strict=False)
                    except ValueError:
                        print('Invalid JSON, skipping rest of cell: %s' % url)
                        break
                    # NOTE(review): assumes the service marks failures with
                    # status "0" plus an 'info' message -- confirm against the
                    # API documentation.
                    if str(payload.get('status', '1')) == '0':
                        print('API error %s, skipping rest of cell: %s' % (payload.get('info'), url))
                        break

                    pois = payload.get('pois') or []
                    if int(payload.get('count', 0) or 0) == 0 or not pois:
                        print('翻页完毕')
                        break

                    print(page, str(url))
                    for poi in pois:
                        writer.writerow(_poi_to_row(poi))
                    # Pause between pages to avoid hammering the API.
                    time.sleep(2)

    print(save_name)

# Run the crawler once per POI category, in a fixed order, always with the
# same API key and city='370102'. Each entry is (output label, POI type code).
for _label, _type_code in (
        ('商务住宅_住宅区', '120300'),
        ('医疗保健服务_综合医院', '090100'),
        ('交通设施服务_公交车站相关', '150700'),
        ('科教文化服务_学校', '141200'),
        ('购物服务_商场', '060100'),
):
    pachong(save_name=_label, key='c3ce3f2a3a3133ad098bafef7a79c0d6',
            types=_type_code, city='370102')

 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值