# coding=utf-8
import io
import json
import random
import sys
import urllib
reload(sys)
sys.setdefaultencoding("utf-8")
"""
02作业要求:
寻找武汉市中学(或小学)周围500米(或其他)内的网吧,要求:提交代码py文件及运行结果文件(txt),txt文件格式如下:
1,XXX小学
1-1,XXX网吧
1-2,XXX网吧
2,XXX小学
2-1,XXX网吧
@:param
url:获取查询结果的网页地址
json_file: 解析了网页结果的json文件
bounds:查询区域的经纬度范围,该类中默认为武汉市的外接矩形经纬度
@:author
2019.3.23
"""
class SearchPOI:
    """Find schools inside a bounding box and the internet cafes around them.

    All queries go to the place-search endpoint stored in ``ipaddress``.
    Schools are looked up by rectangular region (``bounds=``); cafes are
    looked up by circle around each school (``location=`` + ``radius``).

    Attributes set by ``__init__``:
        ipaddress: endpoint prefix, ending in ``query=``.
        address: keyword used for the school search.
        filter: keyword used for the search around each school.
        radius: ready-made ``&radius=...`` URL fragment.
        total_bounds: list of regions ``[lat_min, lon_min, lat_max, lon_max]``;
            ``split_region`` appends the quadrants it creates to this list.
        output, page_para, coord: ready-made URL fragments.
        ak_pool: list of developer-key URL fragments, one is picked at random
            for every request.
        str_bounds, url: the initial region as a string and the URL prefix
            built from it (kept for compatibility; not used by the methods).
    """

    # Results requested per page; also used to derive the number of pages.
    PAGE_SIZE = 20
    # A region reporting at least this many results is split into quadrants
    # (the original comments describe 400 as the limit for one region).
    SPLIT_THRESHOLD = 400
    # Placeholder ("replace with your own ak"). The URL builders paste an entry
    # directly between "output=json&" and "page_size=...", so each real entry
    # presumably has the form 'ak=<key>&' -- confirm against your key format.
    DEFAULT_AK_POOL = ['换成自己的ak']

    def __init__(self, ak_pool=None):
        """Set up the query parameters and the first region to scan.

        Args:
            ak_pool: optional list of developer-key URL fragments. When
                omitted, ``DEFAULT_AK_POOL`` (a placeholder) is used.

        Raises:
            ValueError: if the resulting key pool is empty.
        """
        self.ipaddress = 'http://api.map.baidu.com/place/v2/search?query='
        self.address = '中学'
        # Latitude/longitude range of Wuhan, i.e. the area to search.
        bounds = [29.966667, 113.683333, 31.366667, 115.083333]
        self.radius = '&radius=500'
        self.filter = '网吧'
        # Every region that has to be visited; quadrants of oversized regions
        # are appended here by split_region.
        self.total_bounds = [bounds]
        # Ask the service for JSON output.
        self.output = "output=" + "json"
        # Pool of developer keys, so one throttled key does not stall the run.
        self.ak_pool = list(self.DEFAULT_AK_POOL if ak_pool is None else ak_pool)
        if not self.ak_pool:
            raise ValueError("ak_pool must contain at least one entry")
        self.page_para = "page_size=" + str(self.PAGE_SIZE) + "&page_num="
        # NOTE(review): the original comment says this selects WGS84 data;
        # confirm the exact meaning of coord_type=1 against the API docs.
        self.coord = "&coord_type=1"
        # Initial region joined into the "a,b,c,d" form used in URLs.
        self.str_bounds = ','.join([str(_) for _ in bounds])
        # Initial URL prefix (no page number appended).
        self.url = (self.ipaddress + self.address + '&' + "bounds=" + self.str_bounds
                    + '&' + self.output + '&'
                    + random.choice(self.ak_pool) + self.page_para)

    @staticmethod
    def get_json_file(url):
        """Fetch ``url`` and return the decoded JSON document.

        Returns:
            The parsed JSON object, or ``None`` when it has no ``"total"``
            key. The callers treat ``None`` as "this key is currently
            rejected" and retry with another randomly chosen key.
        """
        url_file = urllib.urlopen(url)
        try:
            url_result = url_file.read()
        finally:
            # Always release the connection, even if read() fails.
            url_file.close()
        json_file = json.loads(url_result)
        try:
            int(json_file["total"])
        except KeyError:
            return None
        return json_file

    @staticmethod
    def read_json_file(json_file):
        """Flatten one result page into ``[name, location, name, location, ...]``."""
        flattened = []
        for entry in json_file["results"]:
            flattened.append(entry["name"])
            flattened.append(entry["location"])
        return flattened

    @staticmethod
    def total_judge(json_file):
        """Return the ``"total"`` field of a response as an int."""
        return int(json_file["total"])

    @staticmethod
    def _page_count(total, page_size=20):
        """Return how many pages of ``page_size`` are needed for ``total`` results."""
        return (total + page_size - 1) // page_size

    def _query_tail(self, pagenumber):
        """Return the URL suffix shared by both query kinds (output, key, paging)."""
        return (self.output + '&' + random.choice(self.ak_pool)
                + self.page_para + str(pagenumber) + self.coord)

    def _fetch_until_ok(self, build_url, *args):
        """Request ``build_url(*args)`` repeatedly until a usable response arrives.

        The URL is rebuilt on every attempt so that a different key may be
        drawn from the pool. There is deliberately no retry limit: with a
        pool that contains no accepted key this never returns.
        """
        response = self.get_json_file(build_url(*args))
        while response is None:
            response = self.get_json_file(build_url(*args))
        return response

    def create_url(self, temp_region, pagenumber):
        """Build the school-search URL for one region and page number."""
        region_text = ','.join([str(_) for _ in temp_region])
        return (self.ipaddress + self.address + '&' + "bounds=" + region_text + '&'
                + self._query_tail(pagenumber))

    def create_buffer_url(self, lat, lon, pagenumber):
        """Build the URL for searching ``self.filter`` around a point.

        The search circle is centred on ``lat``/``lon`` with the radius
        configured in ``self.radius``.
        """
        return (self.ipaddress + self.filter + '&' + "location=" + str(lat) + ','
                + str(lon) + self.radius + '&' + self._query_tail(pagenumber))

    def split_region(self, latmin, lonmin, latmax, lonmax):
        """Split a region into four quadrants and append them to ``total_bounds``."""
        # Centre of the region; 2.0 keeps the division exact for int input.
        latmid = (latmax + latmin) / 2.0
        lonmid = (lonmin + lonmax) / 2.0
        self.total_bounds.append([latmid, lonmin, latmax, lonmid])  # upper left
        self.total_bounds.append([latmin, lonmin, latmid, lonmid])  # lower left
        self.total_bounds.append([latmid, lonmid, latmax, lonmax])  # upper right
        self.total_bounds.append([latmin, lonmid, latmid, lonmax])  # lower right

    def get_school(self, totalbounds):
        """Collect every school found in the given regions.

        Regions reporting ``SPLIT_THRESHOLD`` or more results are split into
        quadrants, which are then visited as well. A split region gets a
        fifth element appended as a marker, and regions that already carry
        the marker are skipped.

        Args:
            totalbounds: list of regions; usually ``self.total_bounds``.

        Returns:
            Flat list ``[name, location, name, location, ...]``.
        """
        school_result = []
        # Private work queue: sub-regions are visited no matter which list
        # the caller passed in, and nothing is appended to a list while a
        # for-loop is iterating over it.
        queue = list(totalbounds)
        position = 0
        while position < len(queue):
            bounds = queue[position]
            position += 1
            if len(bounds) == 5:
                # Marker present: this region was already split.
                continue
            first_page = self._fetch_until_ok(self.create_url, bounds, 0)
            total = self.total_judge(first_page)
            if total >= self.SPLIT_THRESHOLD:
                already_known = len(self.total_bounds)
                self.split_region(bounds[0], bounds[1], bounds[2], bounds[3])
                bounds.append(1)
                queue.extend(self.total_bounds[already_known:])
                continue
            for page in range(self._page_count(total, self.PAGE_SIZE)):
                # Page 0 is already in hand from the probe request above.
                if page == 0:
                    page_json = first_page
                else:
                    page_json = self._fetch_until_ok(self.create_url, bounds, page)
                school_result.extend(self.read_json_file(page_json))
        return school_result

    def get_wangba(self, school_result):
        """Look up the internet cafes around every school.

        Args:
            school_result: flat ``[name, location, ...]`` list as returned by
                ``get_school``; each location needs ``'lat'`` and ``'lng'``.

        Returns:
            One list per school that has at least one hit, shaped
            ``[school_name, cafe_name, cafe_location, ...]``. All result
            pages of a school are merged into that single list.
        """
        wangba_result = []
        pairs = zip(school_result[0::2], school_result[1::2])
        for order, (school_name, location) in enumerate(pairs, 1):
            # The school's coordinates are the centre of the search circle.
            lat = str(location['lat'])
            lon = str(location['lng'])
            first_page = self._fetch_until_ok(self.create_buffer_url, lat, lon, 0)
            total = self.total_judge(first_page)
            print('正在遍历第' + str(order) + '所学校')
            if total == 0:
                # Nothing nearby; move on to the next school.
                continue
            per_sch_wb = [school_name]
            for page in range(self._page_count(total, self.PAGE_SIZE)):
                if page == 0:
                    page_json = first_page
                else:
                    page_json = self._fetch_until_ok(
                        self.create_buffer_url, lat, lon, page)
                per_sch_wb.extend(self.read_json_file(page_json))
            wangba_result.append(per_sch_wb)
        return wangba_result

    @staticmethod
    def write2txt(wangba_result, path=u'02.txt'):
        """Write the schools and their cafes to a UTF-8 text file.

        Each school starts with a blank line followed by ``"<n>, <school>"``;
        its cafes follow as ``"<n>-<k>, <cafe>"``, one per line.

        Args:
            wangba_result: lists shaped like the return value of ``get_wangba``.
            path: output file; defaults to ``02.txt`` in the working directory.
        """
        lines = []
        for number, entry in enumerate(wangba_result, 1):
            lines.append(u'\n{0}, {1}\n'.format(number, entry[0]))
            # Names sit at the odd positions; the locations between them
            # are not written out.
            for k, cafe_name in enumerate(entry[1::2], 1):
                lines.append(u'{0}-{1}, {2}\n'.format(number, k, cafe_name))
        with io.open(path, 'w', encoding='utf-8') as f:
            f.write(u''.join(lines))
# Script entry point
if __name__ == "__main__":
    # The searcher stays bound to the module-level name `searchPoi`, since
    # other code in this file may look it up by that name.
    searchPoi = SearchPOI()
    # Step 1: gather every school inside the configured area.
    schools = searchPoi.get_school(searchPoi.total_bounds)
    # The list alternates name/location, so half its length is the count.
    school_count = (len(schools) + 1) // 2
    print("该区域学校总数为" + str(school_count))
    # Step 2: look up the internet cafes around each school.
    cafes_by_school = searchPoi.get_wangba(schools)
    # Step 3: dump the result to the text file.
    searchPoi.write2txt(cafes_by_school)
    print("txt文件已写入到当前目录下!")