完整代码在文末
例如,我们在百度地图上常常看到小区外围被蓝色虚线围起来,这条虚线就是小区的AOI(边界范围)。
现在采集这个小区边界的百度墨卡托坐标,然后按 百度墨卡托坐标 -> 百度经纬度坐标(BD09) -> 火星坐标(GCJ02) -> WGS84经纬度 的顺序进行转换。
1、首先通过抓包找到这个链接,查看它的返回值,其中有profile_geo字段,后面的内容就是相关坐标值。
2、需要注意的是,profile_geo的内容中,只有"|1-"之后成对出现的数值(x,y)才是我们要的坐标数据。
3、代码说明:不同城市可能存在同名小区,所以这里用"城市+区县+小区名称"拼接成搜索关键词来确定小区,并作为参数传入。
4、定位好相关包后处理aoi坐标值后开始解析并用列表存储
5、爬取后得到的数据格式如下:经纬度坐标值用JSON存储,方便入库,有需要的可以转成自己需要的格式。
6、我用100多个小区的清单(存储为Excel格式)进行测试,你们可以替换成自己需要的清单。
相关完整代码如下:
import json
import requests
import math
import openpyxl
# Cookies sent with every request issued by res().
# NOTE(review): these look like values copied from a logged-in browser session
# (e.g. BDUSS); presumably they are account-specific and expire — replace them
# with your own capture and avoid publishing real credentials.
cookies = {
'BAIDUID': 'CE8934CBA4E80C11234CCCE8ED2E7F9A:FG=1',
'BAIDUID_BFESS': 'CE8934CBA4E80C11234CCCE8ED2E7F9A:FG=1',
'BIDUPSID': 'CE8934CBA4E80C11234CCCE8ED2E7F9A',
'PSTM': '1718876615',
'b2b_first': '1732266489',
'BAIDU_WISE_UID': 'wapp_1732611408396_808',
'__bid_n': '1938637333d92961113152',
'ZFY': 'AsSs8GKUSJOh6:BPvrcWk5gdFW6ZO5UUONIxCiobYv8A:C',
'H_PS_PSSID': '60277_61027_61217_60853_61367_61388_61420',
'H_WISE_SIDS': '60277_61027_61217_60853_61367_61388_61420',
'H_WISE_SIDS_BFESS': '60277_61027_61217_60853_61367_61388_61420',
'showLoginPopup': '1',
'BDUSS': 'Ut1MHEweGpPRmlCQkhsQnptc0k3U3RNTn42V3l4cVYtQVlTMEVhM3Q5NlJVNUJuRVFBQUFBJCQAAAAAAAAAAAEAAACJ-wT0xOrH4cjL0qrIz9XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJHGaGeRxmhnb',
'BDUSS_BFESS': 'Ut1MHEweGpPRmlCQkhsQnptc0k3U3RNTn42V3l4cVYtQVlTMEVhM3Q5NlJVNUJuRVFBQUFBJCQAAAAAAAAAAAEAAACJ-wT0xOrH4cjL0qrIz9XmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJHGaGeRxmhnb',
'M_LG_UID': '4093967241',
'M_LG_SALT': '1bd089ce930c0b51897b43dae73b8396',
'MCITY': '-261%3A',
'validate': '13001',
'ab_sr': '1.0.1_NDc5OTk4MDI1Zjc2NWVlMDNhM2NmMzA4NWEwMTUzYWUzOWFmN2E4ZjA1MjhkMTgzMzllYWM2ZDI5MTA1ZTQ4OThhNTQ4ZmUxODRhYzg5NjEzYjFiY2EyODIyMTkxMTgxZjIyNGY4MzNlZDA1Yzk4NjJiZTVmM2I1Yzk4NGVhNGU3OWM1NDg1NWFkZWU0YThjMzhlNzRhNGI5ZjkxNzIwZWZmZTFlM2ZiMjcyODQxOGUxNDAyMmEzNmRiZTE5YTE4',
}
# HTTP request headers passed to requests.get() by res().
# NOTE(review): 'Acs-Token' and 'Referer' appear to be copied from one specific
# captured browser request; presumably the token expires — confirm and refresh
# from your own capture if requests start failing.
headers = {
'Accept': '*/*',
'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
'Acs-Token': '1735045611650_1735097456134_z8pTr5fZ7ZAB2ny/NMjmdxUIzfaUAwZQoREyNxNu0ANloscI/ExQvqJYGpN10jQuqEMiZVxQgzDopKG61YlROs4P8RrU0kqsI1V/2rvcEZPAibsrErF6nDjtZ7fHkgjtd76CROX3YcHde5BHc89c+YQBF32QC3djapCWrwlH7ghl4N6BHQS1Njlo6ddHFmdM7oIczLYXpZdEJre5q4W/7tTMHf/SElf7cOwG4daIhVD5jMaPb9lWPvQJ+Pe6Lo/Zb4qI1oknj3rxdg184obtRLAQSpfy3PZufGflDs4o/Qryv6McG62MVaNm48zlagYevXv+BfefiyvKiaw1jup65LOJ2brErybneEuJaacBDXoudzi3WDAgB/GU35NC2pyTu7mklLSTt+EETKNwurQG7UIK7BRVQjZrSo+sOZwIr2JGdsi2+lVf49DeB1SpLwVM/LuDUMTtiFSogjuU5r9nITJ9ZUD/PWgmnkePYh8entw=',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Pragma': 'no-cache',
'Referer': 'https://map.baidu.com/poi/%E4%B8%96%E5%A4%96%E6%A1%83%E6%BA%90%E5%B9%BF%E5%9C%BA/@11586647.43,3563616.3,19z?uid=3a09f8e6f4c1ecc749fd3fb7&ugc_type=3&ugc_ver=1&device_ratio=2&compat=1&pcevaname=pc4.1&querytype=detailConInfo&da_src=shareurl',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
def res(name, timeout=10):
    """Send one search request to the Baidu Map web endpoint.

    Args:
        name: Search keyword; it is placed in the ``wd`` query parameter.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error. Without it a stalled connection would block the
            whole batch indefinitely.

    Returns:
        The ``requests.Response`` object exactly as returned by
        ``requests.get``; the status code is not checked and the body is not
        parsed here.
    """
    # Build the query-string parameters.
    # NOTE(review): 'auth' and 'seckey' already contain percent-escapes
    # (e.g. %3D); requests will escape the '%' again when encoding params —
    # confirm the server accepts the double-encoded form.
    params = {
        'newmap': '1',
        'reqflag': 'pcmap',
        'biz': '1',
        'from': 'webmap',
        'da_par': 'direct',
        'qt': 's',
        'wd': name,
        'src': '0',
        'pn': '0',
        'sug': '0',
        'l': '19',
        'b': '(11586263.43,3563645.3;11587031.43,3563811.3)',
        'auth': 'g3LKOLaXZdWAwWcew6D4Ny8B5ORYybzwuxNBHVTNEzTt15XCCXNA5Ax3wACV%3Dz8yvkGcuVtvvhguVtvyheuztx1NHwWvvCQMuVtvIPcuxtw8wkvyuw904v7uv%40vcuVtvc3CuVtvcPPuxEtHrQH2Y%3DMeGEnrR1VDCoPHB9AcvY1SGpuxNt3yw8J400IvrMhuTzzztHee%40ewzvf0wd0vyOCI7UOFAI',
        'seckey': 'Q%2FIiEn%2BkZnmJoUuEZ47tk7uDoC7cYMXoWwzVzd6AEfg%3D',
    }
    # Send the request.
    response = requests.get(
        'https://map.baidu.com/',
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=timeout,
    )
    return response
def aoi_geom(content):
    """Extract the boundary coordinates of the first search result.

    Reads ``content['content'][0]['profile_geo']``, keeps the text after the
    first ``'|1-'`` marker (or the whole string when the marker is absent),
    strips ``';'`` from both ends and splits the rest on commas.

    Returns:
        A list of ``"x,y"`` strings, one per consecutive pair of values.

    Raises:
        KeyError / IndexError: when the expected keys are missing, or when the
            number of comma-separated values is odd.
    """
    raw_geo = content['content'][0]['profile_geo']
    before, marker, after = raw_geo.partition('|1-')
    # Without the marker the whole string is treated as the payload.
    payload = after if marker else before
    # Drop the semicolon terminator.
    tokens = payload.strip(';').split(',')
    # Group the flat value list two by two.
    pairs = []
    for start in range(0, len(tokens), 2):
        pairs.append(tokens[start] + "," + tokens[start + 1])
    return pairs
# Lower bounds of the Mercator-y bands, in descending order.
# convert_MCT_2_BD09 walks this tuple and uses the first band whose bound is
# not greater than the y value; band j uses coefficient row MC2LL[j].
MCBAND = (12890594.86, 8362377.87, 5591021, 3481989.83, 1678043.12, 0)
# One coefficient row per band, as consumed by convert_MCT_2_BD09:
#   [0], [1]  -> linear terms for the longitude (offset + scale * |x|)
#   [2]..[8]  -> polynomial coefficients (degree 0..6) for the latitude
#   [9]       -> divisor that normalises |y| before the polynomial is evaluated
MC2LL = [
[1.410526172116255e-8, 0.00000898305509648872, -1.9939833816331, 200.9824383106796, -187.2403703815547,
91.6087516669843, -23.38765649603339, 2.57121317296198, -0.03801003308653, 17337981.2],
[-7.435856389565537e-9, 0.000008983055097726239, -0.78625201886289, 96.32687599759846, -1.85204757529826,
-59.36935905485877, 47.40033549296737, -16.50741931063887, 2.28786674699375, 10260144.86],
[-3.030883460898826e-8, 0.00000898305509983578, 0.30071316287616, 59.74293618442277, 7.357984074871,
-25.38371002664745, 13.45380521110908, -3.29883767235584, 0.32710905363475, 6856817.37],
[-1.981981304930552e-8, 0.000008983055099779535, 0.03278182852591, 40.31678527705744, 0.65659298677277,
-4.44255534477492, 0.85341911805263, 0.12923347998204, -0.04625736007561, 4482777.06],
[3.09191371068437e-9, 0.000008983055096812155, 0.00006995724062, 23.10934304144901, -0.00023663490511,
-0.6321817810242, -0.00663494467273, 0.03430082397953, -0.00466043876332, 2555164.4],
[2.890871144776878e-9, 0.000008983055095805407, -3.068298e-8, 7.47137025468032, -0.00000353937994,
-0.02145144861037, -0.00001234426596, 0.00010322952773, -0.00000323890364, 826088.5]
]
# Angular factor used by bd09togcj02 inside its sin/cos correction terms.
x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626 # pi
a = 6378245.0 # semi-major axis of the ellipsoid used by gcj02towgs84
# NOTE(review): the original comment labelled this "flattening"; the value and
# its use in gcj02towgs84 (1 - ee * sin^2(lat)) look like the squared
# eccentricity instead — confirm.
ee = 0.00669342162296594323
def convert_MCT_2_BD09(lon, lat):
    """Convert a Baidu Mercator point to Baidu (BD-09) longitude/latitude.

    Args:
        lon: Mercator x value.
        lat: Mercator y value.

    Returns:
        ``(longitude, latitude)`` tuple of floats.

    Raises:
        ValueError: if no band in ``MCBAND`` matches (e.g. ``lat`` is NaN).
    """
    abs_lat = abs(lat)
    # Pick the band by magnitude: the sign is re-applied at the end, so a
    # negative y must select the same band as its mirror image. Comparing the
    # signed value would reject every negative y and make the sign handling
    # below unreachable.
    ax = next(
        (row for bound, row in zip(MCBAND, MC2LL) if abs_lat >= bound),
        None,
    )
    if ax is None:
        raise ValueError("Invalid latitude")

    e = ax[0] + ax[1] * abs(lon)
    i = abs_lat / ax[9]
    # Degree-6 polynomial in the normalised y; the expression is kept in its
    # expanded form so floating-point results stay unchanged.
    aw = (ax[2] + ax[3] * i + ax[4] * i * i + ax[5] * i * i * i
          + ax[6] * i * i * i * i + ax[7] * i * i * i * i * i
          + ax[8] * i * i * i * i * i * i)
    if lon < 0:
        e *= -1
    if lat < 0:
        aw *= -1
    return e, aw
def out_of_china(lng, lat):
    """Tell whether a point lies outside the bounding box used for China.

    Points outside the box are not offset by the GCJ-02 conversion.

    :param lng: longitude in degrees
    :param lat: latitude in degrees
    :return: ``True`` when the point is outside the box, ``False`` otherwise
    """
    return (
        lng < 72.004
        or lng > 137.8347
        or lat < 0.8293
        or lat > 55.8271
    )
def transformlat(lng, lat):
    """Return the raw latitude offset term used by gcj02towgs84.

    The caller passes coordinates already shifted by (105, 35). The result is
    a polynomial in ``lng``/``lat`` plus three groups of sine terms, summed in
    the order below.
    """
    polynomial = (-100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat
                  + 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)))
    lng_waves = (20.0 * math.sin(6.0 * lng * pi)
                 + 20.0 * math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    lat_short_waves = (20.0 * math.sin(lat * pi)
                       + 40.0 * math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    lat_long_waves = (160.0 * math.sin(lat / 12.0 * pi)
                      + 320 * math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return polynomial + lng_waves + lat_short_waves + lat_long_waves
def transformlng(lng, lat):
    """Return the raw longitude offset term used by gcj02towgs84.

    The caller passes coordinates already shifted by (105, 35). The result is
    a polynomial in ``lng``/``lat`` plus three groups of sine terms in
    ``lng``, summed in the order below.
    """
    polynomial = (300.0 + lng + 2.0 * lat + 0.1 * lng * lng
                  + 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)))
    fast_waves = (20.0 * math.sin(6.0 * lng * pi)
                  + 20.0 * math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    medium_waves = (20.0 * math.sin(lng * pi)
                    + 40.0 * math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    slow_waves = (150.0 * math.sin(lng / 12.0 * pi)
                  + 300.0 * math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return polynomial + fast_waves + medium_waves + slow_waves
def bd09togcj02(bd_lon, bd_lat):
    """
    Convert a Baidu (BD-09) coordinate to the GCJ-02 ("Mars") system.
    :param bd_lon: BD-09 longitude
    :param bd_lat: BD-09 latitude
    :return: ``[longitude, latitude]`` list in GCJ-02
    """
    # Remove the constant BD-09 shift first.
    dx = bd_lon - 0.0065
    dy = bd_lat - 0.006
    # Work in polar form: correct the radius and the angle separately.
    radius = math.sqrt(dx * dx + dy * dy) - 0.00002 * math.sin(dy * x_pi)
    angle = math.atan2(dy, dx) - 0.000003 * math.cos(dx * x_pi)
    return [radius * math.cos(angle), radius * math.sin(angle)]
def gcj02towgs84(lng, lat):
    """
    Convert a GCJ-02 ("Mars") coordinate to WGS-84.

    The offset is evaluated once at the input point and applied in the
    opposite direction (``2 * p - (p + offset)``); no iteration is performed.

    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return: ``[longitude, latitude]`` list. Points for which
        ``out_of_china`` is true are returned unchanged, also as a list so
        both branches have the same return type.
    """
    if out_of_china(lng, lat):
        return [lng, lat]
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    # Scale the raw offsets into degrees using the ellipsoid constants.
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [lng * 2 - mglng, lat * 2 - mglat]
def _first_poi_field(content, key, default):
    """Return ``content['content'][0][key]``, or ``default`` when it is absent."""
    try:
        return content['content'][0][key]
    except (KeyError, IndexError, TypeError):
        return default


def detalis(content):
    """Extract phone, category tag and boundary polygon of the first result.

    Args:
        content: Parsed JSON body of the search response.

    Returns:
        ``(phone, std_tag, polygon_json)``. ``phone`` and ``std_tag`` fall
        back to placeholder strings when missing. ``polygon_json`` is a JSON
        string of the form ``{"type": "Polygon", "coordinates": [[...]]}``
        whose points went through Mercator -> BD-09 -> GCJ-02 -> WGS-84, or
        ``None`` when the boundary is missing or cannot be parsed.
    """
    phone = _first_poi_field(content, 'tel', '暂无电话信息')
    std_tag = _first_poi_field(content, 'std_tag', '暂无所属类型信息')

    # Collect the ring in a local list so a failure half-way through cannot
    # leave stale points behind for the next call.
    ring = []
    try:
        for item in aoi_geom(content):
            lon, lat = item.split(',')
            bd09ll_lon, bd09ll_lat = convert_MCT_2_BD09(float(lon), float(lat))
            # Convert the point that was just computed (a fixed sample
            # coordinate here would collapse every polygon to one point).
            gcj_lon, gcj_lat = bd09togcj02(bd09ll_lon, bd09ll_lat)
            ring.append(list(gcj02towgs84(gcj_lon, gcj_lat)))
    except (KeyError, IndexError, TypeError, AttributeError, ValueError):
        # No usable boundary for this result.
        return phone, std_tag, None

    polygon = {
        "type": "Polygon",
        "coordinates": [ring]
    }
    # Serialise the polygon object to a JSON string.
    polygon_json = json.dumps(polygon, ensure_ascii=False, indent=4)
    return phone, std_tag, polygon_json
if __name__ == "__main__":
    # Batch driver: read community names from an Excel sheet, look each one
    # up, and write the boundary polygon (compact JSON) into column F.
    filepath = "获取百度aoi的测试数据.xlsx"  # replace with the path of your own Excel file
    # NOTE(review): user-specific absolute path — adjust before running.
    output_path = r"C:\Users\Chambliss\Desktop\处理后的_百度aoi的测试数据.xlsx"
    # Module-level scratch list kept for detalis() implementations that
    # accumulate converted points in a global named `www`.
    www = []
    try:
        # Only the load is guarded, so a bad output path is not reported as a
        # missing input file.
        workbook = openpyxl.load_workbook(filepath)
    except FileNotFoundError:
        print(f"错误:文件 '{filepath}' 未找到。")
    else:
        sheet = workbook.active  # the active worksheet
        for row in range(2, sheet.max_row + 1):
            # Reset before every row so points from a failed lookup never
            # leak into the next polygon.
            www.clear()
            name = sheet.cell(row=row, column=2).value  # column B: community name
            if not name:
                # Blank row (max_row can include trailing empty rows).
                continue
            city = sheet.cell(row=row, column=3).value or ''  # column C: city
            county = sheet.cell(row=row, column=4).value or ''  # column D: county/district
            address = sheet.cell(row=row, column=5).value  # column E: address
            # City + county + name disambiguates communities that share a name.
            combined_string = f"{city}{county}{name}"
            try:
                content = res(combined_string).json()
            except (requests.RequestException, ValueError) as err:
                # One failed lookup must not abort the batch and lose the
                # rows that were already processed.
                print(f"跳过 {name}: 请求或解析失败 ({err})")
                continue
            phone, std_tag, polygon_json = detalis(content)
            if polygon_json is None:
                continue
            print(name, address, phone, std_tag)
            compressed_json = json.dumps(json.loads(polygon_json), separators=(',', ':'))
            sheet.cell(row=row, column=6).value = compressed_json  # column F: AOI polygon
        # Save the modified workbook.
        workbook.save(output_path)
        print("Excel 文件处理完成并保存为 '处理后的_百度aoi的测试数据.xlsx'")