1、准备需要爬取的小区名称,存放在txt文本文件中(每行一个小区名称)
# Read residential-compound names from a text file.
def read_residential_names(file_path):
    """
    Read residential-compound names from a text file, one name per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped.

    :param file_path: path of the text file holding the names
    :return: list of names; an empty list (after printing a notice) when
             ``file_path`` does not exist
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return []
    # "utf-8-sig" decodes plain UTF-8 as well, but additionally drops the
    # byte-order mark some editors prepend. str.strip() does not remove a BOM,
    # so with plain "utf-8" it would stay glued to the first name.
    with open(file_path, "r", encoding="utf-8-sig") as file:
        stripped_lines = (line.strip() for line in file)
        return [name for name in stripped_lines if name]
2、需要根据住宅区名称和所在地区获取其UID
def get_residential_uid(residential_name, region, bmap_key):
    """
    Look up the Baidu Map UID of a residential compound by name and region.

    Sends a place-search request and returns the UID of the first result
    whose name does not contain "-".

    :param residential_name: name of the residential compound to search for
    :param region: region the search is restricted to (``city_limit=true``)
    :param bmap_key: Baidu Map API key
    :return: UID of the first matching result, or None when there is no
             usable result or the request/parsing fails
    """
    from urllib.parse import urlencode

    # urlencode percent-escapes every value, so names containing "&", "#" or
    # spaces cannot corrupt the query string. It also guarantees the
    # "region" parameter is really sent as "&region=...".
    query_string = urlencode({
        "query": residential_name,
        "region": region,
        "output": "json",
        "city_limit": "true",
        "ak": bmap_key,
    })
    bmap_localsearch_url = f"http://api.map.baidu.com/place/v2/search?{query_string}"

    # The context manager closes the session's connection pool on every path.
    with requests.Session() as s:
        s.mount('http://', HTTPAdapter(max_retries=3))
        s.mount('https://', HTTPAdapter(max_retries=3))
        try:
            response = s.get(bmap_localsearch_url, timeout=5, headers={"Connection": "close"})
            data = response.json()
            if data['status'] != 0 or not data['results']:
                print(f"No results found for {residential_name} in {region}")
                return None
            for info in data['results']:
                # Results with "-" in the name are skipped.
                # NOTE(review): presumably these are sub-entries of a compound
                # (gates, buildings) - confirm.
                if '-' not in info['name']:
                    return info['uid']
            print(f"No valid UID found for {residential_name} in {region}")
            return None
        except Exception as e:
            # Best-effort lookup: any network or parsing failure is reported
            # and turned into None so a batch run can continue.
            print(f"Error in get_residential_uid: {e}\nURL: {bmap_localsearch_url}")
            return None
3、根据UID获取住宅区的边界信息
def get_boundary_by_uid(uid, bmap_key):
    """
    Fetch the boundary of a place from Baidu Map by its UID.

    Reads ``content.geo`` from the JSON response, takes the third
    "|"-separated field, drops everything up to its first "-" and regroups
    the remaining comma-separated numbers into "x,y" pairs.

    :param uid: Baidu Map UID of the target place
    :param bmap_key: Baidu Map API key
    :return: boundary as "x1,y1;x2,y2;..." (values kept exactly as received),
             or None when no boundary is available or the request/parsing fails
    """
    bmap_boundary_url = f"http://map.baidu.com/?reqflag=pcmap&from=webmap&qt=ext&uid={uid}&ext_ver=new&l=18&ak={bmap_key}"

    # The context manager closes the session's connection pool on every path.
    with requests.Session() as s:
        s.mount('http://', HTTPAdapter(max_retries=3))
        s.mount('https://', HTTPAdapter(max_retries=3))
        try:
            response = s.get(bmap_boundary_url, timeout=5, headers={"Connection": "close"})
            data = response.json()
            if 'content' not in data or 'geo' not in data['content'] or not data['content']['geo']:
                print(f"No boundary information found for UID: {uid}")
                return None
            geo = data['content']['geo']
            # Split only on the FIRST "-": later minus signs belong to
            # negative coordinate values and must be preserved.
            detail = geo.split('|')[2].split('-', 1)[1]
            values = [token.strip(';') for token in detail.split(',')]
            # zip() pairs x with y and ignores a dangling last token, the same
            # pairing rule parse_geo_data uses, instead of indexing past the end.
            boundary = ';'.join(f"{x},{y}" for x, y in zip(values[::2], values[1::2]))
            if not boundary:
                print(f"No boundary information found for UID: {uid}")
                return None
            return boundary
        except Exception as e:
            # Best-effort lookup: any network or parsing failure is reported
            # and turned into None so a batch run can continue.
            print(f"Error in get_boundary_by_uid: {e}\nURL: {bmap_boundary_url}")
            return None
4、解析百度地图返回的geo数据,提取坐标点
def parse_geo_data(geo_data):
    """
    Parse a Baidu Map ``geo`` string into a list of coordinate pairs.

    The third "|"-separated field is used; everything up to its first "-" is
    dropped and the remaining comma-separated numbers are grouped two by two.
    A dangling last value without a partner is ignored.

    :param geo_data: ``geo`` string returned by Baidu Map
    :return: list of (x, y) float tuples; an empty list when the input is
             empty, has no "|" separator, or cannot be parsed
    """
    if not geo_data or '|' not in geo_data:
        return []
    try:
        # Split only on the FIRST "-": later minus signs belong to negative
        # coordinate values and must be preserved.
        detail = geo_data.split('|')[2].split('-', 1)[1]
        tokens = [token.strip(';') for token in detail.split(',')]
        # zip() pairs x with y and stops at the shorter slice, so an unpaired
        # trailing token is never converted.
        return [(float(x), float(y)) for x, y in zip(tokens[::2], tokens[1::2])]
    except (AttributeError, IndexError, ValueError) as e:
        # Missing fields or non-numeric values: report and fall back to "no points".
        print(f"Error parsing geo data: {e}")
        return []
5、将Web Mercator坐标转换为WGS-84经纬度坐标
def web_mercator_to_wgs84(x, y):
    """
    Convert a Web Mercator (EPSG:3857) coordinate to WGS-84 (EPSG:4326).

    :param x: Web Mercator X coordinate
    :param y: Web Mercator Y coordinate
    :return: WGS-84 coordinate as a (lon, lat) tuple
    """
    # always_xy=True forces x/y (lon, lat) ordering for input and output.
    # Without it pyproj follows the axis order declared by EPSG:4326 and
    # returns (lat, lon), contradicting the documented return value.
    transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
    return transformer.transform(x, y)
6、将数据保存到CSV文件中
def save_to_csv(data, filename="output.csv"):
    """
    Write boundary coordinates to a CSV file, one row per coordinate pair.

    The file starts with the header row
    ``Residential Name, Longitude, Latitude``. Entries whose boundary is
    None or empty, and empty ";"-separated segments, are skipped.

    :param data: dict mapping a residential name to its boundary string in
                 the form "lon1,lat1;lon2,lat2;..."
    :param filename: output file path; missing parent directories are created
    :raises ValueError: if a non-empty segment is not exactly two
                        comma-separated values
    """
    # Create the target directory first; a bare file name has no directory part.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Residential Name", "Longitude", "Latitude"])
        for name, coords in data.items():
            # A failed boundary lookup yields None; skip it instead of
            # aborting the export halfway through the file.
            if not coords:
                continue
            for coord in coords.split(';'):
                # A leading, trailing or doubled ";" produces empty segments.
                if not coord:
                    continue
                lon, lat = coord.split(',')
                writer.writerow([name, lon, lat])
    print(f"Data saved to {filename}")
7、主函数:bmap_key 填写百度地图API密钥,region 填写默认查询地区,input_file 填写存放小区名称的文件,output_file 填写输出文件。
if __name__ == "__main__":
    # Script configuration. Only these assignments are visible in this
    # excerpt; the rest of the entry point is not shown here.
    bmap_key = "***" # replace with your Baidu Map API key
    region = "北京" # default search region
    input_file = "**.txt" # text file containing the residential names
    output_file = "transformed_coordinates.csv" # output file
完整代码下载
https://download.youkuaiyun.com/download/cc605523/90592963