百度地图小区边界爬取

数据服务生

已于 2025-04-11 09:37:48 修改

阅读量473

点赞数 5

文章标签：百度 dubbo

于 2025-04-10 20:05:49 首次发布

本文链接：https://blog.youkuaiyun.com/cc605523/article/details/147126367

版权

1、准备需要爬取的小区名称，存放在txt文本中（UTF-8编码，每行一个小区名称）

# Load the residential-community names to look up from a text file.
def read_residential_names(file_path):
    """
    Read residential-community names from a text file, one name per line.

    Leading/trailing whitespace is stripped from every line and blank lines
    are skipped.

    :param file_path: path of the text file (UTF-8, with or without a BOM)
    :return: list of names; an empty list when the file does not exist
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return []

    # "utf-8-sig" decodes plain UTF-8 as well, but additionally drops a leading
    # byte-order mark. Without it the BOM would stay attached to the first name
    # (str.strip() does not remove U+FEFF) and that name would never match.
    with open(file_path, "r", encoding="utf-8-sig") as file:
        stripped_lines = (line.strip() for line in file)
        return [name for name in stripped_lines if name]

2、需要根据住宅区名称和所在地区获取其UID

def get_residential_uid(residential_name, region, bmap_key):
    """
    Look up the Baidu Maps UID of a residential community by name and region.

    Sends one request to the Baidu place-search endpoint and returns the UID
    of the first result whose name does not contain '-'.

    :param residential_name: name of the residential community
    :param region: region to search in
    :param bmap_key: Baidu Maps API key
    :return: the UID string, or None when nothing suitable is found or the
             request / response parsing fails (the failure is printed)
    """
    from urllib.parse import urlencode

    # Build the query string with urlencode so that names containing
    # characters such as '&', '#', '+' or '=' cannot corrupt the request.
    query_string = urlencode({
        "query": residential_name,
        "region": region,
        "output": "json",
        "city_limit": "true",
        "ak": bmap_key,
    })
    # NOTE: this URL carries the API key and is printed in the error path below.
    bmap_localsearch_url = f"http://api.map.baidu.com/place/v2/search?{query_string}"

    try:
        # The context manager closes the session (and its connection pool)
        # even when the request raises.
        with requests.Session() as s:
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(bmap_localsearch_url, timeout=5, headers={"Connection": "close"})
            data = response.json()

        if data['status'] == 0 and len(data['results']) > 0:
            # Results with '-' in the name are skipped; presumably these are
            # sub-entries of a community (gates, buildings) — confirm.
            for info in data['results']:
                if '-' not in info['name']:
                    return info['uid']
            print(f"No valid UID found for {residential_name} in {region}")
            return None

        print(f"No results found for {residential_name} in {region}")
        return None
    except Exception as e:
        # Deliberately broad: this is a best-effort lookup, so any network,
        # JSON or schema problem is reported and turned into None.
        print(f"Error in get_residential_uid: {e}\nURL: {bmap_localsearch_url}")
        return None

3、根据UID获取住宅区的边界信息

def get_boundary_by_uid(uid, bmap_key):
    """
    Fetch the boundary of a residential community by its Baidu Maps UID.

    The response's content.geo string is split on '|'; the third field is
    expected to look like "<prefix>-x1,y1,x2,y2,...;". The values after the
    first '-' are paired up and returned as "x1,y1;x2,y2;...".

    :param uid: Baidu Maps UID of the target
    :param bmap_key: Baidu Maps API key
    :return: boundary coordinate string, or None when the response has no geo
             data or the request / parsing fails (the failure is printed)
    """
    from urllib.parse import urlencode

    # urlencode escapes the values so an unusual uid/key cannot break the URL.
    query_string = urlencode({
        "reqflag": "pcmap",
        "from": "webmap",
        "qt": "ext",
        "uid": uid,
        "ext_ver": "new",
        "l": "18",
        "ak": bmap_key,
    })
    # NOTE: this URL carries the API key and is printed in the error path below.
    bmap_boundary_url = f"http://map.baidu.com/?{query_string}"

    try:
        # The context manager closes the session even when the request raises.
        with requests.Session() as s:
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(bmap_boundary_url, timeout=5, headers={"Connection": "close"})
            data = response.json()

        if 'content' in data and 'geo' in data['content']:
            geo = data['content']['geo']
            # Split on the FIRST '-' only, so a negative coordinate does not
            # truncate the coordinate list.
            ring = geo.split('|')[2].split('-', 1)[1]
            values = [value.strip(';') for value in ring.split(',')]
            # zip() pairs x with y and drops a trailing unpaired value instead
            # of failing with IndexError, matching parse_geo_data.
            boundary = ';'.join(f"{x},{y}" for x, y in zip(values[0::2], values[1::2]))
            return boundary

        print(f"No boundary information found for UID: {uid}")
        return None
    except Exception as e:
        # Deliberately broad: best-effort fetch, any network, JSON or format
        # problem is reported and turned into None.
        print(f"Error in get_boundary_by_uid: {e}\nURL: {bmap_boundary_url}")
        return None

4、解析百度地图返回的geo数据，提取坐标点

def parse_geo_data(geo_data):
    """
    Parse a Baidu Maps geo string into a list of coordinate pairs.

    The string is split on '|'; its third field is expected to look like
    "<prefix>-x1,y1,x2,y2,...;". Everything after the first '-' is read as a
    flat, comma-separated list of numbers and grouped two by two. A trailing
    unpaired value is ignored.

    :param geo_data: geo string returned by Baidu Maps
    :return: list of (x, y) float tuples; an empty list when the input is
             empty, has no '|', or cannot be parsed (the error is printed)
    """
    if not geo_data or '|' not in geo_data:
        return []
    try:
        # Split on the FIRST '-' only: a plain split('-') would also cut at
        # the minus sign of a negative coordinate and lose the data.
        ring = geo_data.split('|')[2].split('-', 1)[1]
        tokens = [token.strip(';') for token in ring.split(',')]
        # Pair even-indexed values (x) with odd-indexed values (y).
        return [(float(x), float(y)) for x, y in zip(tokens[0::2], tokens[1::2])]
    except (IndexError, ValueError) as e:
        # IndexError: fewer fields than expected; ValueError: non-numeric value.
        print(f"Error parsing geo data: {e}")
        return []

5、将Web Mercator坐标转换为WGS-84经纬度坐标

def web_mercator_to_wgs84(x, y):
    """
    Convert Web Mercator (EPSG:3857) coordinates to WGS-84 (EPSG:4326).

    :param x: Web Mercator X coordinate (easting)
    :param y: Web Mercator Y coordinate (northing)
    :return: WGS-84 coordinates as (lon, lat)
    """
    # always_xy=True forces (x, y) / (lon, lat) ordering on both sides.
    # Without it pyproj follows the axis order declared by EPSG:4326 and
    # returns (lat, lon), contradicting the documented return value.
    # NOTE(review): Baidu Maps geo data is presumably in Baidu's own Mercator
    # projection rather than EPSG:3857 — confirm before relying on accuracy.
    transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
    return transformer.transform(x, y)

6、将数据保存到CSV文件中

def save_to_csv(data, filename="output.csv"):
    """
    Write boundary coordinates to a CSV file, one row per coordinate pair.

    The file gets a header row followed by (name, lon, lat) rows. Entries
    whose boundary is None or empty, and empty segments such as the one left
    by a trailing ';', are skipped.

    :param data: dict mapping a residential name to a boundary string of the
                 form "lon1,lat1;lon2,lat2;..."
    :param filename: output file path; missing parent directories are created
    :raises ValueError: if a non-empty segment is not exactly "lon,lat"
    """
    # Create the parent directory when the path has one.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Residential Name", "Longitude", "Latitude"])  # header
        for name, coords in data.items():
            # A failed boundary lookup yields None; skip it instead of
            # crashing and losing the rest of the output.
            if not coords:
                continue
            for coord in coords.split(';'):
                if not coord.strip():
                    continue
                lon, lat = coord.split(',')
                writer.writerow([name, lon, lat])

    print(f"Data saved to {filename}")

7、主函数：bmap_key 填写百度地图API密钥，region 填写默认查询地区，input_file 填写存放小区名称的txt文件路径，output_file 为输出的CSV文件名。

# Script entry point: run-time configuration. The values below are
# placeholders and must be edited before running.
if __name__ == "__main__":
    bmap_key = "***"  # replace with your Baidu Maps API key
    region = "北京"  # default region to search in
    input_file = "**.txt"  # text file holding the residential names
    output_file = "transformed_coordinates.csv"  # output file

完整代码下载
https://download.youkuaiyun.com/download/cc605523/90592963