矢量数据基本属性检查
矢量数据2000坐标系转wgs1984
矢量数据2000坐标系转高斯克吕格
矢量数据wgs1984转UTM
'''
属性打印
'''
# import geopandas as gpd
#
# # Shapefile 文件路径
# shapefile_path = r""
#
# # 读取 shapefile 数据
# gdf = gpd.read_file(shapefile_path)
#
# # 打印基本信息
# print("===== 基本属性 =====")
# print(f"数据总行数(要素数量): {len(gdf)}")
# print(f"字段(属性列): {list(gdf.columns)}")
# print(f"坐标参考系(CRS): {gdf.crs}")
# print(f"几何类型: {gdf.geom_type.unique()}")
#
# # 打印前 5 行数据
# print("\n===== 前 5 行数据 =====")
# print(gdf.head())
#
# # 打印空间范围
# print("\n===== 空间范围(Bounding Box) =====")
# print(gdf.total_bounds) # [minx, miny, maxx, maxy]
'''
经度范围是 80°E - 81°E,这个范围位于中国的 6°分带 内,
因此适合选择 CGCS2000 高斯-克吕格投影 中的中央经线为 81°E 的投影带
'''
# import geopandas as gpd
#
# # 读取原始数据
# shp_path = r""
# gdf = gpd.read_file(shp_path)
#
# # 假设原始数据是 CGCS2000 地理坐标系(EPSG:4490)
# gdf = gdf.set_crs("EPSG:4490", allow_override=True) # 如果原始数据没有投影定义,假设为 CGCS2000 地理坐标系
#
# # 转换为高斯-克吕格投影(中央经线 81°E,EPSG:4504)
# gdf_gauss_81 = gdf.to_crs(epsg=4504) # CGCS2000 / Gauss-Kruger CM 81E
# gdf_gauss_81.to_file(shp_path.replace(".shp", "_cgcs2000_to_gauss_81.shp"), encoding="utf-8")
#
# print("投影转换已完成,输出文件为:", shp_path.replace(".shp", "_to_gauss_81.shp"))
'''
坐标转化 2000到1984
'''
import geopandas as gpd
from pathlib import Path
# 原数据
# src = r""
#
# # 读取(当前是 EPSG:4490)
# gdf = gpd.read_file(src)
# print("原CRS:", gdf.crs) # 应为 EPSG:4490
#
# # 重投影到 WGS84 (EPSG:4326)
# gdf_wgs84 = gdf.to_crs(epsg=4326)
#
# # 生成输出路径:同目录加 _wgs84 后缀
# p = Path(src)
# dst = str(p.with_name(p.stem + "_cgcs2000towgs84.shp"))
#
# # 保存为 Shapefile(保留字段)
# gdf_wgs84.to_file(dst, driver="ESRI Shapefile", encoding="utf-8")
# print("已保存:", dst)
# print("新CRS:", gdf_wgs84.crs)
'''
wgs1984 加上UTM投影
'''
# import geopandas as gpd
#
# # 读取原始数据(WGS84 坐标系)
# shp_path = r""
# gdf = gpd.read_file(shp_path)
#
# # 假设原始数据已经是 WGS84(EPSG:4326)
# gdf = gdf.set_crs("EPSG:4326", allow_override=True)
#
# # 转换为 UTM 坐标系(UTM Zone 45N,EPSG:32645)
# gdf_utm = gdf.to_crs(epsg=32645)
#
# # 保存为新的 Shapefile
# output_path = shp_path.replace(".shp", "_to_utm_45N.shp")
# gdf_utm.to_file(output_path, encoding="utf-8")
#
# print("投影转换已完成,输出文件为:", output_path)
'''
先检查一波
'''
# import geopandas as gpd
# from shapely.validation import explain_validity
#
# # 源数据路径
# # src = r""
# src = r""
# gdf = gpd.read_file(src)
# print(f"读取完成:{len(gdf)} 要素;CRS = {gdf.crs}")
#
# # 检查非法几何
# for idx, geom in gdf.geometry.items():
# if not geom.is_valid:
# reason = explain_validity(geom)
# print(f"⚠️ 非法几何 -> 要素索引: {idx}, 问题: {reason}")
# -*- coding: utf-8 -*-
"""
矢量面数据拓扑检查(打印版)
- 仅打印汇总与明细,不导出文件
- 检查项:非法几何、带洞、极小面(面积阈值)、重复几何、同层重叠
- 面积/重叠计算自动切换到米制投影(estimate_utm_crs)
"""
#
# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
#
# # ========= 基本参数 =========
# # SRC = r""
# MIN_AREA_M2 = 10.0 # 极小面阈值(平方米),按需调整
# FIX_INVALID_BEFORE = True # 为防止个别非法几何在叠置时抛错,建议保持 True
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
# # 若仍为空,则强制设为 WGS84
# gdf = gdf.set_crs(epsg=4326, allow_override=True)
# print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 构造米制数据副本,用于面积/重叠判定 =========
# try:
# utm_crs = gdf.estimate_utm_crs()
# gdf_m = gdf.to_crs(utm_crs)
# print(f"用于量测的米制CRS:{utm_crs}")
# except Exception as e:
# print("自动估算UTM失败,将在原CRS下计算(单位为度,面积阈值需改用degree²):", e)
# gdf_m = gdf.copy()
#
# # ========= 可选:先修复几何,避免叠置时报错 =========
# def fix_geom(geom):
# if geom is None or geom.is_empty:
# return geom
# if not geom.is_valid:
# try:
# return shapely.make_valid(geom) # shapely>=2.0
# except Exception:
# return geom.buffer(0) # 兼容旧版
# return geom
#
# if FIX_INVALID_BEFORE:
# gdf["geometry"] = gdf["geometry"].apply(fix_geom)
# gdf_m["geometry"] = gdf_m["geometry"].apply(fix_geom)
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
# if invalid_idx:
# for i in invalid_idx[:20]: # 只展示前20个,避免刷屏
# print(f" - 索引 {i}: {explain_validity(gdf.geometry.iloc[i])}")
# if len(invalid_idx) > 20:
# print(f" ... 其余 {len(invalid_idx)-20} 个省略")
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
# if isinstance(geom, Polygon):
# return len(geom.interiors) > 0
# if isinstance(geom, MultiPolygon):
# return any(len(p.interiors) > 0 for p in geom.geoms)
# return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
# if holes_idx:
# print(" - 索引(前20):", holes_idx[:20])
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf_m.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
# if small_idx:
# print(" - 索引(前20):", small_idx[:20])
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
# if dup_idx:
# print(" - 索引(前20):", dup_idx[:20])
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf_m.sindex # 空间索引
#
# for i, geom in enumerate(gdf_m.geometry):
# if geom is None or geom.is_empty:
# continue
# # 用包络构造多边形查询候选
# candidate_idx = sindex.query(box(*geom.bounds))
# for j in candidate_idx:
# if j <= i:
# continue
# g2 = gdf_m.geometry.iloc[j]
# if g2 is None or g2.is_empty:
# continue
# inter = geom.intersection(g2)
# if not inter.is_empty and inter.area > 0:
# # 保存 (i, j, 面积)
# overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
# # 按交叠面积从大到小排序
# overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
# # 打印全部重叠对(只显示索引,不限制前10)
# print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
# for k, (a, b, area) in enumerate(overlap_pairs, 1):
# print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
# # 如果只关心要素 ID,可以只打印索引号
# print("\n涉及重叠的要素 ID:")
# overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
# print(overlap_ids)
# else:
# print("未发现同层重叠。")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。如需一键修复(make_valid / 去极小面 / 处理重叠),我可以在这份脚本基础上再补一版“清洗版”。")
'''
拓扑检查第二版 把有问题的导出
'''
# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
#
# # ========= 基本参数 =========
# SRC = r""
# MIN_AREA_M2 = 10.0 # 极小面阈值(平方米),按需调整
# FIX_INVALID_BEFORE = True # 为防止个别非法几何在叠置时抛错,建议保持 True
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
# # 若仍为空,则强制设为 WGS84
# gdf = gdf.set_crs(epsg=4326, allow_override=True)
# print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 构造米制数据副本,用于面积/重叠判定 =========
# try:
# utm_crs = gdf.estimate_utm_crs()
# gdf_m = gdf.to_crs(utm_crs)
# print(f"用于量测的米制CRS:{utm_crs}")
# except Exception as e:
# print("自动估算UTM失败,将在原CRS下计算(单位为度,面积阈值需改用degree²):", e)
# gdf_m = gdf.copy()
#
# # ========= 可选:先修复几何,避免叠置时报错 =========
# def fix_geom(geom):
# if geom is None or geom.is_empty:
# return geom
# if not geom.is_valid:
# try:
# return shapely.make_valid(geom) # shapely>=2.0
# except Exception:
# return geom.buffer(0) # 兼容旧版
# return geom
#
# if FIX_INVALID_BEFORE:
# gdf["geometry"] = gdf["geometry"].apply(fix_geom)
# gdf_m["geometry"] = gdf_m["geometry"].apply(fix_geom)
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
# if invalid_idx:
# for i in invalid_idx[:20]: # 只展示前20个,避免刷屏
# print(f" - 索引 {i}: {explain_validity(gdf.geometry.iloc[i])}")
# if len(invalid_idx) > 20:
# print(f" ... 其余 {len(invalid_idx)-20} 个省略")
#
# # 导出非法几何要素
# if invalid_idx:
# gdf_invalid = gdf.iloc[invalid_idx]
# gdf_invalid.to_file(SRC.replace(".shp", "_invalid.shp"))
# print(f"非法几何已导出到 {SRC.replace('.shp', '_invalid.shp')}")
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
# if isinstance(geom, Polygon):
# return len(geom.interiors) > 0
# if isinstance(geom, MultiPolygon):
# return any(len(p.interiors) > 0 for p in geom.geoms)
# return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
# if holes_idx:
# print(" - 索引(前20):", holes_idx[:20])
#
# # 导出带洞要素
# if holes_idx:
# gdf_holes = gdf.iloc[holes_idx]
# gdf_holes.to_file(SRC.replace(".shp", "_holes.shp"))
# print(f"带洞要素已导出到 {SRC.replace('.shp', '_holes.shp')}")
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf_m.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
# if small_idx:
# print(" - 索引(前20):", small_idx[:20])
#
# # 导出极小面要素
# if small_idx:
# gdf_small = gdf.iloc[small_idx]
# gdf_small.to_file(SRC.replace(".shp", "_small.shp"))
# print(f"极小面已导出到 {SRC.replace('.shp', '_small.shp')}")
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
# if dup_idx:
# print(" - 索引(前20):", dup_idx[:20])
#
# # 导出重复几何要素
# if dup_idx:
# gdf_dup = gdf.iloc[dup_idx]
# gdf_dup.to_file(SRC.replace(".shp", "_duplicates.shp"))
# print(f"重复几何已导出到 {SRC.replace('.shp', '_duplicates.shp')}")
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf_m.sindex # 空间索引
#
# for i, geom in enumerate(gdf_m.geometry):
# if geom is None or geom.is_empty:
# continue
# # 用包络构造多边形查询候选
# candidate_idx = sindex.query(box(*geom.bounds))
# for j in candidate_idx:
# if j <= i:
# continue
# g2 = gdf_m.geometry.iloc[j]
# if g2 is None or g2.is_empty:
# continue
# inter = geom.intersection(g2)
# if not inter.is_empty and inter.area > 0:
# # 保存 (i, j, 面积)
# overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
# # 按交叠面积从大到小排序
# overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
# # 打印全部重叠对(只显示索引,不限制前10)
# print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
# for k, (a, b, area) in enumerate(overlap_pairs, 1):
# print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
# # 如果只关心要素 ID,可以只打印索引号
# print("\n涉及重叠的要素 ID:")
# overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
# print(overlap_ids)
#
# # 导出重叠要素
# if overlap_ids:
# overlap_ids_gdf = gdf.iloc[overlap_ids]
# overlap_ids_gdf.to_file(SRC.replace(".shp", "_overlap.shp"))
# print(f"重叠要素已导出到 {SRC.replace('.shp', '_overlap.shp')}")
#
# else:
# print("未发现同层重叠。")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。如需一键修复(make_valid / 去极小面 / 处理重叠),我可以在这份脚本基础上再补一版“清洗版”。")
'''
拓扑关系第三版
非法几何、带洞要素、极小面、重复几何、重叠几何 已经经过处理并导出。
合理的要素(即没有任何问题的几何)被保留,并与修复后的数据一起导出到 "_cleaned.shp"。
'''
#
#
# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
# import pandas as pd # 使用 pandas 进行数据合并
#
# # ========= 基本参数 =========
# SRC = r""
# MIN_AREA_M2 = 10.0 # 极小面阈值(平方米),按需调整
# FIX_INVALID_BEFORE = True # 为防止个别非法几何在叠置时抛错,建议保持 True
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
# # 若仍为空,则强制设为 WGS84
# gdf = gdf.set_crs(epsg=4326, allow_override=True)
# print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 构造米制数据副本,用于面积/重叠判定 =========
# try:
# utm_crs = gdf.estimate_utm_crs()
# gdf_m = gdf.to_crs(utm_crs)
# print(f"用于量测的米制CRS:{utm_crs}")
# except Exception as e:
# print("自动估算UTM失败,将在原CRS下计算(单位为度,面积阈值需改用degree²):", e)
# gdf_m = gdf.copy()
#
# # ========= 可选:先修复几何,避免叠置时报错 =========
# def fix_geom(geom):
# if geom is None or geom.is_empty:
# return geom
# if not geom.is_valid:
# try:
# return shapely.make_valid(geom) # shapely>=2.0
# except Exception:
# return geom.buffer(0) # 兼容旧版
# return geom
#
# if FIX_INVALID_BEFORE:
# gdf["geometry"] = gdf["geometry"].apply(fix_geom)
# gdf_m["geometry"] = gdf_m["geometry"].apply(fix_geom)
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
# if invalid_idx:
# for i in invalid_idx[:20]: # 只展示前20个,避免刷屏
# print(f" - 索引 {i}: {explain_validity(gdf.geometry.iloc[i])}")
# if len(invalid_idx) > 20:
# print(f" ... 其余 {len(invalid_idx)-20} 个省略")
#
# # 导出非法几何要素
# gdf_invalid = gdf.iloc[invalid_idx]
# gdf_invalid.to_file(SRC.replace(".shp", "_invalid.shp"))
# print(f"非法几何已导出到 {SRC.replace('.shp', '_invalid.shp')}")
#
# # 处理非法几何(修复)
# gdf["geometry"] = gdf["geometry"].apply(fix_geom)
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
# if isinstance(geom, Polygon):
# return len(geom.interiors) > 0
# if isinstance(geom, MultiPolygon):
# return any(len(p.interiors) > 0 for p in geom.geoms)
# return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
# if holes_idx:
# print(" - 索引(前20):", holes_idx[:20])
#
# # 导出带洞要素
# gdf_holes = gdf.iloc[holes_idx]
# gdf_holes.to_file(SRC.replace(".shp", "_holes.shp"))
# print(f"带洞要素已导出到 {SRC.replace('.shp', '_holes.shp')}")
#
# # 处理带洞要素(如果需要移除洞)
# def remove_holes(geom):
# if isinstance(geom, Polygon) and len(geom.interiors) > 0:
# return Polygon(geom.exterior) # 移除洞
# return geom
#
# gdf["geometry"] = gdf["geometry"].apply(remove_holes)
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf_m.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
# if small_idx:
# print(" - 索引(前20):", small_idx[:20])
#
# # 导出极小面要素
# gdf_small = gdf.iloc[small_idx]
# gdf_small.to_file(SRC.replace(".shp", "_small.shp"))
# print(f"极小面已导出到 {SRC.replace('.shp', '_small.shp')}")
#
# # 处理极小面(删除)
# gdf = gdf.drop(gdf_small.index) # 删除极小面
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
# if dup_idx:
# print(" - 索引(前20):", dup_idx[:20])
#
# # 导出重复几何要素
# gdf_dup = gdf.iloc[dup_idx]
# gdf_dup.to_file(SRC.replace(".shp", "_duplicates.shp"))
# print(f"重复几何已导出到 {SRC.replace('.shp', '_duplicates.shp')}")
#
# # 处理重复几何(删除)
# gdf = gdf.drop(gdf_dup.index) # 删除重复几何
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf_m.sindex # 空间索引
#
# for i, geom in enumerate(gdf_m.geometry):
# if geom is None or geom.is_empty:
# continue
# # 用包络构造多边形查询候选
# candidate_idx = sindex.query(box(*geom.bounds))
# for j in candidate_idx:
# if j <= i:
# continue
# g2 = gdf_m.geometry.iloc[j]
# if g2 is None or g2.is_empty:
# continue
# inter = geom.intersection(g2)
# if not inter.is_empty and inter.area > 0:
# # 保存 (i, j, 面积)
# overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
# # 按交叠面积从大到小排序
# overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
# # 打印全部重叠对(只显示索引,不限制前10)
# print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
# for k, (a, b, area) in enumerate(overlap_pairs, 1):
# print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
# # 处理重叠(合并重叠部分)
# overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
# gdf_overlap = gdf.iloc[overlap_ids]
# gdf_overlap = gdf_overlap.dissolve() # 合并重叠区域
# gdf_overlap.to_file(SRC.replace(".shp", "_merged_overlap.shp"))
# print(f"重叠要素已合并并导出到 {SRC.replace('.shp', '_merged_overlap.shp')}")
#
# else:
# print("未发现同层重叠。")
#
# # ========= 计算没有问题的要素 =========
# # 排除有问题的要素,得到没有问题的要素
# gdf_no_issues = gdf.iloc[~gdf.index.isin(invalid_idx + holes_idx + small_idx + dup_idx + [i for pair in overlap_pairs for i in pair[:2]])]
#
# # 导出没有问题的要素
# gdf_no_issues.to_file(SRC.replace(".shp", "_no_issues.shp"))
# print(f"没有问题的要素已导出到 {SRC.replace('.shp', '_no_issues.shp')}")
#
# # ========= 最终合并所有要素 =========
# # 合并处理后的问题要素和没有问题的要素
# gdf_clean = pd.concat([gdf_no_issues, gdf_invalid, gdf_holes, gdf_small, gdf_dup, gdf_overlap])
#
# # 导出最终“清理后的”要素
# gdf_clean.to_file(SRC.replace(".shp", "_cleaned.shp"))
# print(f"所有合理的要素已导出到 {SRC.replace('.shp', '_cleaned.shp')}")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。")
'''
再次检查处理过的数据(cleaned.shp),包括:
非法几何(自交、无效等)
带洞要素
极小面(面积小于阈值)
重复几何
重叠几何
删除这些有问题的要素,得到最终版本。
'''
# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
#
# # ========= 基本参数 =========
# SRC = r""
# MIN_AREA_M2 = 10.0 # 极小面阈值(平方米),按需调整
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
# # 若仍为空,则强制设为 WGS84
# gdf = gdf.set_crs(epsg=4326, allow_override=True)
# print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
#
# # 删除非法几何
# gdf = gdf.drop(gdf.index[invalid_idx]) # 删除非法几何
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
# if isinstance(geom, Polygon):
# return len(geom.interiors) > 0
# if isinstance(geom, MultiPolygon):
# return any(len(p.interiors) > 0 for p in geom.geoms)
# return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
#
# # 删除带洞要素
# gdf = gdf.drop(gdf.index[holes_idx]) # 删除带洞要素
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
#
# # 删除极小面
# gdf = gdf.drop(gdf.index[small_idx]) # 删除极小面
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
#
# # 删除重复几何
# gdf = gdf.drop(gdf.index[dup_idx]) # 删除重复几何
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf.sindex # 空间索引
#
# for i, geom in enumerate(gdf.geometry):
# if geom is None or geom.is_empty:
# continue
# # 用包络构造多边形查询候选
# candidate_idx = sindex.query(box(*geom.bounds))
# for j in candidate_idx:
# if j <= i:
# continue
# g2 = gdf.geometry.iloc[j]
# if g2 is None or g2.is_empty:
# continue
# inter = geom.intersection(g2)
# if not inter.is_empty and inter.area > 0:
# # 保存 (i, j, 面积)
# overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
# # 按交叠面积从大到小排序
# overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
# # 打印全部重叠对(只显示索引,不限制前10)
# print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
# for k, (a, b, area) in enumerate(overlap_pairs, 1):
# print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
# # 删除重叠要素
# overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
# gdf = gdf.drop(gdf.index[overlap_ids]) # 删除重叠要素
#
# else:
# print("未发现同层重叠。")
#
# # ========= 最终输出:删除问题要素后的结果 =========
# out_path = SRC.replace(".shp", "_final_cleaned.shp")
# gdf.to_file(out_path)
# print(f"所有问题已去除,最终版本已导出到 {out_path}")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。")

被折叠的 条评论
为什么被折叠?



