矢量面数据拓扑关系检查(二)

矢量数据基本属性检查
矢量数据2000坐标系转wgs1984
矢量数据2000坐标系转高斯克吕格
矢量数据wgs1984转UTM


'''
属性打印
'''

# import geopandas as gpd
#
# # Shapefile 文件路径
# shapefile_path = r""
#
# # 读取 shapefile 数据
# gdf = gpd.read_file(shapefile_path)
#
# # 打印基本信息
# print("===== 基本属性 =====")
# print(f"数据总行数(要素数量): {len(gdf)}")
# print(f"字段(属性列): {list(gdf.columns)}")
# print(f"坐标参考系(CRS): {gdf.crs}")
# print(f"几何类型: {gdf.geom_type.unique()}")
#
# # 打印前 5 行数据
# print("\n===== 前 5 行数据 =====")
# print(gdf.head())
#
# # 打印空间范围
# print("\n===== 空间范围(Bounding Box) =====")
# print(gdf.total_bounds)  # [minx, miny, maxx, maxy]





'''
经度范围是 80°E - 81°E,这个范围位于中国的 6°分带 内,
因此适合选择 CGCS2000 高斯-克吕格投影 中的中央经线为 81°E 的投影带
'''

# import geopandas as gpd
#
# # 读取原始数据
# shp_path = r""
# gdf = gpd.read_file(shp_path)
#
# # 假设原始数据是 CGCS2000 地理坐标系(EPSG:4490)
# gdf = gdf.set_crs("EPSG:4490", allow_override=True)  # 如果原始数据没有投影定义,假设为 CGCS2000 地理坐标系
#
# # 转换为高斯-克吕格投影(中央经线 81°E,EPSG:4504)
# gdf_gauss_81 = gdf.to_crs(epsg=4504)  # CGCS2000 / Gauss-Kruger CM 81E
# gdf_gauss_81.to_file(shp_path.replace(".shp", "_cgcs2000_to_gauss_81.shp"), encoding="utf-8")
#
# print("投影转换已完成,输出文件为:", shp_path.replace(".shp", "_to_gauss_81.shp"))







'''
坐标转化 2000到1984
'''

import geopandas as gpd
from pathlib import Path

# 原数据
# src = r""
#
# # 读取(当前是 EPSG:4490)
# gdf = gpd.read_file(src)
# print("原CRS:", gdf.crs)   # 应为 EPSG:4490
#
# # 重投影到 WGS84 (EPSG:4326)
# gdf_wgs84 = gdf.to_crs(epsg=4326)
#
# # 生成输出路径:同目录加 _wgs84 后缀
# p = Path(src)
# dst = str(p.with_name(p.stem + "_cgcs2000towgs84.shp"))
#
# # 保存为 Shapefile(保留字段)
# gdf_wgs84.to_file(dst, driver="ESRI Shapefile", encoding="utf-8")
# print("已保存:", dst)
# print("新CRS:", gdf_wgs84.crs)






'''
wgs1984 加上UTM投影
'''

# import geopandas as gpd
#
# # 读取原始数据(WGS84 坐标系)
# shp_path = r""
# gdf = gpd.read_file(shp_path)
#
# # 假设原始数据已经是 WGS84(EPSG:4326)
# gdf = gdf.set_crs("EPSG:4326", allow_override=True)
#
# # 转换为 UTM 坐标系(UTM Zone 45N,EPSG:32645)
# gdf_utm = gdf.to_crs(epsg=32645)
#
# # 保存为新的 Shapefile
# output_path = shp_path.replace(".shp", "_to_utm_45N.shp")
# gdf_utm.to_file(output_path, encoding="utf-8")
#
# print("投影转换已完成,输出文件为:", output_path)







'''
先检查一波
'''

# import geopandas as gpd
# from shapely.validation import explain_validity
#
# # 源数据路径
# # src = r""
# src = r""
# gdf = gpd.read_file(src)
# print(f"读取完成:{len(gdf)} 要素;CRS = {gdf.crs}")
#
# # 检查非法几何
# for idx, geom in gdf.geometry.items():
#     if not geom.is_valid:
#         reason = explain_validity(geom)
#         print(f"⚠️ 非法几何 -> 要素索引: {idx}, 问题: {reason}")



# -*- coding: utf-8 -*-
"""
矢量面数据拓扑检查(打印版)
- 仅打印汇总与明细,不导出文件
- 检查项:非法几何、带洞、极小面(面积阈值)、重复几何、同层重叠
- 面积/重叠计算自动切换到米制投影(estimate_utm_crs)
"""


#
# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
#
# # ========= 基本参数 =========
# # SRC = r""

# MIN_AREA_M2 = 10.0      # 极小面阈值(平方米),按需调整
# FIX_INVALID_BEFORE = True  # 为防止个别非法几何在叠置时抛错,建议保持 True
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
#     # 若仍为空,则强制设为 WGS84
#     gdf = gdf.set_crs(epsg=4326, allow_override=True)
#     print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 构造米制数据副本,用于面积/重叠判定 =========
# try:
#     utm_crs = gdf.estimate_utm_crs()
#     gdf_m = gdf.to_crs(utm_crs)
#     print(f"用于量测的米制CRS:{utm_crs}")
# except Exception as e:
#     print("自动估算UTM失败,将在原CRS下计算(单位为度,面积阈值需改用degree²):", e)
#     gdf_m = gdf.copy()
#
# # ========= 可选:先修复几何,避免叠置时报错 =========
# def fix_geom(geom):
#     if geom is None or geom.is_empty:
#         return geom
#     if not geom.is_valid:
#         try:
#             return shapely.make_valid(geom)  # shapely>=2.0
#         except Exception:
#             return geom.buffer(0)            # 兼容旧版
#     return geom
#
# if FIX_INVALID_BEFORE:
#     gdf["geometry"] = gdf["geometry"].apply(fix_geom)
#     gdf_m["geometry"] = gdf_m["geometry"].apply(fix_geom)
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
# if invalid_idx:
#     for i in invalid_idx[:20]:  # 只展示前20个,避免刷屏
#         print(f"  - 索引 {i}: {explain_validity(gdf.geometry.iloc[i])}")
#     if len(invalid_idx) > 20:
#         print(f"  ... 其余 {len(invalid_idx)-20} 个省略")
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
#     if isinstance(geom, Polygon):
#         return len(geom.interiors) > 0
#     if isinstance(geom, MultiPolygon):
#         return any(len(p.interiors) > 0 for p in geom.geoms)
#     return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
# if holes_idx:
#     print("  - 索引(前20):", holes_idx[:20])
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf_m.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
# if small_idx:
#     print("  - 索引(前20):", small_idx[:20])
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
# if dup_idx:
#     print("  - 索引(前20):", dup_idx[:20])
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf_m.sindex  # 空间索引
#
# for i, geom in enumerate(gdf_m.geometry):
#     if geom is None or geom.is_empty:
#         continue
#     # 用包络构造多边形查询候选
#     candidate_idx = sindex.query(box(*geom.bounds))
#     for j in candidate_idx:
#         if j <= i:
#             continue
#         g2 = gdf_m.geometry.iloc[j]
#         if g2 is None or g2.is_empty:
#             continue
#         inter = geom.intersection(g2)
#         if not inter.is_empty and inter.area > 0:
#             # 保存 (i, j, 面积)
#             overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
#     # 按交叠面积从大到小排序
#     overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
#     # 打印全部重叠对(只显示索引,不限制前10)
#     print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
#     for k, (a, b, area) in enumerate(overlap_pairs, 1):
#         print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
#     # 如果只关心要素 ID,可以只打印索引号
#     print("\n涉及重叠的要素 ID:")
#     overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
#     print(overlap_ids)
# else:
#     print("未发现同层重叠。")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。如需一键修复(make_valid / 去极小面 / 处理重叠),我可以在这份脚本基础上再补一版“清洗版”。")



'''
拓扑检查第二版 把有问题的导出
'''


# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
#
# # ========= 基本参数 =========
# SRC = r""
# MIN_AREA_M2 = 10.0      # 极小面阈值(平方米),按需调整
# FIX_INVALID_BEFORE = True  # 为防止个别非法几何在叠置时抛错,建议保持 True
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
#     # 若仍为空,则强制设为 WGS84
#     gdf = gdf.set_crs(epsg=4326, allow_override=True)
#     print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 构造米制数据副本,用于面积/重叠判定 =========
# try:
#     utm_crs = gdf.estimate_utm_crs()
#     gdf_m = gdf.to_crs(utm_crs)
#     print(f"用于量测的米制CRS:{utm_crs}")
# except Exception as e:
#     print("自动估算UTM失败,将在原CRS下计算(单位为度,面积阈值需改用degree²):", e)
#     gdf_m = gdf.copy()
#
# # ========= 可选:先修复几何,避免叠置时报错 =========
# def fix_geom(geom):
#     if geom is None or geom.is_empty:
#         return geom
#     if not geom.is_valid:
#         try:
#             return shapely.make_valid(geom)  # shapely>=2.0
#         except Exception:
#             return geom.buffer(0)            # 兼容旧版
#     return geom
#
# if FIX_INVALID_BEFORE:
#     gdf["geometry"] = gdf["geometry"].apply(fix_geom)
#     gdf_m["geometry"] = gdf_m["geometry"].apply(fix_geom)
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
# if invalid_idx:
#     for i in invalid_idx[:20]:  # 只展示前20个,避免刷屏
#         print(f"  - 索引 {i}: {explain_validity(gdf.geometry.iloc[i])}")
#     if len(invalid_idx) > 20:
#         print(f"  ... 其余 {len(invalid_idx)-20} 个省略")
#
# # 导出非法几何要素
# if invalid_idx:
#     gdf_invalid = gdf.iloc[invalid_idx]
#     gdf_invalid.to_file(SRC.replace(".shp", "_invalid.shp"))
#     print(f"非法几何已导出到 {SRC.replace('.shp', '_invalid.shp')}")
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
#     if isinstance(geom, Polygon):
#         return len(geom.interiors) > 0
#     if isinstance(geom, MultiPolygon):
#         return any(len(p.interiors) > 0 for p in geom.geoms)
#     return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
# if holes_idx:
#     print("  - 索引(前20):", holes_idx[:20])
#
# # 导出带洞要素
# if holes_idx:
#     gdf_holes = gdf.iloc[holes_idx]
#     gdf_holes.to_file(SRC.replace(".shp", "_holes.shp"))
#     print(f"带洞要素已导出到 {SRC.replace('.shp', '_holes.shp')}")
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf_m.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
# if small_idx:
#     print("  - 索引(前20):", small_idx[:20])
#
# # 导出极小面要素
# if small_idx:
#     gdf_small = gdf.iloc[small_idx]
#     gdf_small.to_file(SRC.replace(".shp", "_small.shp"))
#     print(f"极小面已导出到 {SRC.replace('.shp', '_small.shp')}")
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
# if dup_idx:
#     print("  - 索引(前20):", dup_idx[:20])
#
# # 导出重复几何要素
# if dup_idx:
#     gdf_dup = gdf.iloc[dup_idx]
#     gdf_dup.to_file(SRC.replace(".shp", "_duplicates.shp"))
#     print(f"重复几何已导出到 {SRC.replace('.shp', '_duplicates.shp')}")
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf_m.sindex  # 空间索引
#
# for i, geom in enumerate(gdf_m.geometry):
#     if geom is None or geom.is_empty:
#         continue
#     # 用包络构造多边形查询候选
#     candidate_idx = sindex.query(box(*geom.bounds))
#     for j in candidate_idx:
#         if j <= i:
#             continue
#         g2 = gdf_m.geometry.iloc[j]
#         if g2 is None or g2.is_empty:
#             continue
#         inter = geom.intersection(g2)
#         if not inter.is_empty and inter.area > 0:
#             # 保存 (i, j, 面积)
#             overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
#     # 按交叠面积从大到小排序
#     overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
#     # 打印全部重叠对(只显示索引,不限制前10)
#     print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
#     for k, (a, b, area) in enumerate(overlap_pairs, 1):
#         print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
#     # 如果只关心要素 ID,可以只打印索引号
#     print("\n涉及重叠的要素 ID:")
#     overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
#     print(overlap_ids)
#
#     # 导出重叠要素
#     if overlap_ids:
#         overlap_ids_gdf = gdf.iloc[overlap_ids]
#         overlap_ids_gdf.to_file(SRC.replace(".shp", "_overlap.shp"))
#         print(f"重叠要素已导出到 {SRC.replace('.shp', '_overlap.shp')}")
#
# else:
#     print("未发现同层重叠。")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。如需一键修复(make_valid / 去极小面 / 处理重叠),我可以在这份脚本基础上再补一版“清洗版”。")


'''
拓扑关系第三版

非法几何、带洞要素、极小面、重复几何、重叠几何 已经经过处理并导出。

合理的要素(即没有任何问题的几何)被保留,并与修复后的数据一起导出到 "_cleaned.shp"。

'''

#
#
# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
# import pandas as pd  # 使用 pandas 进行数据合并
#
# # ========= 基本参数 =========
# SRC = r""
# MIN_AREA_M2 = 10.0      # 极小面阈值(平方米),按需调整
# FIX_INVALID_BEFORE = True  # 为防止个别非法几何在叠置时抛错,建议保持 True
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
#     # 若仍为空,则强制设为 WGS84
#     gdf = gdf.set_crs(epsg=4326, allow_override=True)
#     print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 构造米制数据副本,用于面积/重叠判定 =========
# try:
#     utm_crs = gdf.estimate_utm_crs()
#     gdf_m = gdf.to_crs(utm_crs)
#     print(f"用于量测的米制CRS:{utm_crs}")
# except Exception as e:
#     print("自动估算UTM失败,将在原CRS下计算(单位为度,面积阈值需改用degree²):", e)
#     gdf_m = gdf.copy()
#
# # ========= 可选:先修复几何,避免叠置时报错 =========
# def fix_geom(geom):
#     if geom is None or geom.is_empty:
#         return geom
#     if not geom.is_valid:
#         try:
#             return shapely.make_valid(geom)  # shapely>=2.0
#         except Exception:
#             return geom.buffer(0)            # 兼容旧版
#     return geom
#
# if FIX_INVALID_BEFORE:
#     gdf["geometry"] = gdf["geometry"].apply(fix_geom)
#     gdf_m["geometry"] = gdf_m["geometry"].apply(fix_geom)
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
# if invalid_idx:
#     for i in invalid_idx[:20]:  # 只展示前20个,避免刷屏
#         print(f"  - 索引 {i}: {explain_validity(gdf.geometry.iloc[i])}")
#     if len(invalid_idx) > 20:
#         print(f"  ... 其余 {len(invalid_idx)-20} 个省略")
#
# # 导出非法几何要素
# gdf_invalid = gdf.iloc[invalid_idx]
# gdf_invalid.to_file(SRC.replace(".shp", "_invalid.shp"))
# print(f"非法几何已导出到 {SRC.replace('.shp', '_invalid.shp')}")
#
# # 处理非法几何(修复)
# gdf["geometry"] = gdf["geometry"].apply(fix_geom)
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
#     if isinstance(geom, Polygon):
#         return len(geom.interiors) > 0
#     if isinstance(geom, MultiPolygon):
#         return any(len(p.interiors) > 0 for p in geom.geoms)
#     return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
# if holes_idx:
#     print("  - 索引(前20):", holes_idx[:20])
#
# # 导出带洞要素
# gdf_holes = gdf.iloc[holes_idx]
# gdf_holes.to_file(SRC.replace(".shp", "_holes.shp"))
# print(f"带洞要素已导出到 {SRC.replace('.shp', '_holes.shp')}")
#
# # 处理带洞要素(如果需要移除洞)
# def remove_holes(geom):
#     if isinstance(geom, Polygon) and len(geom.interiors) > 0:
#         return Polygon(geom.exterior)  # 移除洞
#     return geom
#
# gdf["geometry"] = gdf["geometry"].apply(remove_holes)
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf_m.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
# if small_idx:
#     print("  - 索引(前20):", small_idx[:20])
#
# # 导出极小面要素
# gdf_small = gdf.iloc[small_idx]
# gdf_small.to_file(SRC.replace(".shp", "_small.shp"))
# print(f"极小面已导出到 {SRC.replace('.shp', '_small.shp')}")
#
# # 处理极小面(删除)
# gdf = gdf.drop(gdf_small.index)  # 删除极小面
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
# if dup_idx:
#     print("  - 索引(前20):", dup_idx[:20])
#
# # 导出重复几何要素
# gdf_dup = gdf.iloc[dup_idx]
# gdf_dup.to_file(SRC.replace(".shp", "_duplicates.shp"))
# print(f"重复几何已导出到 {SRC.replace('.shp', '_duplicates.shp')}")
#
# # 处理重复几何(删除)
# gdf = gdf.drop(gdf_dup.index)  # 删除重复几何
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf_m.sindex  # 空间索引
#
# for i, geom in enumerate(gdf_m.geometry):
#     if geom is None or geom.is_empty:
#         continue
#     # 用包络构造多边形查询候选
#     candidate_idx = sindex.query(box(*geom.bounds))
#     for j in candidate_idx:
#         if j <= i:
#             continue
#         g2 = gdf_m.geometry.iloc[j]
#         if g2 is None or g2.is_empty:
#             continue
#         inter = geom.intersection(g2)
#         if not inter.is_empty and inter.area > 0:
#             # 保存 (i, j, 面积)
#             overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
#     # 按交叠面积从大到小排序
#     overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
#     # 打印全部重叠对(只显示索引,不限制前10)
#     print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
#     for k, (a, b, area) in enumerate(overlap_pairs, 1):
#         print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
#     # 处理重叠(合并重叠部分)
#     overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
#     gdf_overlap = gdf.iloc[overlap_ids]
#     gdf_overlap = gdf_overlap.dissolve()  # 合并重叠区域
#     gdf_overlap.to_file(SRC.replace(".shp", "_merged_overlap.shp"))
#     print(f"重叠要素已合并并导出到 {SRC.replace('.shp', '_merged_overlap.shp')}")
#
# else:
#     print("未发现同层重叠。")
#
# # ========= 计算没有问题的要素 =========
# # 排除有问题的要素,得到没有问题的要素
# gdf_no_issues = gdf.iloc[~gdf.index.isin(invalid_idx + holes_idx + small_idx + dup_idx + [i for pair in overlap_pairs for i in pair[:2]])]
#
# # 导出没有问题的要素
# gdf_no_issues.to_file(SRC.replace(".shp", "_no_issues.shp"))
# print(f"没有问题的要素已导出到 {SRC.replace('.shp', '_no_issues.shp')}")
#
# # ========= 最终合并所有要素 =========
# # 合并处理后的问题要素和没有问题的要素
# gdf_clean = pd.concat([gdf_no_issues, gdf_invalid, gdf_holes, gdf_small, gdf_dup, gdf_overlap])
#
# # 导出最终“清理后的”要素
# gdf_clean.to_file(SRC.replace(".shp", "_cleaned.shp"))
# print(f"所有合理的要素已导出到 {SRC.replace('.shp', '_cleaned.shp')}")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。")


'''
再次检查处理过的数据(cleaned.shp),包括:

非法几何(自交、无效等)

带洞要素

极小面(面积小于阈值)

重复几何

重叠几何

删除这些有问题的要素,得到最终版本。
'''



# import geopandas as gpd
# from shapely.validation import explain_validity
# from shapely.geometry import Polygon, MultiPolygon, box
# import shapely
#
# # ========= 基本参数 =========
# SRC = r""
# MIN_AREA_M2 = 10.0      # 极小面阈值(平方米),按需调整
#
# # ========= 读取与CRS =========
# gdf = gpd.read_file(SRC)
# print(f"读取完成:{len(gdf)} 要素;原CRS = {gdf.crs}")
#
# if gdf.crs is None:
#     # 若仍为空,则强制设为 WGS84
#     gdf = gdf.set_crs(epsg=4326, allow_override=True)
#     print("未检测到CRS,已强制设置为 EPSG:4326(WGS84)")
#
# # ========= 检查1:非法几何 =========
# invalid_idx = [i for i, geom in gdf.geometry.items() if not geom.is_valid]
# print(f"\n[非法几何] 数量:{len(invalid_idx)}")
#
# # 删除非法几何
# gdf = gdf.drop(gdf.index[invalid_idx])  # 删除非法几何
#
# # ========= 检查2:带洞要素 =========
# def has_holes(geom):
#     if isinstance(geom, Polygon):
#         return len(geom.interiors) > 0
#     if isinstance(geom, MultiPolygon):
#         return any(len(p.interiors) > 0 for p in geom.geoms)
#     return False
#
# holes_idx = [i for i, geom in gdf.geometry.items() if has_holes(geom)]
# print(f"\n[带洞要素] 数量:{len(holes_idx)}")
#
# # 删除带洞要素
# gdf = gdf.drop(gdf.index[holes_idx])  # 删除带洞要素
#
# # ========= 检查3:极小面(面积阈值)=========
# small_idx = [i for i, geom in gdf.geometry.items() if geom is not None and not geom.is_empty and geom.area < MIN_AREA_M2]
# print(f"\n[极小面(<{MIN_AREA_M2} m²)] 数量:{len(small_idx)}")
#
# # 删除极小面
# gdf = gdf.drop(gdf.index[small_idx])  # 删除极小面
#
# # ========= 检查4:重复几何(完全重复)=========
# dup_mask = gdf.geometry.duplicated(keep=False)
# dup_idx = list(gdf.index[dup_mask])
# print(f"\n[完全重复几何] 数量:{len(dup_idx)}")
#
# # 删除重复几何
# gdf = gdf.drop(gdf.index[dup_idx])  # 删除重复几何
#
# # ========= 检查5:同层重叠(交叠面积>0)=========
# print("\n[同层重叠] 正在扫描,请稍候 ...")
# overlap_pairs = []
# sindex = gdf.sindex  # 空间索引
#
# for i, geom in enumerate(gdf.geometry):
#     if geom is None or geom.is_empty:
#         continue
#     # 用包络构造多边形查询候选
#     candidate_idx = sindex.query(box(*geom.bounds))
#     for j in candidate_idx:
#         if j <= i:
#             continue
#         g2 = gdf.geometry.iloc[j]
#         if g2 is None or g2.is_empty:
#             continue
#         inter = geom.intersection(g2)
#         if not inter.is_empty and inter.area > 0:
#             # 保存 (i, j, 面积)
#             overlap_pairs.append((i, j, float(inter.area)))
#
# print(f"[同层重叠] 共发现 {len(overlap_pairs)} 对要素发生重叠")
# if overlap_pairs:
#     # 按交叠面积从大到小排序
#     overlap_pairs.sort(key=lambda x: x[2], reverse=True)
#
#     # 打印全部重叠对(只显示索引,不限制前10)
#     print("发生重叠的要素对 (idx_i, idx_j, overlap_m2):")
#     for k, (a, b, area) in enumerate(overlap_pairs, 1):
#         print(f"{k:04d}) {a} , {b} , {area:.3f}")
#
#     # 删除重叠要素
#     overlap_ids = sorted(set([i for pair in overlap_pairs for i in pair[:2]]))
#     gdf = gdf.drop(gdf.index[overlap_ids])  # 删除重叠要素
#
# else:
#     print("未发现同层重叠。")
#
# # ========= 最终输出:删除问题要素后的结果 =========
# out_path = SRC.replace(".shp", "_final_cleaned.shp")
# gdf.to_file(out_path)
# print(f"所有问题已去除,最终版本已导出到 {out_path}")
#
# # ========= 汇总 =========
# print("\n—— 拓扑检查汇总 ——")
# print(f"非法几何:{len(invalid_idx)}")
# print(f"带洞要素:{len(holes_idx)}")
# print(f"极小面(<{MIN_AREA_M2} m²):{len(small_idx)}")
# print(f"完全重复几何:{len(dup_idx)}")
# print(f"同层重叠对数:{len(overlap_pairs)}")
#
# print("\n✅ 完成。")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值