原题地址 。
首先给出关键函数,一个多边形包含另一个多边形:
SELECT ST_Contains(ST_Polygonize([ ST_GeomFromText('LINESTRING(0 0, 0 10, 10 10, 10 0, 0 0)')]),ST_Polygonize([ ST_GeomFromText('LINESTRING(0 0, 0 1, 1 1, 1 0, 0 0)')]));
true
思路是把每个可能的矩形与整个多边形比较,看后者是否包含前者,如果是则计算面积,最后排序,因为对spatial插件不熟悉,让DeepSeek辅助实现,
利用DuckDB spatial插件编写SQL求解多边形内最大矩形面积 ,把每个可能的矩形与整个多边形比较,看后者是否包含前者,如果是则计算面积,最后排序,关键判断函数用法是ST_Contains(ST_Polygonize([ ST_GeomFromText(‘LINESTRING(0 0, 0 10, 10 10, 10 0, 0 0)’)]),ST_Polygonize([ ST_GeomFromText(‘LINESTRING(0 0, 0 1, 1 1, 1 0, 0 0)’)],先从外部文件读入逐行坐标,形成点,将多边形提前保存好,矩形根据对角顶点算出。
他写了调用以上插件的python程序
import duckdb
from itertools import combinations
def create_polygon_wkt(points):
"""将点列表转换为多边形WKT格式"""
# 闭合多边形:将第一个点重复添加到末尾
wkt_points = [f"{x} {y}" for x, y in points]
wkt_points.append(wkt_points[0]) # 闭合多边形
return f"POLYGON(({', '.join(wkt_points)}))"
def create_rectangle_wkt(x1, y1, x2, y2):
"""从对角顶点创建矩形WKT"""
# 确保矩形是轴对齐的
min_x = min(x1, x2)
max_x = max(x1, x2)
min_y = min(y1, y2)
max_y = max(y1, y2)
# 矩形顶点(顺时针或逆时针顺序)
points = [
(min_x, min_y),
(max_x, min_y),
(max_x, max_y),
(min_x, max_y),
(min_x, min_y) # 闭合
]
wkt_points = [f"{x} {y}" for x, y in points]
return f"POLYGON(({', '.join(wkt_points)}))"
def main():
# 读取多边形顶点坐标
points = []
with open('2509-input.txt', 'r') as f:
for line in f:
line = line.strip()
if line:
x, y = map(int, line.split(','))
points.append((x, y))
if len(points) < 3:
print("多边形至少需要3个顶点")
return
# 创建DuckDB连接并加载spatial扩展
conn = duckdb.connect()
#conn.execute("INSTALL spatial;")
conn.execute("LOAD spatial;")
# 创建多边形WKT
polygon_wkt = create_polygon_wkt(points)
print(f"多边形WKT: {polygon_wkt}")
# 将多边形注册为表
conn.execute(f"""
CREATE TEMP TABLE polygons AS
SELECT 1 as id, ST_GeomFromText('{polygon_wkt}') as geom
""")
# 获取所有顶点对作为可能的矩形对角顶点
n = len(points)
rectangles = []
for i in range(n):
for j in range(i + 1, n):
x1, y1 = points[i]
x2, y2 = points[j]
# 排除x或y坐标相同的点对(不能形成矩形)
if x1 == x2 or y1 == y2:
continue
# 计算矩形面积
min_x = min(x1, x2)
max_x = max(x1, x2)
min_y = min(y1, y2)
max_y = max(y1, y2)
area = (max_x - min_x+1) * (max_y - min_y+1)
# 创建矩形WKT
rect_wkt = create_rectangle_wkt(x1, y1, x2, y2)
rectangles.append({
'id': len(rectangles) + 1,
'rect_wkt': rect_wkt,
'vertex1': (x1, y1),
'vertex2': (x2, y2),
'area': area
})
print(f"生成了 {len(rectangles)} 个可能的矩形")
# 批量检查矩形是否在多边形内
valid_rectangles = []
for rect in rectangles:
# 使用ST_Contains检查矩形是否在多边形内
query = f"""
SELECT
ST_Contains(
(SELECT geom FROM polygons WHERE id = 1),
ST_GeomFromText('{rect['rect_wkt']}')
) as is_contained
"""
result = conn.execute(query).fetchone()
if result and result[0]: # 矩形完全在多边形内
valid_rectangles.append(rect)
print(f"找到 {len(valid_rectangles)} 个完全在多边形内的矩形")
# 按面积排序
if valid_rectangles:
valid_rectangles.sort(key=lambda x: x['area'], reverse=True)
print("\n最大的矩形(按面积排序):")
print("-" * 60)
for idx, rect in enumerate(valid_rectangles[:10], 1): # 显示前10个
print(f"{idx}. 面积: {rect['area']}")
print(f" 对角顶点: {rect['vertex1']} - {rect['vertex2']}")
print(f" 矩形WKT: {rect['rect_wkt']}")
print()
print(f"\n最大矩形面积: {valid_rectangles[0]['area']}")
print(f"对角顶点: {valid_rectangles[0]['vertex1']} 和 {valid_rectangles[0]['vertex2']}")
else:
print("没有找到完全在多边形内的矩形")
# 关闭连接
conn.close()
if __name__ == "__main__":
main()
安装插件
D load httpfs;
D install spatial;
D load spatial;
安装python DuckDB 模块
python3 pip.pyz install duckdb -U --break-system-packages -i https://pypi.tuna.tsinghua.edu.cn/simple
因为要拼接字符串,所以单个矩形执行一次查询语句,导致执行效率不太高,但也2分半钟完成了计算。
for rect in rectangles:
# 使用ST_Contains检查矩形是否在多边形内
query = f"""
SELECT
ST_Contains(
(SELECT geom FROM polygons WHERE id = 1),
ST_GeomFromText('{rect['rect_wkt']}')
) as is_contained
"""
time python3 2509duckdb.txt > 2509duckdb-res.txt
real 2m5.490s
user 2m34.732s
sys 0m12.180s
又让他把矩形也提前存好,一次性查询出结果,仍要1分40秒,经过排查,确定性能问题出在executemany插入语句,其实还是逐条插入,
把conn.executemany(“”"
INSERT INTO rectangles VALUES (?, ?, ?, ?, ?, ?, ?)
“”", rectangles_data)改写为insert into t value(1,4),(2,3),(7,1)这种insert格式,用execute执行.
修改以后,缩短到13秒,看到查询用了标量子查询,改成表关联,反而慢了,要18秒
import duckdb
from itertools import combinations
def create_polygon_wkt(points):
"""将点列表转换为多边形WKT格式"""
# 闭合多边形:将第一个点重复添加到末尾
wkt_points = [f"{x} {y}" for x, y in points]
wkt_points.append(wkt_points[0]) # 闭合多边形
return f"POLYGON(({', '.join(wkt_points)}))"
def create_rectangle_wkt(x1, y1, x2, y2):
"""从对角顶点创建矩形WKT"""
# 确保矩形是轴对齐的
min_x = min(x1, x2)
max_x = max(x1, x2)
min_y = min(y1, y2)
max_y = max(y1, y2)
# 矩形顶点(顺时针或逆时针顺序)
points = [
(min_x, min_y),
(max_x, min_y),
(max_x, max_y),
(min_x, max_y),
(min_x, min_y) # 闭合
]
wkt_points = [f"{x} {y}" for x, y in points]
return f"POLYGON(({', '.join(wkt_points)}))"
def main():
# 读取多边形顶点坐标
points = []
with open('2509-input.txt', 'r') as f:
for line in f:
line = line.strip()
if line:
x, y = map(int, line.split(','))
points.append((x, y))
if len(points) < 3:
print("多边形至少需要3个顶点")
return
# 创建DuckDB连接并加载spatial扩展
conn = duckdb.connect("aoc2509.db")
conn.execute("INSTALL spatial;")
conn.execute("LOAD spatial;")
# 创建多边形WKT并注册为表
polygon_wkt = create_polygon_wkt(points)
#print(f"多边形WKT: {polygon_wkt}")
# 创建多边形表
conn.execute(f"""
CREATE /*TEMP*/ TABLE polygons AS
SELECT 1 as id, ST_GeomFromText('{polygon_wkt}') as geom
""")
# 生成所有可能的矩形并批量插入到临时表
n = len(points)
rectangles_data = []
for i in range(n):
for j in range(i + 1, n):
x1, y1 = points[i]
x2, y2 = points[j]
# 排除x或y坐标相同的点对(不能形成矩形)
if x1 == x2 or y1 == y2:
continue
# 计算矩形面积
min_x = min(x1, x2)
max_x = max(x1, x2)
min_y = min(y1, y2)
max_y = max(y1, y2)
area = (max_x - min_x+1) * (max_y - min_y+1)
# 创建矩形WKT
rect_wkt = create_rectangle_wkt(x1, y1, x2, y2)
rectangles_data.append((len(rectangles_data) + 1, rect_wkt, area, x1, y1, x2, y2))
print(f"生成了 {len(rectangles_data)} 个可能的矩形")
# 批量插入矩形数据
if rectangles_data:
# 创建矩形表
conn.execute("""
CREATE /*TEMP*/ TABLE rectangles (
rect_id INTEGER,
rect_wkt TEXT,
area BIGINT,
x1 INTEGER,
y1 INTEGER,
x2 INTEGER,
y2 INTEGER
)
""")
# 批量插入数据
#conn.executemany("""
# INSERT INTO rectangles VALUES (?, ?, ?, ?, ?, ?, ?)
#""", rectangles_data)
# 构建批量插入的VALUES子句
values_clauses = []
for rect_id, rect_wkt, area, x1, y1, x2, y2 in rectangles_data:
# 转义rect_wkt中的单引号
rect_wkt_escaped = rect_wkt.replace("'", "''")
values_clauses.append(f"({rect_id}, '{rect_wkt_escaped}', {area}, {x1}, {y1}, {x2}, {y2})")
# 生成完整的INSERT语句
insert_sql = f"INSERT INTO rectangles VALUES {', '.join(values_clauses)}"
# 执行单个INSERT语句
conn.execute(insert_sql)
# 使用单个SQL查询找出最大面积且完全在多边形内的矩形
query = """
WITH valid_rectangles AS (
SELECT
r.rect_id,
r.rect_wkt,
r.area,
r.x1, r.y1, r.x2, r.y2,
ST_GeomFromText(r.rect_wkt) as rect_geom
FROM rectangles r
WHERE ST_Contains(
(SELECT geom FROM polygons WHERE id = 1),
ST_GeomFromText(r.rect_wkt)
) = TRUE
)
SELECT
rect_id,
area,
x1, y1, x2, y2,
rect_wkt
FROM valid_rectangles
WHERE area = (SELECT MAX(area) FROM valid_rectangles)
ORDER BY area DESC, rect_id
"""
query2 = """
WITH valid_rectangles AS (
SELECT
r.rect_id,
r.rect_wkt,
r.area,
r.x1, r.y1, r.x2, r.y2,
ST_GeomFromText(r.rect_wkt) as rect_geom
FROM rectangles r,polygons
WHERE id = 1 and ST_Contains(
geom ,
ST_GeomFromText(r.rect_wkt)
)
)
SELECT
rect_id,
area,
x1, y1, x2, y2,
rect_wkt
FROM valid_rectangles
WHERE area = (SELECT MAX(area) FROM valid_rectangles)
ORDER BY area DESC, rect_id
"""
result = conn.execute(query).fetchall()
if result:
print(f"\n最大矩形面积: {result[0][1]}")
print("满足条件的最大矩形:")
print("-" * 60)
for row in result:
rect_id, area, x1, y1, x2, y2, rect_wkt = row
print(f"矩形ID: {rect_id}")
print(f"面积: {area}")
print(f"对角顶点: ({x1},{y1}) - ({x2},{y2})")
print(f"矩形WKT: {rect_wkt[:80]}..." if len(rect_wkt) > 80 else f"矩形WKT: {rect_wkt}")
print()
print(f"共找到 {len(result)} 个最大矩形(面积相同)")
else:
print("没有找到完全在多边形内的矩形")
else:
print("没有生成任何矩形")
# 关闭连接
conn.close()
if __name__ == "__main__":
main()
用explain analyze 查询执行计划可见,相关子查询是BLOCKWISE_NL_JOIN,1秒。表关联是EXTENSION,3秒。
就差在这里。
参考
不得不说,DeepSeek的工程能力还是很强,语焉不详的需求结合他已有的知识,就能输出效率不错的程序,甚至连数据库特定的高效写法也掌握了。
补记,张泽鹏先生用shapely计算几何库实现的程序
import sys
from shapely import Polygon
points = [[int(index) for index in point.strip().split(',')] for point in sys.stdin]
polygon = Polygon(points)
largest = 0
for index, (x1, y1) in enumerate(points[:-1]):
for x2, y2 in points[index + 1:]:
rectangle = Polygon([[x1, y1], [x1, y2], [x2, y2], [x2, y1]])
area = (abs(x1 - x2) + 1) * (abs(y1 - y2) + 1)
if largest < area and polygon.contains(rectangle):
largest = area
print(largest)
安装库
python3 pip.pyz install shapely --break-system-packages -i https://pypi.tuna.tsinghua.edu.cn/simple
执行
time cat 2509-input.txt |python3 2509shapely.txt
1513792010
real 0m9.484s
user 0m9.872s
sys 0m0.016s

989

被折叠的 条评论
为什么被折叠?



