利用DeepSeek辅助DuckDB spatial插件求解Advent of Code 2025第9题 最大矩形面积 第2部分

编程达人挑战赛·第5期 10w+人浏览 402人参与

原题地址
首先给出关键函数,一个多边形包含另一个多边形:

SELECT ST_Contains(ST_Polygonize([ ST_GeomFromText('LINESTRING(0 0, 0 10, 10 10, 10 0, 0 0)')]),ST_Polygonize([ ST_GeomFromText('LINESTRING(0 0, 0 1, 1 1, 1 0, 0 0)')]));

true 

思路是把每个可能的矩形与整个多边形比较,看后者是否包含前者,如果是则计算面积,最后排序,因为对spatial插件不熟悉,让DeepSeek辅助实现,

利用DuckDB spatial插件编写SQL求解多边形内最大矩形面积 ,把每个可能的矩形与整个多边形比较,看后者是否包含前者,如果是则计算面积,最后排序,关键判断函数用法是ST_Contains(ST_Polygonize([ ST_GeomFromText(‘LINESTRING(0 0, 0 10, 10 10, 10 0, 0 0)’)]),ST_Polygonize([ ST_GeomFromText(‘LINESTRING(0 0, 0 1, 1 1, 1 0, 0 0)’)],先从外部文件读入逐行坐标,形成点,将多边形提前保存好,矩形根据对角顶点算出。

他写了调用以上插件的python程序

import duckdb
from itertools import combinations

def create_polygon_wkt(points):
    """将点列表转换为多边形WKT格式"""
    # 闭合多边形:将第一个点重复添加到末尾
    wkt_points = [f"{x} {y}" for x, y in points]
    wkt_points.append(wkt_points[0])  # 闭合多边形
    return f"POLYGON(({', '.join(wkt_points)}))"

def create_rectangle_wkt(x1, y1, x2, y2):
    """从对角顶点创建矩形WKT"""
    # 确保矩形是轴对齐的
    min_x = min(x1, x2)
    max_x = max(x1, x2)
    min_y = min(y1, y2)
    max_y = max(y1, y2)
    
    # 矩形顶点(顺时针或逆时针顺序)
    points = [
        (min_x, min_y),
        (max_x, min_y),
        (max_x, max_y),
        (min_x, max_y),
        (min_x, min_y)  # 闭合
    ]
    
    wkt_points = [f"{x} {y}" for x, y in points]
    return f"POLYGON(({', '.join(wkt_points)}))"

def main():
    # 读取多边形顶点坐标
    points = []
    with open('2509-input.txt', 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                x, y = map(int, line.split(','))
                points.append((x, y))
    
    if len(points) < 3:
        print("多边形至少需要3个顶点")
        return
    
    # 创建DuckDB连接并加载spatial扩展
    conn = duckdb.connect()
    #conn.execute("INSTALL spatial;")
    conn.execute("LOAD spatial;")
    
    # 创建多边形WKT
    polygon_wkt = create_polygon_wkt(points)
    print(f"多边形WKT: {polygon_wkt}")
    
    # 将多边形注册为表
    conn.execute(f"""
        CREATE TEMP TABLE polygons AS 
        SELECT 1 as id, ST_GeomFromText('{polygon_wkt}') as geom
    """)
    
    # 获取所有顶点对作为可能的矩形对角顶点
    n = len(points)
    rectangles = []
    
    for i in range(n):
        for j in range(i + 1, n):
            x1, y1 = points[i]
            x2, y2 = points[j]
            
            # 排除x或y坐标相同的点对(不能形成矩形)
            if x1 == x2 or y1 == y2:
                continue
            
            # 计算矩形面积
            min_x = min(x1, x2)
            max_x = max(x1, x2)
            min_y = min(y1, y2)
            max_y = max(y1, y2)
            area = (max_x - min_x+1) * (max_y - min_y+1)
            
            # 创建矩形WKT
            rect_wkt = create_rectangle_wkt(x1, y1, x2, y2)
            
            rectangles.append({
                'id': len(rectangles) + 1,
                'rect_wkt': rect_wkt,
                'vertex1': (x1, y1),
                'vertex2': (x2, y2),
                'area': area
            })
    
    print(f"生成了 {len(rectangles)} 个可能的矩形")
    
    # 批量检查矩形是否在多边形内
    valid_rectangles = []
    
    for rect in rectangles:
        # 使用ST_Contains检查矩形是否在多边形内
        query = f"""
        SELECT 
            ST_Contains(
                (SELECT geom FROM polygons WHERE id = 1),
                ST_GeomFromText('{rect['rect_wkt']}')
            ) as is_contained
        """
        
        result = conn.execute(query).fetchone()
        
        if result and result[0]:  # 矩形完全在多边形内
            valid_rectangles.append(rect)
    
    print(f"找到 {len(valid_rectangles)} 个完全在多边形内的矩形")
    
    # 按面积排序
    if valid_rectangles:
        valid_rectangles.sort(key=lambda x: x['area'], reverse=True)
        
        print("\n最大的矩形(按面积排序):")
        print("-" * 60)
        
        for idx, rect in enumerate(valid_rectangles[:10], 1):  # 显示前10个
            print(f"{idx}. 面积: {rect['area']}")
            print(f"   对角顶点: {rect['vertex1']} - {rect['vertex2']}")
            print(f"   矩形WKT: {rect['rect_wkt']}")
            print()
        
        print(f"\n最大矩形面积: {valid_rectangles[0]['area']}")
        print(f"对角顶点: {valid_rectangles[0]['vertex1']}{valid_rectangles[0]['vertex2']}")
    else:
        print("没有找到完全在多边形内的矩形")
    
    # 关闭连接
    conn.close()

if __name__ == "__main__":
    main()

安装插件

D load httpfs;
D install spatial;
D load spatial;

安装python DuckDB 模块

python3 pip.pyz install duckdb -U --break-system-packages -i https://pypi.tuna.tsinghua.edu.cn/simple

因为要拼接字符串,所以单个矩形执行一次查询语句,导致执行效率不太高,但也2分半钟完成了计算。

   for rect in rectangles:
        # 使用ST_Contains检查矩形是否在多边形内
        query = f"""
        SELECT 
            ST_Contains(
                (SELECT geom FROM polygons WHERE id = 1),
                ST_GeomFromText('{rect['rect_wkt']}')
            ) as is_contained
        """
time python3 2509duckdb.txt > 2509duckdb-res.txt

real	2m5.490s
user	2m34.732s
sys	0m12.180s

又让他把矩形也提前存好,一次性查询出结果,仍要1分40秒,经过排查,确定性能问题出在executemany插入语句,其实还是逐条插入,

把conn.executemany(“”"
INSERT INTO rectangles VALUES (?, ?, ?, ?, ?, ?, ?)
“”", rectangles_data)改写为insert into t value(1,4),(2,3),(7,1)这种insert格式,用execute执行.

修改以后,缩短到13秒,看到查询用了标量子查询,改成表关联,反而慢了,要18秒

import duckdb
from itertools import combinations

def create_polygon_wkt(points):
    """将点列表转换为多边形WKT格式"""
    # 闭合多边形:将第一个点重复添加到末尾
    wkt_points = [f"{x} {y}" for x, y in points]
    wkt_points.append(wkt_points[0])  # 闭合多边形
    return f"POLYGON(({', '.join(wkt_points)}))"

def create_rectangle_wkt(x1, y1, x2, y2):
    """从对角顶点创建矩形WKT"""
    # 确保矩形是轴对齐的
    min_x = min(x1, x2)
    max_x = max(x1, x2)
    min_y = min(y1, y2)
    max_y = max(y1, y2)
    
    # 矩形顶点(顺时针或逆时针顺序)
    points = [
        (min_x, min_y),
        (max_x, min_y),
        (max_x, max_y),
        (min_x, max_y),
        (min_x, min_y)  # 闭合
    ]
    
    wkt_points = [f"{x} {y}" for x, y in points]
    return f"POLYGON(({', '.join(wkt_points)}))"

def main():
    # 读取多边形顶点坐标
    points = []
    with open('2509-input.txt', 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                x, y = map(int, line.split(','))
                points.append((x, y))
    
    if len(points) < 3:
        print("多边形至少需要3个顶点")
        return
    
    # 创建DuckDB连接并加载spatial扩展
    conn = duckdb.connect("aoc2509.db")
    conn.execute("INSTALL spatial;")
    conn.execute("LOAD spatial;")
    
    # 创建多边形WKT并注册为表
    polygon_wkt = create_polygon_wkt(points)
    #print(f"多边形WKT: {polygon_wkt}")
    
    # 创建多边形表
    conn.execute(f"""
        CREATE /*TEMP*/ TABLE polygons AS 
        SELECT 1 as id, ST_GeomFromText('{polygon_wkt}') as geom
    """)
    
    # 生成所有可能的矩形并批量插入到临时表
    n = len(points)
    rectangles_data = []
    
    for i in range(n):
        for j in range(i + 1, n):
            x1, y1 = points[i]
            x2, y2 = points[j]
            
            # 排除x或y坐标相同的点对(不能形成矩形)
            if x1 == x2 or y1 == y2:
                continue
            
            # 计算矩形面积
            min_x = min(x1, x2)
            max_x = max(x1, x2)
            min_y = min(y1, y2)
            max_y = max(y1, y2)
            area = (max_x - min_x+1) * (max_y - min_y+1)
            
            # 创建矩形WKT
            rect_wkt = create_rectangle_wkt(x1, y1, x2, y2)
            
            rectangles_data.append((len(rectangles_data) + 1, rect_wkt, area, x1, y1, x2, y2))
    
    print(f"生成了 {len(rectangles_data)} 个可能的矩形")
    
    # 批量插入矩形数据
    if rectangles_data:
        # 创建矩形表
        conn.execute("""
            CREATE /*TEMP*/ TABLE rectangles (
                rect_id INTEGER,
                rect_wkt TEXT,
                area BIGINT,
                x1 INTEGER,
                y1 INTEGER,
                x2 INTEGER,
                y2 INTEGER
            )
        """)
        
        # 批量插入数据
        #conn.executemany("""
        #    INSERT INTO rectangles VALUES (?, ?, ?, ?, ?, ?, ?)
        #""", rectangles_data)

        # 构建批量插入的VALUES子句
        values_clauses = []
        for rect_id, rect_wkt, area, x1, y1, x2, y2 in rectangles_data:
            # 转义rect_wkt中的单引号
            rect_wkt_escaped = rect_wkt.replace("'", "''")
            values_clauses.append(f"({rect_id}, '{rect_wkt_escaped}', {area}, {x1}, {y1}, {x2}, {y2})")
    
        # 生成完整的INSERT语句
        insert_sql = f"INSERT INTO rectangles VALUES {', '.join(values_clauses)}"
    
        # 执行单个INSERT语句
        conn.execute(insert_sql)        
        # 使用单个SQL查询找出最大面积且完全在多边形内的矩形
        query = """
        WITH valid_rectangles AS (
            SELECT 
                r.rect_id,
                r.rect_wkt,
                r.area,
                r.x1, r.y1, r.x2, r.y2,
                ST_GeomFromText(r.rect_wkt) as rect_geom
            FROM rectangles r
            WHERE ST_Contains(
                (SELECT geom FROM polygons WHERE id = 1),
                ST_GeomFromText(r.rect_wkt)
            ) = TRUE
        )
        SELECT 
            rect_id,
            area,
            x1, y1, x2, y2,
            rect_wkt
        FROM valid_rectangles
        WHERE area = (SELECT MAX(area) FROM valid_rectangles)
        ORDER BY area DESC, rect_id
        """

        query2 = """
        WITH valid_rectangles AS (
            SELECT 
                r.rect_id,
                r.rect_wkt,
                r.area,
                r.x1, r.y1, r.x2, r.y2,
                ST_GeomFromText(r.rect_wkt) as rect_geom
            FROM rectangles r,polygons
            WHERE id = 1 and ST_Contains(
                geom ,
                ST_GeomFromText(r.rect_wkt)
            ) 
        )
        SELECT 
            rect_id,
            area,
            x1, y1, x2, y2,
            rect_wkt
        FROM valid_rectangles
        WHERE area = (SELECT MAX(area) FROM valid_rectangles)
        ORDER BY area DESC, rect_id
        """        
        result = conn.execute(query).fetchall()
        
        if result:
            print(f"\n最大矩形面积: {result[0][1]}")
            print("满足条件的最大矩形:")
            print("-" * 60)
            
            for row in result:
                rect_id, area, x1, y1, x2, y2, rect_wkt = row
                print(f"矩形ID: {rect_id}")
                print(f"面积: {area}")
                print(f"对角顶点: ({x1},{y1}) - ({x2},{y2})")
                print(f"矩形WKT: {rect_wkt[:80]}..." if len(rect_wkt) > 80 else f"矩形WKT: {rect_wkt}")
                print()
            
            print(f"共找到 {len(result)} 个最大矩形(面积相同)")
        else:
            print("没有找到完全在多边形内的矩形")
    else:
        print("没有生成任何矩形")
    
    # 关闭连接
    conn.close()

if __name__ == "__main__":
    main()

用explain analyze 查询执行计划可见,相关子查询是BLOCKWISE_NL_JOIN,1秒。表关联是EXTENSION,3秒。
就差在这里。
参考
不得不说,DeepSeek的工程能力还是很强,语焉不详的需求结合他已有的知识,就能输出效率不错的程序,甚至连数据库特定的高效写法也掌握了。

补记,张泽鹏先生用shapely计算几何库实现的程序

import sys
from shapely import Polygon

points = [[int(index) for index in point.strip().split(',')] for point in sys.stdin]
polygon = Polygon(points)

largest = 0
for index, (x1, y1) in enumerate(points[:-1]):
    for x2, y2 in points[index + 1:]:
        rectangle = Polygon([[x1, y1], [x1, y2], [x2, y2], [x2, y1]])
        area = (abs(x1 - x2) + 1) * (abs(y1 - y2) + 1)
        if largest < area and polygon.contains(rectangle):
            largest = area

print(largest)

安装库

python3 pip.pyz install shapely --break-system-packages -i https://pypi.tuna.tsinghua.edu.cn/simple

执行

time cat 2509-input.txt |python3 2509shapely.txt
1513792010

real	0m9.484s
user	0m9.872s
sys	0m0.016s
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值