DeepSeek辅助编写转换DuckDB json格式执行计划到PostgreSQL格式的Python程序-CSDN博客

DuckDB执行计划默认是方块形式，复杂的计划很难辨认，它还支持json格式，不过对人类同样不太可读，所以让DeepSeek照着PostgreSQL格式的执行计划编写了一个转换程序，代码如下:

import json

def json_to_postgresql_plan(json_str):
    """Convert a DuckDB JSON execution plan into PostgreSQL-style plan text.

    Each plan node becomes one line: the node name, indented two spaces per
    nesting level, followed by an optional parenthesised summary built from
    the node's ``extra_info`` (projections, group keys, aggregates and the
    estimated cardinality).

    Args:
        json_str (str): JSON text of the plan. Either a list of root nodes
            or a single root node object.

    Returns:
        str: The formatted plan, starting with a ``QUERY PLAN`` header and a
            50-character dashed separator.

    Raises:
        json.JSONDecodeError: If ``json_str`` is not valid JSON.
        KeyError: If a node has no ``"name"`` key.
    """
    plan_data = json.loads(json_str)

    # Accept a single root object as well as a list of root nodes.
    if isinstance(plan_data, dict):
        plan_data = [plan_data]

    # (extra_info key, label used in the output); order fixes output order.
    labelled_keys = (
        ("Projections", "Projections"),
        ("Groups", "Group Key"),
        ("Aggregates", "Aggregates"),
    )

    def format_value(value):
        # An extra_info value may be one string or a list of strings.
        # Joining a bare string would split it into characters
        # ("42" -> "4, 2"), so only real sequences are joined.
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value)

    def process_node(node, level=0):
        # Render this node and, recursively, its children as a list of lines.
        indent = " " * (level * 2)

        extra_info = node.get("extra_info", {})
        if not isinstance(extra_info, dict):
            # Without a mapping there are no structured keys to read.
            extra_info = {}

        info_parts = []
        for key, label in labelled_keys:
            value = extra_info.get(key)
            if value:  # skip missing and empty values
                info_parts.append(f"{label}: {format_value(value)}")

        # The estimated cardinality is shown whenever the key is present.
        if "Estimated Cardinality" in extra_info:
            cardinality = extra_info["Estimated Cardinality"]
            info_parts.append(f"Estimated Rows: {cardinality}")

        node_desc = f"{indent}{node['name']}"
        if info_parts:
            node_desc += f"  ({', '.join(info_parts)})"

        lines = [node_desc]
        # A missing or null "children" entry is treated as "no children".
        for child in node.get("children") or []:
            lines.extend(process_node(child, level + 1))
        return lines

    plan_lines = ["QUERY PLAN", "-" * 50]
    for root in plan_data:
        plan_lines.extend(process_node(root))

    return "\n".join(plan_lines)

def convert_json_file_to_plan(json_file_path):
    """
    Read a JSON execution plan from a file and render it as PostgreSQL-style text.

    Failures are not raised to the caller; they are reported through the
    returned string instead.

    Args:
        json_file_path (str): Path of the JSON file to read (decoded as UTF-8).

    Returns:
        str: The formatted plan, or an error message when the file is
            missing, is not valid JSON, or any other exception occurs.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as plan_file:
            raw_json = plan_file.read()
        return json_to_postgresql_plan(raw_json)
    except FileNotFoundError:
        failure = f"错误：找不到文件 {json_file_path}"
    except json.JSONDecodeError as decode_err:
        failure = f"错误：JSON格式不正确 - {decode_err}"
    except Exception as other_err:
        # Catch-all so that every failure comes back as a message.
        failure = f"错误：处理文件时发生异常 - {other_err}"
    return failure

def convert_json_file_to_plan_and_save(json_file_path, output_file_path=None):
    """
    Convert a JSON plan file to PostgreSQL-style text and optionally save it.

    The text returned by ``convert_json_file_to_plan`` is written out as is.

    Args:
        json_file_path (str): Path of the input JSON file.
        output_file_path (str, optional): Path of the text file to write
            (UTF-8). Nothing is written when this is omitted or empty.

    Returns:
        str: The text returned by ``convert_json_file_to_plan``.
    """
    plan_text = convert_json_file_to_plan(json_file_path)

    # No destination given: hand the text back without touching the disk.
    if not output_file_path:
        return plan_text

    try:
        with open(output_file_path, 'w', encoding='utf-8') as sink:
            sink.write(plan_text)
        print(f"\n执行计划已保存到: {output_file_path}")
    except Exception as save_err:
        # A failed save is reported on stdout; the text is still returned.
        print(f"保存文件时出错: {save_err}")

    return plan_text

# Command-line entry point: <json file path> [output file path]
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        # No input file given: show the usage line.
        print("使用方法: python script.py <json文件路径> [输出文件路径]")
    else:
        source_path = cli_args[0]
        target_path = cli_args[1] if len(cli_args) > 1 else None

        if target_path:
            # Save to the requested file; the plan is not echoed to stdout.
            convert_json_file_to_plan_and_save(source_path, target_path)
        else:
            print(convert_json_file_to_plan(source_path))

使用步骤
1.在DuckDB中生成json格式执行计划

.mode list
.output plan.json
.explain (format json) select count(*) from (values(1),(2))t(a);
.output
.exit

然后手工编辑plan.json文件，去掉开头的非JSON内容，只保留JSON部分。编辑后的文件内容例如：

[
    {
        "name": "UNGROUPED_AGGREGATE",
        "children": [
            {
                "name": "PROJECTION",
                "children": [
                    {
                        "name": "COLUMN_DATA_SCAN",
                        "children": [],
                        "extra_info": {
                            "Estimated Cardinality": "2"
                        }
                    }
                ],
                "extra_info": {
                    "Projections": "42",
                    "Estimated Cardinality": "2"
                }
            }
        ],
        "extra_info": {
            "Aggregates": "count_star()"
        }
    }
]

2.使用python执行程序

python convplan.py plan.json planpg.txt

输出的planpg.txt如下

QUERY PLAN
--------------------------------------------------
UNGROUPED_AGGREGATE  (Aggregates: count_star())
  PROJECTION  (Projections: 4, 2, Estimated Rows: 2)
    COLUMN_DATA_SCAN  (Estimated Rows: 2)

这样就容易阅读多了。左侧是操作名称，括号中是补充信息。而执行过程是从缩进较深的操作逐步到缩进较浅的操作。