DeepSeek辅助编写转换DuckDB explain_analyze_json 格式执行计划到postgresql_plan 的程序

最新推荐文章于 2025-11-29 03:46:46 发布

原创最新推荐文章于 2025-11-29 03:46:46 发布 · 349 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#json #postgresql #数据库 #duckdb #执行计划 #DeepSeek #人工智能

编程语言软件工程同时被 2 个专栏收录

263 篇文章

订阅专栏

数据库

174 篇文章

订阅专栏

explain analyze的json格式执行计划的标签命名与explain 的json计划不同，我让DeepSeek单独为它编写一个转换到postgresql格式的程序，输出第一层的children的children下所有信息，其余功能不变。
他给出的程序如下：

import json
import sys

def explain_analyze_json_to_postgresql_plan(json_str):
    """
    将 EXPLAIN ANALYZE 的 JSON 格式转换为 PostgreSQL 格式
    """
    try:
        plan_data = json.loads(json_str)
    except json.JSONDecodeError as e:
        return f"错误：JSON格式不正确 - {e}"
    
    # 递归处理执行计划节点
    def process_node(node, level=0):
        indent = " " * (level * 2)
        result = []
        
        # 使用 operator_name 或 operator_type 作为节点名称
        node_name = node.get("operator_name") or node.get("operator_type", "UNKNOWN")
        
        # 构建节点描述
        node_desc = f"{indent}{node_name}"
        
        # 添加额外信息
        info_parts = []
        
        # 处理时间信息
        if "operator_timing" in node and node["operator_timing"] > 0:
            timing = node["operator_timing"]
            info_parts.append(f"time={timing:.6f}")
        
        # 处理行数信息
        if "operator_cardinality" in node:
            cardinality = node["operator_cardinality"]
            info_parts.append(f"rows={cardinality}")
        
        # 处理投影信息
        extra_info = node.get("extra_info", {})
        if "Projections" in extra_info:
            projections = extra_info["Projections"]
            info_parts.append(f"Projections: {projections}")
        
        # 处理聚合信息
        if "Aggregates" in extra_info and extra_info["Aggregates"]:
            info_parts.append(f"Aggregates: {extra_info['Aggregates']}")
        
        # 处理预估基数
        if "Estimated Cardinality" in extra_info:
            estimated = extra_info["Estimated Cardinality"]
            info_parts.append(f"Estimated: {estimated}")
        
        # 处理扫描行数
        if "operator_rows_scanned" in node and node["operator_rows_scanned"] > 0:
            scanned = node["operator_rows_scanned"]
            info_parts.append(f"scanned={scanned}")
        
        # 如果有额外信息，添加到节点描述中
        if info_parts:
            node_desc += f"  ({', '.join(info_parts)})"
        
        result.append(node_desc)
        
        # 递归处理子节点
        for child in node.get("children", []):
            result.extend(process_node(child, level + 1))
        
        return result
    
    # 生成完整的执行计划
    plan_lines = ["QUERY PLAN", "-" * 50]
    
    # 获取第一层的 children 的 children（跳过 EXPLAIN_ANALYZE 外层）
    if "children" in plan_data and len(plan_data["children"]) > 0:
        explain_analyze_node = plan_data["children"][0]
        if "children" in explain_analyze_node and len(explain_analyze_node["children"]) > 0:
            # 处理所有内层节点
            for inner_node in explain_analyze_node["children"]:
                plan_lines.extend(process_node(inner_node))
        else:
            plan_lines.append("No execution plan found in the JSON data")
    else:
        plan_lines.append("Invalid JSON structure for EXPLAIN ANALYZE")
    
    return "\n".join(plan_lines)

def convert_explain_analyze_json_file(json_file_path, output_file_path=None):
    """
    从外部 EXPLAIN ANALYZE JSON 文件读取执行计划并转换为 PostgreSQL 格式
    
    Args:
        json_file_path (str): JSON文件路径
        output_file_path (str, optional): 输出文件路径
        
    Returns:
        str: PostgreSQL格式的执行计划
    """
    try:
        # 读取JSON文件
        with open(json_file_path, 'r', encoding='utf-8') as file:
            json_content = file.read()
        
        # 转换为PostgreSQL格式
        postgresql_plan = explain_analyze_json_to_postgresql_plan(json_content)
        
        # 输出到控制台
        if output_file_path:
            # 保存到文件
            with open(output_file_path, 'w', encoding='utf-8') as file:
                file.write(postgresql_plan)
            print(f"执行计划已保存到: {output_file_path}")
        else:
            # 输出到屏幕
            print(postgresql_plan)
        
        return postgresql_plan
        
    except FileNotFoundError:
        error_msg = f"错误：找不到文件 {json_file_path}"
        print(error_msg)
        return error_msg
    except Exception as e:
        error_msg = f"错误：处理文件时发生异常 - {e}"
        print(error_msg)
        return error_msg

# 修改后的main部分
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("使用方法: python script.py <explain_analyze_json文件路径> [pg计划输出文件路径]")
        sys.exit(1)
    
    json_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None
    
    convert_explain_analyze_json_file(json_file, output_file)

使用步骤
1.生成explain_analyze计划，注意生成json格式时，analyze要写在括号中

.mode list
.output planb.json
explain (analyze, format json) select count(*) from (values(1),(2))t(a);
.output
.exit

生成的json文件没有标题，如下所示：

{
    "query_name": "",
    "blocked_thread_time": 0.0,
    "system_peak_buffer_memory": 0,
    "system_peak_temp_dir_size": 0,
    "cpu_time": 0.0,
    "extra_info": {},
    "cumulative_cardinality": 0,
    "cumulative_rows_scanned": 0,
    "result_set_size": 0,
    "latency": 0.0,
    "rows_returned": 0,
    "total_bytes_read": 0,
    "total_bytes_written": 0,
    "children": [
        {
            "cpu_time": 0.0,
            "extra_info": {},
            "cumulative_cardinality": 0,
            "operator_name": "EXPLAIN_ANALYZE",
            "operator_type": "EXPLAIN_ANALYZE",
            "operator_cardinality": 0,
            "cumulative_rows_scanned": 0,
            "operator_rows_scanned": 0,
            "operator_timing": 0.0,
            "result_set_size": 0,
            "total_bytes_read": 0,
            "total_bytes_written": 0,
            "children": [
                {
                    "cpu_time": 0.0,
                    "extra_info": {
                        "Aggregates": "count_star()"
                    },
                    "cumulative_cardinality": 0,
                    "operator_name": "UNGROUPED_AGGREGATE",
                    "operator_type": "UNGROUPED_AGGREGATE",
                    "operator_cardinality": 1,
                    "cumulative_rows_scanned": 0,
                    "operator_rows_scanned": 0,
                    "operator_timing": 0.0000059,
                    "result_set_size": 8,
                    "total_bytes_read": 0,
                    "total_bytes_written": 0,
                    "children": [
                        {
                            "cpu_time": 0.0,
                            "extra_info": {
                                "Projections": "42",
                                "Estimated Cardinality": "2"
                            },
                            "cumulative_cardinality": 0,
                            "operator_name": "PROJECTION",
                            "operator_type": "PROJECTION",
                            "operator_cardinality": 2,
                            "cumulative_rows_scanned": 0,
                            "operator_rows_scanned": 0,
                            "operator_timing": 6e-7,
                            "result_set_size": 8,
                            "total_bytes_read": 0,
                            "total_bytes_written": 0,
                            "children": [
                                {
                                    "cpu_time": 0.0,
                                    "extra_info": {
                                        "Estimated Cardinality": "2"
                                    },
                                    "cumulative_cardinality": 0,
                                    "operator_name": "COLUMN_DATA_SCAN",
                                    "operator_type": "COLUMN_DATA_SCAN",
                                    "operator_cardinality": 2,
                                    "cumulative_rows_scanned": 0,
                                    "operator_rows_scanned": 0,
                                    "operator_timing": 0.0000024,
                                    "result_set_size": 8,
                                    "total_bytes_read": 0,
                                    "total_bytes_written": 0,
                                    "children": []
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    ]
}

2.转换

C:\d>python convplana.py planb.json planbpg.txt
执行计划已保存到: planbpg.txt

保存的计划内容如下

QUERY PLAN
--------------------------------------------------
UNGROUPED_AGGREGATE  (time=0.000006, rows=1, Aggregates: count_star())
  PROJECTION  (time=0.000001, rows=2, Projections: 42, Estimated: 2)
    COLUMN_DATA_SCAN  (time=0.000002, rows=2, Estimated: 2)

将其与如下explain计划比较,

QUERY PLAN
--------------------------------------------------
UNGROUPED_AGGREGATE  (Aggregates: count_star())
  PROJECTION  (Projections: 4, 2, Estimated Rows: 2)
    COLUMN_DATA_SCAN  (Estimated Rows: 2)

可见explain_analyze计划提供了实际执行时间和实际读取行数，更准确。略作修改还能提取到更多json中的信息。