DROP TABLE IF EXISTS t_presto_query_event;
CREATE TABLE "presto"."t_presto_query_event" (
"id" int8 NOT NULL DEFAULT nextval('t_presto_query_event_id_seq'::regclass),
"query_id" varchar(255) COLLATE "pg_catalog"."default" NOT NULL,
"transaction_id" varchar(255) COLLATE "pg_catalog"."default",
"query" text COLLATE "pg_catalog"."default",
"query_state" varchar(255) COLLATE "pg_catalog"."default",
"query_type" varchar(255) COLLATE "pg_catalog"."default",
"uri" varchar(255) COLLATE "pg_catalog"."default",
"user" varchar(255) COLLATE "pg_catalog"."default",
"principal" varchar(255) COLLATE "pg_catalog"."default",
"remote_client_address" varchar(255) COLLATE "pg_catalog"."default",
"user_agent" varchar(255) COLLATE "pg_catalog"."default",
"client_info" varchar(255) COLLATE "pg_catalog"."default",
"client_tags" varchar(255) COLLATE "pg_catalog"."default",
"source" varchar(255) COLLATE "pg_catalog"."default",
"catalog" varchar(255) COLLATE "pg_catalog"."default",
"schema" varchar(255) COLLATE "pg_catalog"."default",
"resource_group" varchar(255) COLLATE "pg_catalog"."default",
"session_properties" varchar(255) COLLATE "pg_catalog"."default",
"server_address" varchar(255) COLLATE "pg_catalog"."default",
"server_version" varchar(255) COLLATE "pg_catalog"."default",
"environment" varchar(255) COLLATE "pg_catalog"."default",
"author" varchar(255) COLLATE "pg_catalog"."default",
"department" varchar(255) COLLATE "pg_catalog"."default",
"job_id" varchar(255) COLLATE "pg_catalog"."default",
"cpu_time" int8,
"retried_cpu_time" int8,
"wall_time" int8,
"queued_time" int8,
"analysis_time" int8,
"peak_running_tasks" int4,
"peak_user_memory_bytes" int8,
"peak_total_non_revocable_memory_bytes" int8,
"peak_task_user_memory" int8,
"peak_task_total_memory" int8,
"peak_node_total_memory" int8,
"total_bytes" int8,
"total_rows" int8,
"output_bytes" int8,
"output_rows" int8,
"written_output_bytes" int8,
"written_output_rows" int8,
"written_intermediate_bytes" int8,
"spilled_bytes" int8,
"cumulative_memory" numeric(38,2),
"completed_splits" int4,
"complete" bool,
"output_catalog" varchar(255) COLLATE "pg_catalog"."default",
"output_schema" varchar(255) COLLATE "pg_catalog"."default",
"output_table" varchar(255) COLLATE "pg_catalog"."default",
"output_connectorOutputMetadata" text COLLATE "pg_catalog"."default",
"output_jsonLengthLimitExceeded" bool,
"error_name" varchar(255) COLLATE "pg_catalog"."default",
"error_type" varchar(255) COLLATE "pg_catalog"."default",
"error_stack" text COLLATE "pg_catalog"."default",
"failure_type" varchar(255) COLLATE "pg_catalog"."default",
"failure_message" text COLLATE "pg_catalog"."default",
"failure_task" varchar(255) COLLATE "pg_catalog"."default",
"failure_host" varchar(255) COLLATE "pg_catalog"."default",
"failed_tasks" text COLLATE "pg_catalog"."default",
"warning" text COLLATE "pg_catalog"."default",
"submit_time" timestamp(6) NOT NULL,
"execution_start_time" timestamp(6) NOT NULL,
"end_time" timestamp(6) NOT NULL,
"execution_time" int8,
"scheduled_time" int8,
"create_time" timestamp(6) NOT NULL DEFAULT now(),
"update_time" timestamp(6) NOT NULL DEFAULT now(),
CONSTRAINT "t_presto_query_event_pkey" PRIMARY KEY ("query_id")
)
;
ALTER TABLE "presto"."t_presto_query_event"
OWNER TO "presto";
COMMENT ON COLUMN "presto"."t_presto_query_event"."id" IS '自增主键';
COMMENT ON COLUMN "presto"."t_presto_query_event"."query_id" IS '查询ID';
COMMENT ON COLUMN "presto"."t_presto_query_event"."transaction_id" IS '事务ID';
COMMENT ON COLUMN "presto"."t_presto_query_event"."query" IS '查询sql';
COMMENT ON COLUMN "presto"."t_presto_query_event"."query_state" IS '查询状态:FINISHED,FAILED';
COMMENT ON COLUMN "presto"."t_presto_query_event"."query_type" IS '查询类型:DATA_DEFINITION,DELETE,DESCRIBE,EXPLAIN,INSERT,SELECT';
COMMENT ON COLUMN "presto"."t_presto_query_event"."uri" IS '查询URL';
COMMENT ON COLUMN "presto"."t_presto_query_event"."remote_client_address" IS '客户端地址';
COMMENT ON COLUMN "presto"."t_presto_query_event"."user_agent" IS '用户代理';
COMMENT ON COLUMN "presto"."t_presto_query_event"."client_info" IS '客户端信息';
COMMENT ON COLUMN "presto"."t_presto_query_event"."client_tags" IS '客户端标签';
COMMENT ON COLUMN "presto"."t_presto_query_event"."source" IS '查询提交来源';
COMMENT ON COLUMN "presto"."t_presto_query_event"."catalog" IS '当前catalog';
COMMENT ON COLUMN "presto"."t_presto_query_event"."schema" IS '当前schema';
COMMENT ON COLUMN "presto"."t_presto_query_event"."resource_group" IS '资源组';
COMMENT ON COLUMN "presto"."t_presto_query_event"."session_properties" IS '会话属性';
COMMENT ON COLUMN "presto"."t_presto_query_event"."server_address" IS '服务端地址';
COMMENT ON COLUMN "presto"."t_presto_query_event"."server_version" IS 'presto 集群版本';
COMMENT ON COLUMN "presto"."t_presto_query_event"."environment" IS '环境';
COMMENT ON COLUMN "presto"."t_presto_query_event"."author" IS '查询提交人';
COMMENT ON COLUMN "presto"."t_presto_query_event"."department" IS '查询提交人所属部门';
COMMENT ON COLUMN "presto"."t_presto_query_event"."job_id" IS '定时任务ID';
COMMENT ON COLUMN "presto"."t_presto_query_event"."written_output_bytes" IS '写出数据的总大小';
COMMENT ON COLUMN "presto"."t_presto_query_event"."written_output_rows" IS '写出数据的总行数';
COMMENT ON COLUMN "presto"."t_presto_query_event"."cumulative_memory" IS '查询累计内存';
COMMENT ON COLUMN "presto"."t_presto_query_event"."completed_splits" IS '已完成的split数量';
COMMENT ON COLUMN "presto"."t_presto_query_event"."complete" IS '查询是否完成';
COMMENT ON COLUMN "presto"."t_presto_query_event"."output_catalog" IS '查询输出的catalog';
COMMENT ON COLUMN "presto"."t_presto_query_event"."output_schema" IS '查询输出的schema';
COMMENT ON COLUMN "presto"."t_presto_query_event"."output_table" IS '查询输出的表';
COMMENT ON COLUMN "presto"."t_presto_query_event"."error_name" IS '异常名称';
COMMENT ON COLUMN "presto"."t_presto_query_event"."error_type" IS '异常类型';
COMMENT ON COLUMN "presto"."t_presto_query_event"."error_stack" IS '异常栈信息';
COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_type" IS '查询失败类型';
COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_message" IS '异常的message';
COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_task" IS '失败的任务';
COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_host" IS '失败的host';
COMMENT ON COLUMN "presto"."t_presto_query_event"."failed_tasks" IS '失败的任务';
COMMENT ON COLUMN "presto"."t_presto_query_event"."warning" IS '警告信息(通常为空)';
COMMENT ON COLUMN "presto"."t_presto_query_event"."submit_time" IS '查询提交的时间';
COMMENT ON COLUMN "presto"."t_presto_query_event"."execution_start_time" IS '查询开始执行时间';
COMMENT ON COLUMN "presto"."t_presto_query_event"."end_time" IS '查询完成时间';
COMMENT ON COLUMN "presto"."t_presto_query_event"."execution_time" IS '执行消耗时间';
COMMENT ON COLUMN "presto"."t_presto_query_event"."scheduled_time" IS '调度时间';
COMMENT ON COLUMN "presto"."t_presto_query_event"."create_time" IS '创建时间';
COMMENT ON COLUMN "presto"."t_presto_query_event"."update_time" IS '修改时间';
性能指标分析SQL如下:
with a as
(SELECT to_char(create_time, 'yyyy-MM-dd') AS day_time,
case
when execution_time/(60*1000) >= 0 and execution_time/(60*1000) < 1 then 'A[0-1)分钟'
when execution_time/(60*1000) >= 1 and execution_time/(60*1000) < 5 then 'B[1-5)分钟'
when execution_time/(60*1000) >= 5 and execution_time/(60*1000) < 10 then 'C[5-10)分钟'
when execution_time/(60*1000) >= 10 and execution_time/(60*1000) < 15 then 'D[10-15)分钟'
when execution_time/(60*1000) >= 15 and execution_time/(60*1000) < 30 then 'E[15-30)分钟'
when execution_time/(60*1000) >= 30 then 'G[30-∞)分钟'
end AS time_period,
coalesce(sum(execution_time),0) AS total_execution_time,
coalesce(sum(case when query_state='FINISHED' then execution_time end)/60000,0) AS total_success_execution_time,
coalesce(sum(case when query_state='FAILED' then execution_time end)/60000,0) AS total_fail_execution_time,
coalesce(count(1),0) AS query_total_num,
coalesce(sum(case when query_state='FINISHED' then 1 end),0) AS success_num,
coalesce(sum(case when query_state='FAILED' then 1 end),0) AS failed_num,
coalesce(sum(execution_time)/count(1)/60000,0) AS avg_total_execution_time,
coalesce((sum(case when query_state='FINISHED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_success_execution_time,
coalesce((sum(case when query_state='FAILED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_fail_execution_time
FROM t_presto_query_event
where source in ('pangu-query','pangu-schedule','pangu-airflow')
group by 1,2
)
select *,sum(query_total_num)over(partition by day_time ) query_total_num_all from a
union ALL
SELECT to_char(create_time, 'yyyy-MM-dd') AS day_time,
case
when execution_time/(60*1000) >= 0 then '总体'
end AS time_period,
coalesce(sum(execution_time),0) AS total_execution_time,
coalesce(sum(case when query_state='FINISHED' then execution_time end)/60000,0) AS total_success_execution_time,
coalesce(sum(case when query_state='FAILED' then execution_time end)/60000,0) AS total_fail_execution_time,
coalesce(count(1),0) AS query_total_num,
coalesce(sum(case when query_state='FINISHED' then 1 end),0) AS success_num,
coalesce(sum(case when query_state='FAILED' then 1 end),0) AS failed_num,
coalesce(sum(execution_time)/count(1)/60000,0) AS avg_total_execution_time,
coalesce((sum(case when query_state='FINISHED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_success_execution_time,
coalesce((sum(case when query_state='FAILED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_fail_execution_time,
coalesce(count(1),0) AS query_total_num_all
FROM t_presto_query_event
where source in ('pangu-query','pangu-schedule','pangu-airflow')
group by 1,2
SQL里的随机数函数:
SELECT ceil(random()*(10-1)+1) as num;
SELECT floor(random()*(10000-1)+1) as num;
SELECT trunc(random()*(10000-1)+1) as num;
-- 分时集群信息:
-- 分时集群信息
select to_char(create_time, 'yyyy-MM-dd HH24') AS day_time,
max(RUNNING_QUERIES) AS MAX_RUNNING_QUERIES,max(QUEUED_QUERIES) AS MAX_QUEUED_QUERIES,
MIN(ACTIVE_WORKERS) from t_cluster_info
where create_time>='2021-06-14 00:00' and create_time <='2021-06-22 00:00' group by 1
-- 查出xiaosong的数据集中的sql语句
SELECT A.user_id,B.user_name,A.category_name,A.dataset_name,A.data_json :: json -> 'query' AS query_sql
FROM
dashboard_dataset A LEFT JOIN dashboard_user B ON A.user_id=B.user_id
WHERE
B.user_name= 'xiaosong' and del = FALSE;
-- 查出数据源Id,sql语句
SELECT A.dateset_id
( A.data_json :: json -> 'datasource' ) :: TEXT AS datasource_id,
( A.data_json :: json -> 'query' ) :: TEXT AS SQL,
B.user_name
FROM
dashboard_dataset A
LEFT JOIN dashboard_user B ON A.user_id = B.user_id
WHERE
( A.data_json :: json -> 'datasource' ) :: TEXT = '1';
本文档展示了创建Presto查询事件表的SQL语句,包括表结构、字段注释及数据类型。同时,提供了性能指标分析的SQL示例,用于统计查询的执行时间和成功率等。此外,还包含查询集群信息和特定用户数据集的SQL片段。
878

被折叠的 条评论
为什么被折叠?



