2021-07-12 日常SQL笔记

本文档展示了创建Presto查询事件表的SQL语句,包括表结构、字段注释及数据类型。同时,提供了性能指标分析的SQL示例,用于统计查询的执行时间和成功率等。此外,还包含查询集群信息和特定用户数据集的SQL片段。
	DROP TABLE IF EXISTS t_presto_query_event;
	CREATE TABLE "presto"."t_presto_query_event" (
	  "id" int8 NOT NULL DEFAULT nextval('t_presto_query_event_id_seq'::regclass),
	  "query_id" varchar(255) COLLATE "pg_catalog"."default" NOT NULL,
	  "transaction_id" varchar(255) COLLATE "pg_catalog"."default",
	  "query" text COLLATE "pg_catalog"."default",
	  "query_state" varchar(255) COLLATE "pg_catalog"."default",
	  "query_type" varchar(255) COLLATE "pg_catalog"."default",
	  "uri" varchar(255) COLLATE "pg_catalog"."default",
	  "user" varchar(255) COLLATE "pg_catalog"."default",
	  "principal" varchar(255) COLLATE "pg_catalog"."default",
	  "remote_client_address" varchar(255) COLLATE "pg_catalog"."default",
	  "user_agent" varchar(255) COLLATE "pg_catalog"."default",
	  "client_info" varchar(255) COLLATE "pg_catalog"."default",
	  "client_tags" varchar(255) COLLATE "pg_catalog"."default",
	  "source" varchar(255) COLLATE "pg_catalog"."default",
	  "catalog" varchar(255) COLLATE "pg_catalog"."default",
	  "schema" varchar(255) COLLATE "pg_catalog"."default",
	  "resource_group" varchar(255) COLLATE "pg_catalog"."default",
	  "session_properties" varchar(255) COLLATE "pg_catalog"."default",
	  "server_address" varchar(255) COLLATE "pg_catalog"."default",
	  "server_version" varchar(255) COLLATE "pg_catalog"."default",
	  "environment" varchar(255) COLLATE "pg_catalog"."default",
	  "author" varchar(255) COLLATE "pg_catalog"."default",
	  "department" varchar(255) COLLATE "pg_catalog"."default",
	  "job_id" varchar(255) COLLATE "pg_catalog"."default",
	  "cpu_time" int8,
	  "retried_cpu_time" int8,
	  "wall_time" int8,
	  "queued_time" int8,
	  "analysis_time" int8,
	  "peak_running_tasks" int4,
	  "peak_user_memory_bytes" int8,
	  "peak_total_non_revocable_memory_bytes" int8,
	  "peak_task_user_memory" int8,
	  "peak_task_total_memory" int8,
	  "peak_node_total_memory" int8,
	  "total_bytes" int8,
	  "total_rows" int8,
	  "output_bytes" int8,
	  "output_rows" int8,
	  "written_output_bytes" int8,
	  "written_output_rows" int8,
	  "written_intermediate_bytes" int8,
	  "spilled_bytes" int8,
	  "cumulative_memory" numeric(38,2),
	  "completed_splits" int4,
	  "complete" bool,
	  "output_catalog" varchar(255) COLLATE "pg_catalog"."default",
	  "output_schema" varchar(255) COLLATE "pg_catalog"."default",
	  "output_table" varchar(255) COLLATE "pg_catalog"."default",
	  "output_connectorOutputMetadata" text COLLATE "pg_catalog"."default",
	  "output_jsonLengthLimitExceeded" bool,
	  "error_name" varchar(255) COLLATE "pg_catalog"."default",
	  "error_type" varchar(255) COLLATE "pg_catalog"."default",
	  "error_stack" text COLLATE "pg_catalog"."default",
	  "failure_type" varchar(255) COLLATE "pg_catalog"."default",
	  "failure_message" text COLLATE "pg_catalog"."default",
	  "failure_task" varchar(255) COLLATE "pg_catalog"."default",
	  "failure_host" varchar(255) COLLATE "pg_catalog"."default",
	  "failed_tasks" text COLLATE "pg_catalog"."default",
	  "warning" text COLLATE "pg_catalog"."default",
	  "submit_time" timestamp(6) NOT NULL,
	  "execution_start_time" timestamp(6) NOT NULL,
	  "end_time" timestamp(6) NOT NULL,
	  "execution_time" int8,
	  "scheduled_time" int8,
	  "create_time" timestamp(6) NOT NULL DEFAULT now(),
	  "update_time" timestamp(6) NOT NULL DEFAULT now(),
	  CONSTRAINT "t_presto_query_event_pkey" PRIMARY KEY ("query_id")
	)
	;

	ALTER TABLE "presto"."t_presto_query_event" 
	  OWNER TO "presto";

	COMMENT ON COLUMN "presto"."t_presto_query_event"."id" IS '自增主键';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."query_id" IS '查询ID';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."transaction_id" IS '事务ID';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."query" IS '查询sql';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."query_state" IS '查询状态:FINISHED,FAILED';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."query_type" IS '查询类型:DATA_DEFINITION,DELETE,DESCRIBE,EXPLAIN,INSERT,SELECT';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."uri" IS '查询URL';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."remote_client_address" IS '客户端地址';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."user_agent" IS '用户代理';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."client_info" IS '客户端信息';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."client_tags" IS '客户端标签';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."source" IS '查询提交来源';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."catalog" IS '当前catalog';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."schema" IS '当前schema';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."resource_group" IS '资源组';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."session_properties" IS '会话属性';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."server_address" IS '服务端地址';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."server_version" IS 'presto 集群版本';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."environment" IS '环境';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."author" IS '查询提交人';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."department" IS '查询提交人所属部门';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."job_id" IS '定时任务ID';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."written_output_bytes" IS '写出数据的总大小';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."written_output_rows" IS '写出数据的总行数';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."cumulative_memory" IS '查询累计内存';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."completed_splits" IS '已完成的split数量';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."complete" IS '查询是否完成';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."output_catalog" IS '查询输出的catalog';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."output_schema" IS '查询输出的schema';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."output_table" IS '查询输出的表';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."error_name" IS '异常名称';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."error_type" IS '异常类型';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."error_stack" IS '异常栈信息';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_type" IS '查询失败类型';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_message" IS '异常的message';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_task" IS '失败的任务';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."failure_host" IS '失败的host';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."failed_tasks" IS '失败的任务';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."warning" IS '警告信息(通常为空)';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."submit_time" IS '查询提交的时间';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."execution_start_time" IS '查询开始执行时间';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."end_time" IS '查询完成时间';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."execution_time" IS '执行消耗时间';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."scheduled_time" IS '调度时间';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."create_time" IS '创建时间';

	COMMENT ON COLUMN "presto"."t_presto_query_event"."update_time" IS '修改时间';

  性能指标分析SQL如下:

with a as 
(SELECT to_char(create_time, 'yyyy-MM-dd') AS day_time,
case  
when  execution_time/(60*1000) >= 0 and execution_time/(60*1000) < 1 then 'A[0-1)分钟'
when  execution_time/(60*1000) >= 1 and execution_time/(60*1000) <  5 then 'B[1-5)分钟'
when  execution_time/(60*1000) >= 5 and execution_time/(60*1000) < 10 then 'C[5-10)分钟'
when  execution_time/(60*1000) >= 10 and execution_time/(60*1000) < 15 then 'D[10-15)分钟'
when  execution_time/(60*1000) >= 15 and execution_time/(60*1000) < 30 then 'E[15-30)分钟'
when  execution_time/(60*1000) >= 30 then 'G[30-∞)分钟'
end AS time_period,
coalesce(sum(execution_time),0) AS total_execution_time,
coalesce(sum(case when query_state='FINISHED' then execution_time end)/60000,0) AS total_success_execution_time,
coalesce(sum(case when query_state='FAILED' then execution_time end)/60000,0) AS total_fail_execution_time,
coalesce(count(1),0) AS query_total_num,
coalesce(sum(case when query_state='FINISHED' then 1 end),0) AS success_num,
coalesce(sum(case when query_state='FAILED' then 1 end),0) AS failed_num,
coalesce(sum(execution_time)/count(1)/60000,0) AS avg_total_execution_time,
coalesce((sum(case when query_state='FINISHED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_success_execution_time,
coalesce((sum(case when query_state='FAILED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_fail_execution_time
FROM t_presto_query_event
where source in ('pangu-query','pangu-schedule','pangu-airflow')
group by 1,2
)
select *,sum(query_total_num)over(partition by day_time ) query_total_num_all  from a 
union ALL
SELECT to_char(create_time, 'yyyy-MM-dd') AS day_time,
case  
when  execution_time/(60*1000) >= 0 then '总体'
end AS time_period,
coalesce(sum(execution_time),0) AS total_execution_time,
coalesce(sum(case when query_state='FINISHED' then execution_time end)/60000,0) AS total_success_execution_time,
coalesce(sum(case when query_state='FAILED' then execution_time end)/60000,0) AS total_fail_execution_time,
coalesce(count(1),0) AS query_total_num,
coalesce(sum(case when query_state='FINISHED' then 1 end),0) AS success_num,
coalesce(sum(case when query_state='FAILED' then 1 end),0) AS failed_num,
coalesce(sum(execution_time)/count(1)/60000,0) AS avg_total_execution_time,
coalesce((sum(case when query_state='FINISHED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_success_execution_time,
coalesce((sum(case when query_state='FAILED' then execution_time end)/sum(case when query_state='FINISHED' then 1 end))/60000,0) AS avg_fail_execution_time,
coalesce(count(1),0) AS query_total_num_all
FROM t_presto_query_event
where source in ('pangu-query','pangu-schedule','pangu-airflow')
group by 1,2

 SQL里的随机数函数:

SELECT ceil(random()*(10-1)+1) as num;
SELECT floor(random()*(10000-1)+1) as num;
SELECT trunc(random()*(10000-1)+1) as num;

 -- 分时集群信息:

-- 分时集群信息
select to_char(create_time, 'yyyy-MM-dd HH24') AS day_time,
max(RUNNING_QUERIES) AS MAX_RUNNING_QUERIES,max(QUEUED_QUERIES) AS MAX_QUEUED_QUERIES,
MIN(ACTIVE_WORKERS)  from t_cluster_info  
where create_time>='2021-06-14 00:00' and create_time <='2021-06-22 00:00' group by 1
-- 查出xiaosong的数据集中的sql语句
SELECT A.user_id,B.user_name,A.category_name,A.dataset_name,A.data_json :: json -> 'query' AS query_sql
FROM
        dashboard_dataset A LEFT JOIN dashboard_user B ON A.user_id=B.user_id 
WHERE
        B.user_name= 'xiaosong' and del = FALSE;
-- 查出数据源Id,sql语句
SELECT A.dateset_id
	( A.data_json :: json -> 'datasource' ) :: TEXT AS datasource_id,
	( A.data_json :: json -> 'query' ) :: TEXT AS SQL,
	B.user_name
FROM
	dashboard_dataset A
	 LEFT JOIN dashboard_user B ON A.user_id = B.user_id
WHERE
	( A.data_json :: json -> 'datasource' ) :: TEXT = '1';

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

莱恩大数据

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值