表分区是解决一些因单表过大引用的性能问题的方式,比如某张表过大就会造成查询变慢,可能分区是一种解决方案。一般建议当单表大小超过内存就可以考虑表分区了。
目录
1,继承式分区,分为触发器(trigger)和规则(rule)两种方式
1,继承式分区,分为触发器(trigger)和规则(rule)两种方式
触发器的方式
1)创建表
CREATE TABLE "public"."track_info_rule_partition" (
"id" serial,
"object_type" int2 NOT NULL DEFAULT 0,
"object_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING,
"object_value" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL,
"action_start_time" TIMESTAMP ( 0 ) NOT NULL,
"action_end_time" TIMESTAMP ( 0 ) NOT NULL,
"action_type" int2 NOT NULL,
"action_geom" TEXT COLLATE "pg_catalog"."default" NOT NULL,
"geom_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING,
"geom_number" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING,
"geom_address" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING,
"ctime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ),
"utime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ),
"is_del" int2 NOT NULL DEFAULT 0,
"geom_station_start" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING,
"geom_station_end" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING,
CONSTRAINT "track_info_rule_partition_pkey" PRIMARY KEY ( "id" )
)
2)创建子表
DO $$ DECLARE
base TEXT;
sqlstring TEXT;
i INT;
BEGIN
base = 'create table track_info_rule_partition_%s(check(action_start_time >= ''%s'' and action_start_time < ''%s'')) inherits (track_info_rule_partition)';
FOR i IN 0..30
loop
sqlstring = format (
base,
to_char( '2022-05-15' :: DATE + ( i || ' day' ) :: INTERVAL, 'YYYYMMDD' ),
'2022-05-15' :: DATE + ( i || ' day' ) :: INTERVAL,
'2022-05-15' :: DATE + ( i + 1 || ' day' ) :: INTERVAL
);
EXECUTE sqlstring;
END loop;
END $$ LANGUAGE plpgsql;
3)创建触发器函数
CREATE
OR REPLACE FUNCTION ins_or_upd_on_track_info_trigger_part ( ) RETURNS TRIGGER AS $$ DECLARE
sqlstring TEXT;
BEGIN
sqlstring = 'INSERT INTO track_info_trigger_partition_%s values($1.*)';
sqlstring := format ( sqlstring, to_char( NEW.action_start_time, 'YYYYMMDD' ) );
EXECUTE sqlstring USING NEW;
RETURN NULL;
END $$ LANGUAGE plpgsql;
4)创建触发器
CREATE TRIGGER tg_on_track_info_in_or_upd BEFORE INSERT ON track_info_trigger_partition FOR EACH ROW EXECUTE FUNCTION ins_or_upd_on_track_info_trigger_part ( );
5)执行插入验证观察数据是否分布在子表和数据是否正常分布
规则的方式
1)创建大表
CREATE TABLE "public"."track_info_rule_partition" ( "id" serial, "object_type" int2 NOT NULL DEFAULT 0, "object_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "object_value" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL, "action_start_time" TIMESTAMP ( 0 ) NOT NULL, "action_end_time" TIMESTAMP ( 0 ) NOT NULL, "action_type" int2 NOT NULL, "action_geom" TEXT COLLATE "pg_catalog"."default" NOT NULL, "geom_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_number" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_address" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "ctime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "utime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "is_del" int2 NOT NULL DEFAULT 0, "geom_station_start" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_station_end" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, CONSTRAINT "track_info_rule_partition_pkey" PRIMARY KEY ( "id" ) )
2)创建子表
DO $$ DECLARE
base TEXT;
sqlstring TEXT;
i INT;
BEGIN
base = 'create table track_info_rule_partition_%s(check(action_start_time >= ''%s'' and action_start_time < ''%s'')) inherits (track_info_rule_partition)';
FOR i IN 0..30
loop
sqlstring = format (
base,
to_char( '2022-05-15' :: DATE + ( i || ' day' ) :: INTERVAL, 'YYYYMMDD' ),
'2022-05-15' :: DATE + ( i || ' day' ) :: INTERVAL,
'2022-05-15' :: DATE + ( i + 1 || ' day' ) :: INTERVAL
);
EXECUTE sqlstring;
END loop;
END $$ LANGUAGE plpgsql;
3)创建规则
DO $$ DECLARE
base TEXT;
sqlstring TEXT;
i INT;
BEGIN
base = 'CREATE OR REPLACE RULE track_info_rule_partition_%s AS ON INSERT TO track_info_rule_partition
WHERE action_start_time >= ''%s'' and action_start_time < ''%s''
DO INSTEAD
INSERT INTO track_info_rule_partition_%s VALUES(NEW.*)';
FOR i IN 0..30
loop
sqlstring = format (
base,
to_char( '2022-05-15' :: DATE + ( i || ' day' ) :: INTERVAL, 'YYYYMMDD' ),
'2022-05-15' :: DATE + ( i || ' day' ) :: INTERVAL,
'2022-05-15' :: DATE + ( i + 1 || ' day' ) :: INTERVAL,
to_char( '2022-05-15' :: DATE + ( i || ' day' ) :: INTERVAL, 'YYYYMMDD' )
);
EXECUTE sqlstring;
END loop;
END $$ LANGUAGE plpgsql;
4)执行插入验证观察数据是否分布在子表和数据是否正常分布
2,声明式,分为list,range,hash三种方式
list方式
1)创建list分区父表
CREATE TABLE "public"."track_info_list_partition" ( "id" serial, "object_type" int2 NOT NULL DEFAULT 0, "object_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "object_value" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL, "action_start_time" TIMESTAMP ( 0 ) NOT NULL, "action_end_time" TIMESTAMP ( 0 ) NOT NULL, "action_type" int2 NOT NULL, "action_geom" TEXT COLLATE "pg_catalog"."default" NOT NULL, "geom_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_number" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_address" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "ctime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "utime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "is_del" int2 NOT NULL DEFAULT 0, "geom_station_start" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_station_end" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, CONSTRAINT "track_info_list_partition_pkey" PRIMARY KEY ( "id", "action_type" ) ) PARTITION BY list ( action_type );
2)创建分区子表
do
$$
declare base text; sqlstring text; i int;
begin
base = 'create table track_info_list_partition_%s partition of track_info_list_partition for values in (''%s'')';
for i in 0..9 loop
sqlstring = format(base, i, i);
execute sqlstring;
end loop;
end
$$language plpgsql;
3)执行插入验证观察数据是否分布在子表和数据是否正常分布
range方式
1)创建大表
CREATE TABLE "public"."track_info_range_partition" ( "id" serial, "object_type" int2 NOT NULL DEFAULT 0, "object_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "object_value" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL, "action_start_time" TIMESTAMP ( 0 ) NOT NULL, "action_end_time" TIMESTAMP ( 0 ) NOT NULL, "action_type" int2 NOT NULL, "action_geom" TEXT COLLATE "pg_catalog"."default" NOT NULL, "geom_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_number" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_address" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "ctime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "utime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "is_del" int2 NOT NULL DEFAULT 0, "geom_station_start" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_station_end" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, CONSTRAINT "track_info_range_partition_pkey" PRIMARY KEY ( "id", "action_start_time" ) ) PARTITION BY range ( action_start_time );
2)创建分区子表
DO
$$ DECLARE
base TEXT;
date_start TEXT = '2022-05-15';
sqlstring TEXT;
i INT;
BEGIN
base = 'create table track_info_range_partition_%s partition of track_info_range_partition for values from (''%s'') to (''%s'')';
FOR i IN 0..30
loop
sqlstring = format (
base,
to_char( date_start :: timestamptz + ( i || ' day' ) :: INTERVAL, 'YYYYMMDD' ),
date_start :: timestamptz + ( i || ' day' ) :: INTERVAL,
date_start :: timestamptz + ( i + 1 || ' day' ) :: INTERVAL
);
EXECUTE sqlstring;
END loop;
END $$ LANGUAGE plpgsql;
3)执行插入验证观察数据是否分布在子表和数据是否正常分布
hash方式
1)创建大表
CREATE TABLE "public"."track_info_hash_partition" ( "id" serial, "object_type" int2 NOT NULL DEFAULT 0, "object_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "object_value" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL, "action_start_time" TIMESTAMP ( 0 ) NOT NULL, "action_end_time" TIMESTAMP ( 0 ) NOT NULL, "action_type" int2 NOT NULL, "action_geom" TEXT COLLATE "pg_catalog"."default" NOT NULL, "geom_name" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_number" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_address" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "ctime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "utime" TIMESTAMP ( 0 ) NOT NULL DEFAULT now( ), "is_del" int2 NOT NULL DEFAULT 0, "geom_station_start" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, "geom_station_end" VARCHAR ( 255 ) COLLATE "pg_catalog"."default" NOT NULL DEFAULT '' :: CHARACTER VARYING, CONSTRAINT "track_info_hash_partition_pkey" PRIMARY KEY ( "id", "geom_name" ) ) PARTITION BY hash ( geom_name );
2)创建分区子表
DO
$$ DECLARE
base TEXT;
sqlstring TEXT;
i INT;
BEGIN
base = 'create table track_info_hash_partition_%s partition of track_info_hash_partition for values with(modulus 10, remainder %s)';
FOR i IN 0..9
loop
sqlstring = format ( base, i, i );
EXECUTE sqlstring;
END loop;
END $$ LANGUAGE plpgsql;
3)执行插入验证观察数据是否分布在子表和数据是否正常分布
3,分区表的维护
继承式分区表的维护,主要为 INHERIT 与 NO INHERIT。语法如下:
ALTER TABLE track_info_trigger_partition_20220520 NO INHERIT track_info_trigger_partition;
ALTER TABLE track_info_trigger_partition_20220520 INHERIT track_info_trigger_partition;
NO INHERIT的情况下,不可以通过父表对子表进行CRUD,INHERIT之后可以正常的CRUD。
声明式分区表的维护,主要为 ATTACH 与 DETACH。语法如下:
ALTER TABLE track_info_list_partition DETACH PARTITION track_info_list_partition_4;
ALTER TABLE track_info_list_partition ATTACH PARTITION track_info_list_partition_4 FOR VALUES in (4)
DETACH的情况下,不可以通过父表对子表进行CRUD,ATTACH之后可以正常的CRUD。

本文详细介绍了继承式分区(触发器与规则)和声明式分区(list, range, hash)的方法,包括创建表、子表、触发器/规则及维护操作。通过实例展示如何将大数据表拆分成更易管理的部分,提升查询效率。
808

被折叠的 条评论
为什么被折叠?



