# Launch the Flink SQL client (shell command). Presumably the SQL statements
# that follow are meant to be entered in that client session - TODO confirm.
/usr/dif/7.1.0-0/flink/bin/sql-client.sh
-- Register a Paimon catalog that uses Hive as its metastore.
-- All option keys and values are written with plain ASCII single quotes;
-- typographic (curly) quotes are not valid SQL string delimiters.
CREATE CATALOG paimon_hive_catalog
WITH (
    'type' = 'paimon',
    'metastore' = 'hive',
    'hive-conf-dir' = '/etc/hive/conf/',
    -- NOTE(review): this points at the Hive conf directory as well; confirm the
    -- Hadoop configuration files are actually available under that path.
    'hadoop-conf-dir' = '/etc/hive/conf'
);

-- Switch to the catalog created above.
USE CATALOG paimon_hive_catalog;

-- NOTE(review): ${DB} looks like a template placeholder for the database name;
-- presumably it is substituted before the script is executed - confirm.
USE ${DB};
-- Paimon primary-key table mes_st2_hold, partitioned by p_dt.
-- The primary key (p_dt, id, sn) includes the partition column p_dt.
CREATE TABLE mes_st2_hold
(
    -- NOTE(review): the XXXXXXXX line below is a placeholder kept from the
    -- original; the real column definitions were elided. The clauses in this
    -- statement reference id, sn, last_updated_dt and batch_no, so those
    -- columns must be defined here (each followed by a comma).
    XXXXXXXX
    p_dt STRING COMMENT '月分区',
    PRIMARY KEY (p_dt, id, sn) NOT ENFORCED
)
PARTITIONED BY (
    p_dt
)
WITH (
    -- Number of buckets. Original author's guidance: aim for roughly
    -- 200 MB - 1 GB per bucket and tune to the data volume.
    'bucket' = '2',
    -- Deduplicate rows on the primary key.
    'merge-engine' = 'deduplicate',
    -- Order records by this time field when deduplicating, so newer data
    -- replaces older data. Adjust to the business requirements.
    'sequence.field' = 'last_updated_dt',
    -- Synchronize partitions to the Hive Metastore (HMS).
    'metastore.partitioned-table' = 'true',
    -- Keep snapshots for 1 day. Adjust to the business requirements.
    'snapshot.time-retained' = '1 d',
    -- Maximum number of snapshots to keep. Adjust to the business requirements.
    'snapshot.num-retained.max' = '50',
    -- Small-file compaction policy: trigger compaction once the number of
    -- sorted runs reaches 10 (the original note describes this as 10 level-0
    -- files triggering a minor compaction).
    'num-sorted-run.compaction-trigger' = '10',
    -- Enable deletion vectors. Paimon's documented option key is
    -- 'deletion-vectors.enabled'; the bare key 'deletion-vectors' is not a
    -- documented option. Per the original author's note, deletion vectors are
    -- one of the prerequisites for using file indexes - TODO confirm.
    'deletion-vectors.enabled' = 'true',
    -- Build a bitmap file index on batch_no.
    'file-index.bitmap.columns' = 'batch_no'
);
1700

被折叠的 条评论
为什么被折叠?



