flink-paimon建表建议

原创已于 2025-11-22 16:09:38 修改 · 310 阅读

7 ·

CC 4.0 BY-SA版权

文章标签：

#flink #大数据

于 2025-11-18 14:12:01 首次发布

/usr/dif/7.1.0-0/flink/bin/sql-client.sh

-- Register a Paimon catalog whose metadata is stored in the Hive Metastore,
-- then make it (and the target database) current for the statements that follow.
-- String literals use plain ASCII single quotes; typographic quotes are not
-- valid SQL string delimiters.
CREATE CATALOG paimon_hive_catalog
WITH (
    'type' = 'paimon',
    'metastore' = 'hive',
    'hive-conf-dir' = '/etc/hive/conf/',
    -- NOTE(review): this points at the Hive conf directory; confirm that the
    -- Hadoop configuration files are also available there.
    'hadoop-conf-dir' = '/etc/hive/conf'
);

USE CATALOG paimon_hive_catalog;

-- ${DB} is a template placeholder: substitute the target database name
-- before submitting this script.
USE ${DB};

-- Paimon primary-key table, partitioned by p_dt, with recommended table options.
-- String literals use plain ASCII single quotes and comments use `--`;
-- typographic quotes and en-dashes are not valid SQL.
CREATE TABLE mes_st2_hold
(
    -- Placeholder: replace XXXXXXXX with the business column definitions, each
    -- terminated by a comma. They must include every column referenced below:
    -- id and sn (primary key), last_updated_dt (sequence.field) and
    -- batch_no (bitmap index).
    XXXXXXXX
    p_dt STRING COMMENT '月分区',
    PRIMARY KEY (p_dt, id, sn) NOT ENFORCED
)
PARTITIONED BY (
    p_dt
)
WITH (
    -- Number of buckets. Recommended data size per bucket is 200 MB - 1 GB;
    -- adjust to the data volume.
    'bucket' = '2',
    -- Deduplicate rows by primary key.
    'merge-engine' = 'deduplicate',
    -- Order duplicates by this time field so newer data replaces older data;
    -- adjust to the business requirement.
    'sequence.field' = 'last_updated_dt',
    -- Synchronize partitions to the Hive Metastore (HMS).
    'metastore.partitioned-table' = 'true',
    -- Retain snapshots for 1 day; adjust to the business requirement.
    'snapshot.time-retained' = '1 d',
    -- Maximum number of snapshots to retain; adjust to the business requirement.
    'snapshot.num-retained.max' = '50',
    -- Small-file compaction policy: trigger a minor compaction once
    -- 10 level-0 files exist.
    'num-sorted-run.compaction-trigger' = '10',
    -- Enabling deletion vectors is one of the prerequisites for using file indexes.
    'deletion-vectors' = 'true',
    -- Configure the bitmap file index.
    'file-index.bitmap.columns' = 'batch_no'
);