Creating a Paimon table with Flink
/usr/dif/7.1.0-0/flink/bin/sql-client.sh
CREATE CATALOG paimon_hive_catalog
WITH (
  'type' = 'paimon',
  'metastore' = 'hive',
  'hive-conf-dir' = '/etc/hive/conf/',
  'hadoop-conf-dir' = '/etc/hive/conf'
);
USE CATALOG paimon_hive_catalog;
USE ${DB};
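A minimal sketch of the matching Flink DDL, reusing the table from the Spark SQL example further below (test999 with columns id and dt) for illustration: in Flink SQL the primary key is declared inline with NOT ENFORCED, and table options go in a WITH clause rather than TBLPROPERTIES.

-- Hypothetical example; adjust table name, columns, and options as needed
CREATE TABLE IF NOT EXISTS test999 (
    id STRING COMMENT 'ID',
    dt STRING COMMENT 'date',
    PRIMARY KEY (dt, id) NOT ENFORCED
) PARTITIONED BY (dt)
WITH (
    'bucket' = '2',
    'merge-engine' = 'deduplicate'
);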
Creating a Paimon table with Kyuubi
/usr/dif/7.1.0-0/kyuubi/bin/beeline -u 'jdbc:hive2://wf-gd2-bpit-dp-nn-17-70:2181,wf-gd2-bpit-dp-nn-17-71:2181,wf-gd2-bpit-dp-dn-17-80:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=kyuubi;principal=ocdp/_HOST@GOERTEK.COM;#spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog;spark.sql.catalog.spark_catalog=org.apache.paimon.spark.SparkGenericCatalog;spark.sql.catalog.paimon.warehouse=hdfs://goertekwf/apps/hive/warehouse;spark.sql.catalog.paimon.metastore=hive;spark.sql.catalog.paimon.uri=thrift://wf-gd2-bpit-dp-dn-17-86:9083,thrift://wf-gd2-bpit-dp-nn-17-70:9083,thrift://wf-gd2-bpit-dp-nn-17-71:9083;spark.hadoop.hive.metastore.uris=thrift://wf-gd2-bpit-dp-dn-17-86:9083,thrift://wf-gd2-bpit-dp-nn-17-70:9083,thrift://wf-gd2-bpit-dp-nn-17-71:9083;spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions;spark.yarn.queue=default;spark.executor.cores=2;spark.driver.cores=1;spark.executor.instances=2;spark.executor.memory=2g'
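Once beeline connects, a quick sanity check (assuming the catalog settings in the JDBC URL took effect) is to list what the paimon catalog exposes:

-- Verify the Paimon catalog is registered in the Spark session
SHOW NAMESPACES IN paimon;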
Creating a Paimon table with Spark SQL
/usr/dif/7.1.0-0/spark/bin/spark-sql \
  --conf spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog \
  --conf spark.sql.catalog.paimon.warehouse=hdfs://goertekwf/apps/hive/warehouse \
  --conf spark.sql.catalog.paimon.metastore=hive \
  --conf spark.sql.catalog.paimon.uri=thrift://wf-gd2-bpit-dp-dn-17-86:9083,thrift://wf-gd2-bpit-dp-nn-17-70:9083,thrift://wf-gd2-bpit-dp-nn-17-71:9083 \
  --conf spark.hadoop.hive.metastore.uris=thrift://wf-gd2-bpit-dp-dn-17-86:9083,thrift://wf-gd2-bpit-dp-nn-17-70:9083,thrift://wf-gd2-bpit-dp-nn-17-71:9083 \
  --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
USE XXX; -- switch to the target database; XXX is a placeholder
CREATE TABLE test999 (
  id STRING COMMENT 'ID',
  dt STRING COMMENT 'date'
)
USING paimon
PARTITIONED BY (dt)
TBLPROPERTIES (
  -- Bucket count: aim for 200 MB - 1 GB per bucket file; adjust to data volume
  'bucket' = '2',
  -- Deduplicate rows on the primary key
  'merge-engine' = 'deduplicate',
  -- Primary key (in Spark SQL the primary key is set via TBLPROPERTIES)
  'primary-key' = 'dt,id',
  -- Sync partitions to the Hive metastore (HMS)
  'metastore.partitioned-table' = 'true',
  -- Retain snapshots for 1 day; adjust to business needs
  'snapshot.time-retained' = '1 d',
  -- Maximum number of snapshots to retain; adjust to business needs
  'snapshot.num-retained.max' = '50',
  -- Small-file compaction policy: trigger a minor compaction once 10 level-0 sorted runs exist
  'num-sorted-run.compaction-trigger' = '10'
);
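A short smoke test against the table above: since the merge engine is deduplicate and the primary key is (dt, id), re-inserting an existing key upserts the row instead of duplicating it.

INSERT INTO test999 VALUES ('1', '2024-01-01'), ('2', '2024-01-01');
-- Writing id='1' again for the same dt replaces the earlier row
INSERT INTO test999 VALUES ('1', '2024-01-01');
SELECT * FROM test999 WHERE dt = '2024-01-01'; -- still two rows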
XXXXXXXXXXXXXXXXXXXXXXXXXX
Using Paimon with Spark on Hive (Kyuubi)
/usr/dif/7.1.0-0/kyuubi/bin/beeline -u 'jdbc:hive2://XXX:2181,XXX:2181,XXX:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=kyuubi;principal=ocdp/_HOST@XXX.COM;#spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog;spark.sql.catalog.spark_catalog=org.apache.paimon.spark.SparkGenericCatalog;spark.sql.catalog.paimon.warehouse=hdfs://XXX/apps/hive/warehouse;spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions;spark.yarn.queue=XXX;spark.executor.cores=4;spark.driver.cores=2;spark.executor.instances=10;spark.executor.memory=8g'
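Because spark_catalog is mapped to SparkGenericCatalog here, plain Hive tables and Paimon tables are usable in the same session, so an existing Hive table can feed a Paimon table directly. A minimal sketch with hypothetical table names (ods.src_tbl, paimon.ods.dst_tbl):

-- Hypothetical names: load a Hive-managed table into a Paimon table
INSERT INTO paimon.ods.dst_tbl
SELECT id, dt FROM spark_catalog.ods.src_tbl;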
XXXXXXXXXXXXXXXXXXXXXXXXXX
Using Paimon from a Spark JAR job
/usr/dif/7.1.0-0/spark/bin/spark-submit \
  --master yarn \
  --deploy-mode client \
  --driver-memory 1G \
  --num-executors 2 \
  --executor-cores 2 \
  --executor-memory 4G \
  --name "SparkODSJob" \
  --class com.goertek.it.main.gmes.OdsGMesHiveMain \
  --conf spark.sql.catalog.spark_catalog=org.apache.paimon.spark.SparkGenericCatalog \
  --conf spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog \
  --conf spark.sql.catalog.paimon.warehouse=hdfs://goertekwf/apps/hive/warehouse \
  --conf spark.sql.catalog.paimon.metastore=hive \
  --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions \
  --conf spark.yarn.principal=wf_bpit_mes@XXX.COM \
  --conf spark.yarn.keytab=/data01/dataos/apps/keytabs/wf_bpit_mes.keytab \
  --conf 'spark.driver.extraJavaOptions=-Djava.security.krb5.conf=/etc/krb5.conf' \
  --conf 'spark.driverEnv.KRB5_CONFIG=/etc/krb5.conf' \
  hdfs:///spark_jar/spark3-1.0-SNAPSHOT.jar \
  "test" "152" "165" "${batchNo}"
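The positional arguments after the JAR path are consumed by the job's main class and are application-specific. As a hedged illustration only (the real logic of OdsGMesHiveMain is not shown here), with the catalogs configured above the driver code can issue Paimon SQL through spark.sql(), for example with hypothetical table names:

-- Hypothetical SQL such a job might submit via spark.sql()
INSERT INTO paimon.ods.gmes_demo
SELECT id, dt FROM spark_catalog.ods.gmes_src;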