-- 设置配置(启用 Hive 事务/ACID 表所需的参数):
-- Transaction / locking support: the concurrency flag and the DB-backed
-- transaction manager are set together for transactional tables.
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;

-- Allow every partition column of an insert to be resolved dynamically.
set hive.exec.dynamic.partition.mode=nonstrict;

-- Compaction of the delta files written by transactional tables.
-- NOTE(review): these two are normally read by the metastore service;
-- confirm that setting them per session has the intended effect here.
set hive.compactor.worker.threads=1;
set hive.compactor.initiator.on=true;
-- 建表时需将存储格式(stored as)设为 orc,对表进行分桶(clustered by ... into N buckets),并设置表属性 tblproperties('transactional'='true'),例子如下:
-- Example of a transactional table: stored as ORC, bucketed, and flagged
-- with tblproperties('transactional'='true').
--
-- Fixes relative to the earlier version of this example:
--   * removed the stray double quote and the trailing comma after the last
--     column, both of which made the statement fail to parse;
--   * dropped the "row format delimited ..." clauses: field/line delimiters
--     describe text files and do not apply to ORC storage.
create table test.test(
    barcode string,
    del string,
    qty int
)
PARTITIONED BY (dt string)
CLUSTERED BY (qty) -- bucket rows on a chosen column (qty in this example)
INTO 7 BUCKETS
stored as orc
tblproperties('transactional'='true');