Hive 区分cluster by、distribute by + sort by、order by以及创建表带有clustered by和sort by
本文主要根据一些具体SQL实例来介绍说明cluster by、distribute by + sort by和order by的区别。同时通过实例说明create table指定clustered by 和sort by。
create table table(c1 int,c2 int,c3 int);
insert into table dual select 1,2,3 from (select count(*) from dual)a;
insert into table dual select 1, 3, 4 from (select count(*) from dual)a;
insert into table dual select 1, 1, 5 from (select count(*) from dual)a;
insert into table dual select 1, 2, 2 from (select count(*) from dual)a;
insert into table dual select 1, 1, 2 from (select count(*) from dual)a;
hive> select * from dual;
OK
1 2 3
1 3 4
1 1 5
1 2 2
1 1 2
Time taken: 0.105 seconds, Fetched: 5 row(s)
create table t1_cluster (c1 int,c2 int,c3 int) clustered by (c2) sorted by (c3) into 2 buckets;
insert overwrite table t1_cluster select * from dual;
hive> select * f