-- Managed table partitioned on two levels (day, then city).
-- Data files are plain text with comma-separated fields.
CREATE TABLE t_2(id int,skuid string,price float,amount int)
PARTITIONED BY (day string,city string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- Load a local file into the (day='2018-04-15', city='beijing') partition of t_2.
LOAD DATA LOCAL INPATH '/root/t2.1' INTO TABLE t_2 PARTITION(day='2018-04-15',city='beijing');
-- External table partitioned by day only; its data lives under the explicit
-- LOCATION below. Fields in the data files are comma-separated.
CREATE EXTERNAL TABLE t_2_ex(id int,skuid string,price float,amount int)
PARTITIONED BY (day string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/xx/yy';
-- Load a local file into the day='2018-04-15' partition of t_2_ex.
LOAD DATA LOCAL INPATH '/root/t2.1' INTO TABLE t_2_ex PARTITION(day='2018-04-15');
-- Register a partition whose data sits in its own directory.
-- NOTE(review): the original statement omitted the table name; t_2_ex is assumed
-- because it is the only table here partitioned by day alone -- confirm.
ALTER TABLE t_2_ex ADD PARTITION(day='2018-04-16') LOCATION '/2018-04-16';
-- Unpartitioned table; data files are plain text with comma-separated fields.
create table t_3(id int,name string,age int,score int,sex string)
row format delimited fields terminated by ',';
-- Load a local file into t_3.
load data local inpath '/root/t_3.dat' into table t_3;
/* ===== divider: JOIN examples start here ===== */
-- Left-hand table for the join examples; fields in the data file are comma-separated.
create table a(id int,name string)
row format delimited
fields terminated by ',';
-- Sample rows for table a (presumably the contents of the data file loaded below):
-- 1,zs
-- 2,ls
-- 3,ww
-- Load local files into table a.
-- NOTE(review): LOAD DATA ... INTO TABLE appends, so running both statements adds
-- both files' rows. The query output later in these notes shows only three rows in
-- a, so presumably only one of these loads was actually run -- confirm.
load data local inpath '/root/a.dat' into table a;
load data local inpath '/root/t_name.txt' into table a;
-- Right-hand table for the join examples; fields in the data file are comma-separated.
create table b(id int,nickname string)
row format delimited
fields terminated by ',';
-- Sample rows for table b (presumably the contents of the data file loaded below):
-- 1,zzz
-- 3,www
-- 4,sss
-- Load a local file into table b.
load data local inpath '/root/b.dat' into table b;
-- Data in the two tables:
0: jdbc:hive2://cts03:10000> select * from a;
+-------+---------+--+
| a.id  | a.name  |
+-------+---------+--+
| 1     | zs      |
| 2     | ls      |
| 3     | ww      |
+-------+---------+--+
3 rows selected (0.144 seconds)
0: jdbc:hive2://cts03:10000> select * from b;
+-------+-------------+--+
| b.id  | b.nickname  |
+-------+-------------+--+
| 1     | zzz         |
| 3     | www         |
| 4     | sss         |
+-------+-------------+--+
-- Cartesian product:
0: jdbc:hive2://cts03:10000> select a.*,b.* from a join b;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 2     | ls      | 1     | zzz         |
| 3     | ww      | 1     | zzz         |
| 1     | zs      | 3     | www         |
| 2     | ls      | 3     | www         |
| 3     | ww      | 3     | www         |
| 1     | zs      | 4     | sss         |
| 2     | ls      | 4     | sss         |
| 3     | ww      | 4     | sss         |
+-------+---------+-------+-------------+--+
-- Inner join:
0: jdbc:hive2://cts03:10000> select a.*,b.* from a join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 3     | ww      | 3     | www         |
+-------+---------+-------+-------------+--+
-- Left outer join: every row of the left table is returned as the query's input data set
0: jdbc:hive2://cts03:10000> select a.*,b.* from a left join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 2     | ls      | NULL  | NULL        |
| 3     | ww      | 3     | www         |
+-------+---------+-------+-------------+--+
-- Right outer join: every row of the right table is returned as the query's input data set
0: jdbc:hive2://cts03:10000> select a.*,b.* from a right join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 3     | ww      | 3     | www         |
| NULL  | NULL    | 4     | sss         |
+-------+---------+-------+-------------+--+
-- Full outer join: the rows of both tables are all returned as the query's input data set
0: jdbc:hive2://cts03:10000> select a.*,b.* from a full join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 2     | ls      | NULL  | NULL        |
| 3     | ww      | 3     | www         |
| NULL  | NULL    | 4     | sss         |
+-------+---------+-------+-------------+--+
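-- Left semi join (Hive-specific): rows are matched as in an inner join, but only the left table's side is returned as the query's input data set
0: jdbc:hive2://cts03:10000> select a.* from a left semi join b on a.id=b.id;
+-------+---------+
| a.id  | a.name  |
+-------+---------+
| 1     | zs      |
| 3     | ww      |
+-------+---------+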
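-- Under the MapReduce execution model, the left semi join is a more efficient implementation of the IN clause; the equivalent MySQL-style syntax is: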
-- IN-subquery form: returns the rows of a whose id also appears in b.
select
    a.id,
    a.name
from a
where a.id in (
    select distinct b.id
    from b
);