hive join方式总结
join类型
[INNER] JOIN 内连接
LEFT JOIN 左外连接
RIGHT JOIN 右外连接
FULL OUTER JOIN 全外连接
LEFT SEMI JOIN 左半连接
CROSS JOIN 笛卡尔积

数据准备
- cl_tmp.tmp_tm_join_testA
| id | name |
|---|---|
| 1 | 张三 |
| 2 | 李四 |
| 3 | 王五 |
- cl_tmp.tmp_tm_join_testB
| id | age |
|---|---|
| 2 | 26 |
| 3 | 28 |
| 4 | 29 |
-- Fixture tables for the join examples.
-- testA holds (id, name) rows for ids 1..3 and testB holds (id, age) rows for
-- ids 2..4, so ids 2 and 3 exist on both sides while 1 and 4 exist on one side only.
CREATE TABLE cl_tmp.tmp_tm_join_testA (
    id   INT,
    name STRING
);

-- Values are positional: (id, name).
INSERT INTO cl_tmp.tmp_tm_join_testA
VALUES
    (1, '张三'),
    (2, '李四'),
    (3, '王五');

CREATE TABLE cl_tmp.tmp_tm_join_testB (
    id  INT,
    age INT
);

-- Values are positional: (id, age).
INSERT INTO cl_tmp.tmp_tm_join_testB
VALUES
    (2, 26),
    (3, 28),
    (4, 29);
数据关联
- INNER JOIN
-- Intersection of a and b: only ids present in both tables are returned.
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
INNER JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id;

- LEFT JOIN
-- Every row of a, enriched with b's columns (NULL where b has no matching id).
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
LEFT JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id;

-- Anti-join (a - b): rows that exist in a but have no match in b.
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
LEFT JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id
WHERE b.id IS NULL;

- RIGHT JOIN
-- Every row of b, enriched with a's columns (NULL where a has no matching id).
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
RIGHT JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id;

-- Anti-join (b - a): rows that exist in b but have no match in a.
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
RIGHT JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id
WHERE a.id IS NULL;

- FULL [OUTER] JOIN
-- Union of a and b: every row from both sides, NULL-padded where unmatched.
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
FULL OUTER JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id;

-- Symmetric difference, (a - b) UNION (b - a): rows present on exactly one side.
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
FULL OUTER JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id
WHERE a.id IS NULL
   OR b.id IS NULL;
- LEFT SEMI JOIN
-- LEFT SEMI JOIN: keep the rows of a that have at least one match in b
-- (the join form of "a.id IN (SELECT id FROM b)").
-- This is NOT the same as INNER JOIN: b may only be referenced in the ON
-- clause, so only a's columns can be selected, and each row of a is returned
-- at most once even when b contains several matching rows.
SELECT
    a.id,
    a.name
FROM cl_tmp.tmp_tm_join_testA a
LEFT SEMI JOIN cl_tmp.tmp_tm_join_testB b
    ON a.id = b.id;

- CROSS JOIN
-- Cartesian product: every row of a paired with every row of b (no join condition).
SELECT *
FROM cl_tmp.tmp_tm_join_testA a
CROSS JOIN cl_tmp.tmp_tm_join_testB b;

参考blog
Hive中的各种join关系和使用 https://blog.youkuaiyun.com/leying521/article/details/93197951
本文详细介绍了Hive中各种JOIN操作的使用方法,包括INNER JOIN、LEFT JOIN、RIGHT JOIN、FULL OUTER JOIN、LEFT SEMI JOIN及CROSS JOIN,并通过具体实例展示了不同JOIN类型的应用场景。
3053

被折叠的 条评论
为什么被折叠?



