分区表:将数据按照查询的维度分文件夹管理
没有分区表
create table tb_order2(
oid int ,
dt string ,
cost double
)
row format delimited fields terminated by “,” ;
load data local inpath “/hive/data/06-18.txt” into table tb_order2 ;
load data local inpath “/hive/data/06-19.txt” into table tb_order2 ;
select * from tb_order where dt=‘2020-06-18’ ; --两个文件
使用分区表
create table tb_p_order(
oid int ,
dt string ,
cost double
)
partitioned by (dy string)
row format delimited fields terminated by “,” ;
load data local inpath “/hive/data/06-18.txt” into table tb_p_order partition(dy=“06-18”);
load data local inpath “/hive/data/06-19.txt” into table tb_p_order partition(dy=“06-19”);
0: jdbc:hive2://linux01:10000> select * from tb_p_order where dy="06-18";
±----------------±---------------±-----------------±---------------+
| tb_p_order.oid | tb_p_order.dt | tb_p_order.cost | tb_p_order.dy |
±----------------±---------------±-----------------±---------------+
| 1 | 2020-06-18 | 200.0 | 06-18 |
| 2 | 2020-06-18 | 200.0 | 06-18 |
| 3 | 2020-06-18 | 100.0 | 06-18 |
| 3 | 2020-06-18 | 200.0 | 06-18 |
| 4 | 2020-06-18 | 200.0 | 06-18 |
| 5 | 2020-06-18 | 20.0 | 06-18 |
| 6 | 2020-06-18 | 100.0 | 06-18 |
| 7 | 2020-06-18 | 200.0 | 06-18 |
| 8 | 2020-06-18 | 200.0 | 06-18 |
| 9 | 2020-06-18 | 100.0 | 06-18 |
| 10 | 2020-06-18 | 200.0 | 06-18 |
±----------------±---------------±-----------------±---------------+
使用分区表可以提高查询效率:(以静态分区表为例)
动态分区:
user.txt
u001 ZSS 23 M beijing
u002 YMM 33 F nanjing
u003 LSS 43 M beijing
u004 ZY 23 F beijing
u005 ZM 23 M beijing
u006 CL 23 M dongjing
u007 LX 23 F beijing
u008 YZ 23 M beijing
u009 YM 23 F nanjing
u010 XM 23 M beijing
u011 XD 23 F beijing
u012 LH 23 M dongjing
u013 FTM 23 F dongjing
按照某个字段的值的内容讲数据分文件夹管理 方便按照这个维度查询
1 需要加载的数据
2 建普通表
3 导入数据
create table if not exists tb_user(
uid string ,
name string ,
age int ,
gender string ,
address string
)
row format delimited fields terminated by " " ;
load data local inpath “/hive/data/user.txt” into table tb_user ;
4 创建分区表
create table if not exists tb_p_user(
uid string ,
name string ,
age int ,
gender string ,
address string
)
partitioned by (addr string)
row format delimited fields terminated by " " ;
5 开始动态设置
开启动态分区功能
set 参数=值 ;
set hive.exec.dynamic.partition=true ;
set hive.exec.dynamic.partition.mode=nonstrick; 可以从普通表中导入数据
6 向动态分区表中导入数据
普通表5个字段
分区表 5个主字段 1 个分区字段
插入数据的时候字段个数类型一致 最后一个字段就是分区字段
insert into tb_p_user partition(addr)
select uid , name , age , gender , address , address from tb_user ;