Apache Hive 建表操作的简单描述

最新推荐文章于 2022-09-21 21:11:25 发布

原创最新推荐文章于 2022-09-21 21:11:25 发布 · 145 阅读

0 ·

CC 4.0 BY-SA版权

本文介绍了Apache Hive中各种类型的表创建方法，包括内部表、外部表、分区表及分桶表，并演示了如何加载数据及查询。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Apache Hive 建表操作的简单描述

客户端连接hive
[root@bigdata-02 bin]# ./beeline 
Beeline version 1.2.1 by Apache Hive
beeline> ! connect jdbc:hive2://bigdata-01:10000
Connecting to jdbc:hive2://bigdata-01:10000
Enter username for jdbc:hive2://bigdata-01:10000: root
Enter password for jdbc:hive2://bigdata-01:10000: ******
Connected to: Apache Hive (version 1.2.1)
Driver: Hive JDBC (version 1.2.1)
Transaction isolation: TRANSACTION_REPEAtable_READ
0: jdbc:hive2://bigdata-01:10000>

create database hive_test;
show databases;
use hive_test;

创建表
create table t_a1(id int,name string) row format delimited fields terminated by ',';
加载数据  如果在本地加local  如果不在本地 不加local   load data只针对内部表
load data local inpath '/root/1.txt' into table t_a1

hadoop fs -put 1.txt /user/hive/warehouse/hive_test.db/t_a1  

1.txt
1,张学友
2,刘德华
3,黎明
4,郭富城

0: jdbc:hive2://bigdata-01:10000> select * from t_a1;
+----------+------------+--+
| t_a1.id  | t_a1.name  |
+----------+------------+--+
| 1        | 张学友        |
| 2        | 刘德华        |
| 3        | 黎明         |
| 4        | 郭富城        |
+----------+------------+--+
4 rows selected (1.358 seconds)



//创建外部表
create external table t_a2(id int,name string) row format delimited fields terminated by ',' location '/test/';


hadoop fs -mkdir /test
hadoop fs -put 1.txt /test

0: jdbc:hive2://bigdata-01:10000> select * from t_a2;
+----------+------------+--+
| t_a2.id  | t_a2.name  |
+----------+------------+--+
| 1        | 张学友        |
| 2        | 刘德华        |
| 3        | 黎明         |
| 4        | 郭富城        |
+----------+------------+--+
4 rows selected (0.638 seconds)



区别 
内部表的数据文件必须放到 指定的位置
外部表的数据文件 可以自己指定位置
外部表 drop table t_a2 后 数据文件依然存在 内部表 直接连表带数据文件一起删除


//分区表
create table t_user(id int,name string,area string) partitioned by(region string) row format delimited fields terminated by ',';
//加载数据
 load data local inpath '/root/beijing.txt' into table t_user partition(region='beijing');
 load data local inpath '/root/shanghai.txt' into table t_user partition(region='shanghai');

0: jdbc:hive2://bigdata-01:10000> select * from t_user;
+----------+------------+------------+--------------+--+
| t_user.id  | t_user.name  | t_user.area  | t_user.region  |
+----------+------------+------------+--------------+--+
| 1        | 张学友        | 北京         | beijing      |
| 2        | 刘德华        | 北京         | beijing      |
| 3        | 黎明         | 北京         | beijing      |
| 4        | 郭富城        | 北京         | beijing      |
| 5        | 诸葛亮        | 上海         | shanghai     |
| 6        | 司马懿        | 上海         | shanghai     |
| 7        | 周瑜         | 上海         | shanghai     |
+----------+------------+------------+--------------+--+
7 rows selected (0.445 seconds)
 
 
 
 
//多分区 
create table day_hour_table (id int, content string) partitioned by (dt string, hour string);
load data local inpath '/root/900101_08.txt' into table day_hour_table PARTITION(dt='1990-01-01', hour='08');



//分桶表
开启分桶功能：set hive.enforce.bucketing = true;
设置reduce个数等于分桶的个数：set mapreduce.job.reduces=4;
创建表
create table stu_buck(Sno int,Sname string,Sex string,Sage int,Sdept string) clustered by(Sno) into 4 buckets row format delimited fields terminated by ',';
加载方式:
1,首先创建一个普通的过渡中间表 把对应的文件映射上去
create table student(Sno int,Sname string,Sex string,Sage int,Sdept string) row format delimited fields terminated by ',';
hadoop fs -put students.txt /user/hive/warehouse/hive_test.db/student
2,真正映射分桶表(insert+select)
insert overwrite table stu_buck select * from student cluster by(Sno);


测试的时候可以设置本地模式
set hive.exec.mode.local.auto=true;