hive创建表的三种方式
第一种:普通方式
创建语句
create table if not exists student( num int, name string ) row format delimited fields terminated by'\t' stored as textfile;
从本地加载数据:
load data local inpath '/opt/datas/student.txt' into table student;
- 从hdfs加载数据,先把数据上传到hdfs
load data inpath '/student.txt' into table student;
- 第二种:子查询方式,运行Mapreduce程序
- create table stu_as as select name from student;
- create table stu_as as select name from student;
- 第三种:like方式,有表结构没有数据
- create table stu_like like student;
- create table stu_like like student;
- 清空与删除
- 清空一张表:truncate table student;
- 删除一张表:drop table if exists student;
创建表的类型
测试数据
- 先创建db_emp数据库:
create database if not exists db_emp;
员工表与部门表都是管理表
一、员工表 create table emp( empno int, ename string, job string, mgr int, hiredate string, sal double, comm double, deptno int ) row format delimited fields terminated by '\t'; 二、部门表 create table dept( deptno int, dname string, loc string ) row format delimited fields terminated by '\t';
加载数据
load data local inpath '/opt/datas/emp.txt' into table emp; load data local inpath '/opt/datas/dept.txt' into table dept;
- 重新加载:
load data local inpath '/opt/datas/emp.txt' overwrite into table emp;
- 先创建db_emp数据库:
- 创建管理表
- 删除时即使删除元数据,也删除文件夹
create table emp1(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
row format delimited fields terminated by '\t'
LOCATION '/user/hive/warehouse/db_emp.db/emp';
- 删除表emp1后hdfs中emp也没有了,show tables中能看到
- 删除时即使删除元数据,也删除文件夹
- 创建外部表
- 删除表只是删除元数据,hdfs文件夹还在
create EXTERNAL table dept_ext1(
deptno int,
dname string,
loc string
)
row format delimited fields terminated by '\t'
LOCATION '/user/hive/warehouse/db_emp.db/dept';
- 删除表只是删除元数据,hdfs文件夹还在
- 创建分区表
- 分区表中的字段是虚拟逻辑的
create table emp_part(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
partitioned by (date string)
row format delimited fields terminated by '\t';
- 分区表中的字段是虚拟逻辑的
- 分区表加载数据
- 直接加载数据会报错,需要指定分区。
- 加载数据
load data local inpath '/opt/datas/emp.txt' into table emp_part
partition (date='20161111');
load data local inpath '/opt/datas/emp.txt' into table emp_part
partition (date='20161110');
- hdfs文件系统:LOCATION ‘/user/hive/warehouse/db_emp.db/emp’;
- 直接加载数据会报错,需要指定分区。
- 过滤查询
select * from emp_part where date='20161111';
可以用多个字段分区
语句
create table emp_part2( empno int, ename string, job string, mgr int, hiredate string, sal double, comm double, deptno int ) partitioned by (date string,hour string) row format delimited fields terminated by '\t'; load data local inpath '/opt/datas/emp.txt' into table emp_part2 partition (date='20161111',hour='11'); load data local inpath '/opt/datas/emp.txt' into table emp_part2 partition (date='20161110',hour='10');
- 查询:select * from emp_part2 where date=’20161110’ and hour=’10’;