为了方便hdfs可视化网页查看,需要提前关闭防火墙:
firewalld的基本使用
启动: systemctl start firewalld.service
查看状态: systemctl status firewalld.service
禁止开机启动: systemctl disable firewalld.service
停止: systemctl stop firewalld.service
1.基本数据类型:int,string ,boolean ,double等
create table person (pid int,pname string,married boolean,salary double);
desc person;
create table test1(vname varchar(20), cname char(20));
2.复杂的数据类型:array, map, struct等
create table student(sid int, sname string, grade array<float>);{1,cj,[88,99,100]}
create table student1(sid int,sname string,grade map<string,float>);
{1,cj,<'语文',88>}
create table student2(sid int,sname string, grade array<map<string,float>>);
{1,cj,[<'语文',88>,<'数学',99>]}
结构类型:
create table student3(sid int,info struct<name:string,age:int,sex:string>);
{1,{'cj',24,'男'}}
3.时间数据类型:date 0.12.0开始使用,timestamp
4.hive的数据存储
基于hdfs没有专门的数据存储格式
存储结构主要包括:数据库,文件,表,视图
可以直接加载文本文件(.txt等)
5.表:内部表,分区表,外部表,桶表
(1)内部表:
#创建表
create table t1(tid int,tname string,age int);
#创建表并指定表的路径
create table t2(tid int,tname string,age int)location '/myfile/hive/t2';
#创建表并指定分隔符
create table t3(tid int,tname string,age int) row format delimited fields terminated by ',';
#创建表并将sample_data数据填入t4表中
create table t4 as select * from sample_data;
#查看t4表中数据
hdfs dfs -cat /user/hive/warehouse/t4/000000_0
#创建表并将sample_data数据填入t5表中且指定分隔符
create table t5 row format delimited fields terminated by ',' as select * from sample_data;
添加新的列:
alter table t1 add coulumns(english int);
desc t1;
drop table t1; #放入hive回收站/user/root/.Trash/Current
(2)分区表
#性别分区并且分隔符为逗号
create table partition_table(sid int,sname string) partitioned by(gender string) row format delimited fields terminated by ',';
desc partition_table;
#插入表信息
insert into table partition_table partition(gender='M') select sid,sname from sample_data where gender='M';
#生成SQL语句的执行计划
explain select * from sample_data where gender='M';
explain select * from partition_table where gender='M';
(3)外部表:指向已经在HDFS中存在的数据,可以创建partition
外部表只是一个过程,加载数据和创建表同时完成,并不会移动到数据仓库目录中,只是和外部数据建立一个链接,当删除外部表时,仅仅删除该链接
#上传本地文件到hdfs
hdfs dfs -put student01.txt /input
#创建外部表
create external table external_student(sid int,sname string,age int)row format delimited fields terminated by ',' location '/input';
hdfs dfs -rm /input/student03.txt
select * from external_student;
(4)桶表:经过hash运算,相同值放在同一个桶表
create table bucket_table(sid int,sname string, age int) clustered by(sname) into 5 buckets;
(5)视图:虚表,是一个逻辑概念,建立在已有表的基础上,视图赖以建立的表称为基表
好处:简化复杂查询
create view empinfo
as
select e.empno,e.ename,e.sal,e.sal*12 annsal,d.dname
from emp e,dept d
where e.deptno=d.deptno;