Hive与Hbase整合
一.Hive与Hbase对比
https://blog.csdn.net/qq_32736999/article/details/83832376
Hive与HBase各自有着不同的功能,但是归根结底,Hive与HBase的数据最终都是存储在HDFS上面的,为了节省磁盘的存储空间,一般不会将一份数据存储到多个地方。所以,我们可以直接将数据存入HBase,然后通过Hive整合HBase直接使用SQL语句分析HBase里面的数据即可。
二.Hive与Hbase整合
需求一:将hive分析结果的数据,保存到HBase当中去
1.拷贝hbase的五个依赖jar包到hive的lib目录下
hbase的jar包都在/export/servers/hbase-1.2.0-cdh5.14.0/lib,我们需要拷贝五个jar包名字如下:
hbase-client-1.2.0-cdh5.14.0.jar
hbase-hadoop2-compat-1.2.0-cdh5.14.0.jar
hbase-hadoop-compat-1.2.0-cdh5.14.0.jar
hbase-it-1.2.0-cdh5.14.0.jar
hbase-server-1.2.0-cdh5.14.0.jar
hive安装在node03机器上,我们直接在node03执行以下命令,通过创建软链接的方式来进行jar包的依赖:
# Symlink the five HBase jars listed above into Hive's lib directory.
hbase_lib=/export/servers/hbase-1.2.0-cdh5.14.0/lib
hive_lib=/export/servers/hive-1.1.0-cdh5.14.0/lib
for jar in hbase-client hbase-hadoop2-compat hbase-hadoop-compat hbase-it hbase-server; do
    ln -s "${hbase_lib}/${jar}-1.2.0-cdh5.14.0.jar" "${hive_lib}/${jar}-1.2.0-cdh5.14.0.jar"
done
# Spark support for HBase-backed Hive tables: symlink the jars below into Spark's jars directory.
hbase_lib=/export/servers/hbase-1.2.0-cdh5.14.0/lib
spark_jars=/export/servers/spark-2.2.0-bin-2.6.0-cdh5.14.0/jars/
for jar in \
    guava-12.0.1.jar \
    htrace-core-3.2.0-incubating.jar \
    hbase-common-1.2.0-cdh5.14.0.jar \
    hbase-common-1.2.0-cdh5.14.0-tests.jar \
    hbase-client-1.2.0-cdh5.14.0.jar \
    hbase-server-1.2.0-cdh5.14.0.jar \
    hbase-server-1.2.0-cdh5.14.0-tests.jar \
    hbase-protocol-1.2.0-cdh5.14.0.jar \
    metrics-core-2.2.0.jar
do
    ln -s "${hbase_lib}/${jar}" "${spark_jars}"
done
# The hive-hbase-handler jar is taken from Hive's lib directory, not HBase's.
ln -s /export/servers/hive-1.1.0-cdh5.14.0/lib/hive-hbase-handler-1.1.0-cdh5.14.0.jar "${spark_jars}"
2.修改hive-site.xml配置文件
#修改node03上hive-site.xml配置文件
cd /export/servers/hive-1.1.0-cdh5.14.0/conf
vim hive-site.xml
#添加如下配置
<!-- ZooKeeper quorum setting for Hive (hosts of this tutorial's cluster) -->
<property>
    <name>hive.zookeeper.quorum</name>
    <value>node01,node02,node03</value>
</property>
<!-- ZooKeeper quorum setting for HBase (same three hosts) -->
<property>
    <name>hbase.zookeeper.quorum</name>
    <value>node01,node02,node03</value>
</property>
3.修改hive-env.sh配置文件
#修改node03上hive-env.sh的配置文件
cd /export/servers/hive-1.1.0-cdh5.14.0/conf
vim hive-env.sh
# Installation directories for this tutorial's cluster layout; adjust the paths if yours differ.
export HADOOP_HOME=/export/servers/hadoop-2.6.0-cdh5.14.0
export HBASE_HOME=/export/servers/hbase-1.2.0-cdh5.14.0
# Directory that holds hive-site.xml and hive-env.sh.
export HIVE_CONF_DIR=/export/servers/hive-1.1.0-cdh5.14.0/conf
4.进入hive客户端并建表
①进入hive客户端(要确保Zookeeper集群、Hadoop集群、Hbase集群已启动)
cd /export/servers/hive-1.1.0-cdh5.14.0/
# Start the metastore service in the background; nohup keeps it running after logout.
nohup bin/hive --service metastore &
# Start HiveServer2 in the background, with stderr merged into stdout.
nohup bin/hive --service hiveserver2 2>&1 &
# Open the Beeline client.
bin/beeline
# At the Beeline prompt, connect with: !connect jdbc:hive2://node03:10000
②创建数据库和外部表
-- Create the demo database. IF NOT EXISTS keeps this step re-runnable,
-- consistent with the table definition below.
create database if not exists hbasedb;
use hbasedb;
-- Source table for the demo: a text-file table whose fields are separated by tabs.
create external table if not exists score(
    id int,
    cname string,
    score int
)
row format delimited fields terminated by '\t'
stored as textfile;
③准备数据(字段之间需用Tab制表符分隔,与建表语句中的'\t'分隔符保持一致)
cd ~
vim hive_hbase.txt
1 zhangsan 80
2 lisi 60
3 wangwu 30
4 zhaoliu 70
④加载至表内
-- Load the prepared file /root/hive_hbase.txt into the score table.
load data local inpath '/root/hive_hbase.txt' into table score;
-- Check that the rows were loaded.
select * from score;
5.创建hive管理表与hbase进行映射,hive管理表当中的数据,都会存储到hbase上面去。
-- Hive-managed table stored in HBase through the HBase storage handler.
-- hbase.columns.mapping lists one entry per Hive column, in column order:
--   id    -> :key      (HBase row key, declared explicitly instead of relying
--                       on the implicit first-column default)
--   cname -> cf:name
--   score -> cf:score
-- hbase.table.name is the name of the backing HBase table.
create table hbase_score(
    id int,
    cname string,
    score int
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,cf:name,cf:score")
tblproperties("hbase.table.name" = "hbase_score");
# 建表成功后,在hbase中也会创建一个hbase_score表
6.在hive客户端中向表中插入数据
-- Copy the rows of score into the HBase-backed table. The source columns are
-- listed explicitly because the insert assigns them to hbase_score by position.
insert overwrite table hbase_score select id, cname, score from score;
-- Read the data back through Hive.
select * from hbase_score;
#在hive中查询hbase_score结果为
+-----------------+--------------------+--------------------+--+
| hbase_score.id | hbase_score.cname | hbase_score.score |
+-----------------+--------------------+--------------------+--+
| 1 | zhangsan | 80 |
| 2 | lisi | 60 |
| 3 | wangwu | 30 |
| 4 | zhaoliu | 70 |
+-----------------+--------------------+--------------------+--+
在hbase客户端中查结果为
# Query command (run in the HBase shell): scan the hbase_score table
scan 'hbase_score'
#查询结果为
ROW COLUMN+CELL
1 column=cf:name, timestamp=1567094300857, value=zhangsan
1 column=cf:score, timestamp=1567094300857, value=80
2 column=cf:name, timestamp=1567094300857, value=lisi
2 column=cf:score, timestamp=1567094300857, value=60
3 column=cf:name, timestamp=1567094300857, value=wangwu
3 column=cf:score, timestamp=1567094300857, value=30
4 column=cf:name, timestamp=1567094300857, value=zhaoliu
4 column=cf:score, timestamp=1567094300857, value=70
需求二:创建hive外部表,映射HBase中的表模型
1.进入HBase客户端,创建表并手动插入一些数据
# Create the table with a single column family named 'cf'.
create 'hbase_hive_score', 'cf'
# Row key '1'
put 'hbase_hive_score', '1', 'cf:name', 'zhangsan'
put 'hbase_hive_score', '1', 'cf:score', '95'
# Row key '2'
put 'hbase_hive_score', '2', 'cf:name', 'lisi'
put 'hbase_hive_score', '2', 'cf:score', '96'
# Row key '3'
put 'hbase_hive_score', '3', 'cf:name', 'wangwu'
put 'hbase_hive_score', '3', 'cf:score', '97'
2.进入hive客户端,按如下命令创建hive外部表
-- External Hive table mapped onto the HBase table hbase_hive_score.
-- Column mapping, in Hive column order:
--   id    -> :key      (HBase row key)
--   name  -> cf:name
--   score -> cf:score
create external table hbase_to_hive_score (
    id    int,
    name  string,
    score int
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties (
    "hbase.columns.mapping" = ":key,cf:name,cf:score"
)
tblproperties (
    "hbase.table.name" = "hbase_hive_score"
);