Hive 与 HBase 的整合是通过 HBase 客户端 API 实现的。
我这里使用的版本是apache-hive-0.13.1-bin.tar.gz和hbase-0.96.2-hadoop2.tar.gz
1.将 HBase 的 lib 目录中用 `ls hbase-*` 列出的全部 jar 包复制到 Hive 的 lib 目录下,再将用 `ls htrace-core*` 列出的 htrace-core-2.04.jar 复制到 Hive 的 lib 目录下(原文此处附有截图)。
2.启动hive和hbase
3.建表
-- Hive table backed by the HBase table "pv".
-- NOTE: hbase.columns.mapping must contain exactly one entry per Hive column,
-- and exactly one of those entries must be ":key" (the HBase row key).
-- The original mapping listed 5 entries for 6 columns and had no row-key
-- mapping, which fails at CREATE TABLE time with a columns-mapping mismatch.
CREATE TABLE pv (
    rowid INT,
    year  INT,
    month INT,
    day   INT,
    page  STRING,
    count INT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf:year,cf:month,cf:day,cf:page,cf:count"
)
TBLPROPERTIES ("hbase.table.name" = "pv");
4.创建数据表
a.创建mylog
-- External table over the raw access-log files (comma-delimited text,
-- one record per line), partitioned by year/month/day.
CREATE EXTERNAL TABLE mylog (
    remote_addr          STRING,
    remote_user          STRING,
    time_local           STRING,
    request              STRING,
    status               STRING,
    body_bytes_sent      STRING,
    http_referer         STRING,
    http_user_agent      STRING,
    http_x_forwarded_for STRING
)
PARTITIONED BY (year INT, month INT, day INT)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
b.添加分区
-- Write the integer partition value as 8, not 08: a leading zero is not a
-- valid way to spell an integer literal and can be rejected (or silently
-- normalized) by the parser, leaving the partition path inconsistent with
-- later inserts that produce month=8.
ALTER TABLE mylog ADD PARTITION (year=2017, month=8, day=30);
c.数据
192.168.22.1,-,30/Aug/2017:03:53:12,GET /mybigdata-0.0.1-SNAPSHOT/phone/huawei.html HTTP/1.0,304,0,-,Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0,-
d.创建中间表
-- Staging ("middle") table whose columns are in the exact order expected
-- by the HBase-backed pv table.
CREATE TABLE moddle_t (
    key   INT,
    year  INT,
    month INT,
    day   INT,
    page  STRING,
    count INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
f.向中间表 moddle_t 写入数据
-- The original selected the constant 1 as the key for every row, so all
-- rows mapped to the same HBase row key and only one row would survive in
-- the pv table. Generate a distinct key per row instead (windowing
-- functions are available since Hive 0.11).
INSERT INTO TABLE moddle_t
SELECT ROW_NUMBER() OVER (ORDER BY time_local) AS key,
       year, month, day, request, 100
FROM mylog;
e.向pv表写入数据
-- NOTE(review): "hive.hbase.bulk" is not a documented property in Hive 0.13;
-- confirm the intended setting (e.g. hive.hbase.wal.enabled) before relying on it.
SET hive.hbase.bulk=true;
-- Use an explicit column list instead of "select *": the statement keeps
-- working (and keeps the column-to-rowkey alignment correct) if either
-- table's schema changes. The original also had a missing space ("*from").
INSERT INTO TABLE pv
SELECT `key`, `year`, `month`, `day`, `page`, `count`
FROM moddle_t;
执行成功后,即可在 pv 表(及其对应的 HBase 表)中查询到写入的数据(原文此处为截图,红线框内为写入的数据)。