一、 Update操作验证
1. 创建关联副表,用于主键关联update主表
drop table if exists cust_tab01 ;
create table if not exists cust_tab01
(
cust_id string,
col_01_01 string,
col_01_02 string,
col_01_03 string,
col_01_04 string,
col_01_05 string
)
row format delimited fields terminated by',' lines terminated by '\n'
stored as textfile ;
drop table if exists cust_tab02 ;
create table if not exists cust_tab02
(
cust_id string,
col_02_01 string,
col_03_02 string,
col_04_03 string,
col_05_04 string,
col_06_05 string
)
row format delimited fields terminated by',' lines terminated by '\n'
stored as textfile ;
2. 模拟数据,上传HDFS文件
/*********************************************************************/
hdfs dfs -put /data/testfile/tab01.txt/user/hive/warehouse/cust_tab01
hdfs dfs -put /data/testfile/tab02.txt/user/hive/warehouse/cust_tab02
/*********************************************************************/
#查询验证副表内容
[bd-131:21000] >select * from cust_tab01 ;
Query: select * fromcust_tab01
+------------+------------------------+----------------------+----------------------+----------------------+----------------------+
| cust_id | col_01_01 | col_01_02 | col_01_03 | col_01_04 | col_01_05 |
+------------+------------------------+----------------------+----------------------+----------------------+----------------------+
| 1000000001 |val_01_01_1000000001_x | val_01_02_1000000001 | val_01_03_1000000001 |val_01_04_1000000001 | val_01_05_1000000001 |
| 1000000002 |val_01_01_1000000002_x | val_01_02_1000000002 | val_01_03_1000000002 |val_01_04_1000000002 | val_01_05_1000000002 |
| 1000000003 |val_01_01_1000000003_y | val_01_02_1000000003 | val_01_03_1000000003 |val_01_04_1000000003 | val_01_05_1000000003 |
+------------+------------------------+----------------------+----------------------+----------------------+----------------------+
[bd-131:21000] >select * from cust_tab02 ;
Query: select * fromcust_tab02
+------------+------------------------+----------------------+----------------------+----------------------+----------------------+
| cust_id |col_02_01 | col_03_02 | col_04_03 | col_05_04 | col_06_05 |
+------------+------------------------+----------------------+----------------------+----------------------+----------------------+
| 1000000001 | val_02_01_1000000001_8 |val_02_02_1000000001 | val_02_03_1000000001 | val_02_04_1000000001 |val_02_05_1000000001 |
| 1000000002 | val_02_01_1000000002_8 |val_02_02_1000000002 | val_02_03_1000000002 | val_02_04_1000000002 | val_02_05_1000000002|
| 1000000003 | val_02_01_1000000003_8 |val_02_02_1000000003 | val_02_03_1000000003 | val_02_04_1000000003 |val_02_05_1000000003 |
+------------+------------------------+----------------------+----------------------+----------------------+----------------------+
3. 用副表关联更新主表
虽然是update操作,但这个时候就不能直接用update语法了,因不支持,而是insert,底层是h base,会自动进行rowkey值的覆盖。
[bd-131:21000]> insert into hbase_cust_info( cust_id,col_01_01,col_01_02,col_02_01 )select a.cust_id,a.col_01_01,a.col_01_02,b.col_02_01 from cust_tab01 a,cust_tab02 b where a.cust_id=b.cust_id ;
再来验证主表数据是否更新:
[bd-131:21000] > select cust_id,col_01_01,col_01_02,col_01_03,col_02_01,col_02_02 from hbase_cust_info ;
Query: select cust_id,col_01_01,col_01_02,col_01_03,col_02_01,col_02_02 from hbase_cust_info
+------------+------------------------+----------------------+----------------------+------------------------+----------------------+
| cust_id |col_01_01 | col_01_02 | col_01_03 | col_02_01 | col_02_02 |
+------------+------------------------+----------------------+----------------------+------------------------+----------------------+
| 1000000001 | val_01_01_1000000001_x |val_01_02_1000000001 | val_01_03_1000000001 | val_02_01_1000000001_8 |val_02_02_1000000001 |
| 1000000002 | val_01_01_1000000002_x |val_01_02_1000000002 | val_01_03_1000000002 | val_02_01_1000000002_8 |val_02_02_1000000002 |
| 1000000003 | val_01_01_1000000003_y |val_01_02_1000000003 | val_01_03_1000000003 | val_02_01_1000000003_8 |val_02_02_1000000003 |
+------------+------------------------+----------------------+----------------------+------------------------+----------------------+
这个时候能看到数据已做了更新。
以上过程说明通过impala的sql语句可以写入、更新hbase的表,基本的功能可以满足。下一步是性能测试。