HBase Distributed Environment Deployment
Version Selection
Official documentation: http://hbase.apache.org/book.html#configuration
JDK version
HBase 1.2.x runs on JDK 7 or JDK 8 (see the Java support matrix in the official documentation).
HBase-Hadoop compatibility matrix
** Recommended combination: Hadoop (2.7.3) + HBase (1.2.0) **
** Combination used here: Hadoop (2.6.4) + HBase (1.2.0) **
ZooKeeper version
ZooKeeper 3.4.x is required.
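A quick way to confirm the version of a running ensemble is the "stat" four-letter command, enabled by default in ZooKeeper 3.4 (assuming nc is installed; Node-1 and port 2181 are this guide's values):
[root]# echo stat | nc Node-1 2181 | head -1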
Hadoop Distributed Deployment
See: https://blog.youkuaiyun.com/waplys/article/details/89094587
NTP Time Synchronization
See: https://blog.youkuaiyun.com/waplys/article/details/88865605
HBase Distributed Deployment
Note: HBase depends on Hadoop, so HBase is best deployed on nodes inside the Hadoop cluster.
Download
@ Download
Official archive: http://archive.apache.org/dist/hbase/1.2.0/
@ Extract
# tar -zxvf hbase-1.2.0-bin.tar.gz -C /home && mv /home/hbase-1.2.0 /home/hbase
Environment Variables
@ HBase environment variables
[root]# vim /etc/profile
export HBASE_HOME=/home/hbase
export PATH=$JAVA_HOME/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HBASE_HOME/bin:$PATH
[root]# source /etc/profile
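To confirm the new PATH is in effect (version is a built-in subcommand of the hbase launcher):
[root]# which hbase
[root]# hbase version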
Firewall (optional)
@ Check the firewall status
[root]# service iptables status
@ Stop the firewall
[root]# service iptables stop
@ Check whether the firewall starts on boot
[root]# chkconfig iptables --list
@ Disable the firewall on boot
[root]# chkconfig iptables off
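The commands above target CentOS 6 style iptables; on CentOS 7 and later, the firewalld equivalents are:
[root]# systemctl status firewalld
[root]# systemctl stop firewalld
[root]# systemctl disable firewalld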
HBase Configuration
1) Edit conf/regionservers
[root]# vim regionservers
Node-1
Node-2
2) Set a backup HBase Master node in conf/backup-masters (optional)
[root]# vi backup-masters
Node-2
3) Edit conf/hbase-site.xml
@ Add the following configuration
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<configuration>
  <property>
    <name>hbase.client.max.perserver.tasks</name>
    <value>5</value>
  </property>
  <property>
    <name>hbase.client.pause</name>
    <value>100</value>
  </property>
  <property>
    <name>hbase.client.scanner.caching</name>
    <value>100</value>
  </property>
  <property>
    <name>zookeeper.znode.rootserver</name>
    <value>root-region-server</value>
  </property>
  <property>
    <name>zookeeper.znode.parent</name>
    <value>/hbase</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>Node-1,Node-2</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://Node-1:9000/hbase</value>
  </property>
  <property>
    <name>hbase.client.retries.number</name>
    <value>35</value>
  </property>
  <property>
    <name>hbase.data.mask.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>hbase.client.write.buffer</name>
    <value>5242880</value>
  </property>
  <property>
    <name>hbase.fs.tmp.dir</name>
    <value>hdfs://Node-1:9000/user/${user.name}/hbase-staging</value>
  </property>
  <property>
    <name>hbase.client.prefetch</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.client.prefetch.limit</name>
    <value>10</value>
  </property>
  <property>
    <name>hbase.client.max.perregion.tasks</name>
    <value>1</value>
  </property>
  <property>
    <name>hbase.client.localityCheck.threadPoolSize</name>
    <value>2</value>
  </property>
  <property>
    <name>hbase.client.scanner.timeout.period</name>
    <value>300000</value>
  </property>
  <property>
    <name>hbase.security.authentication</name>
    <value>simple</value>
  </property>
  <property>
    <name>hbase.client.max.total.tasks</name>
    <value>100</value>
  </property>
  <property>
    <name>hbase.client.keyvalue.maxsize</name>
    <value>10485760</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
</configuration>
[Note]
The essential fully-distributed settings are hbase.rootdir, hbase.cluster.distributed, hbase.zookeeper.quorum, and hbase.zookeeper.property.clientPort; the remaining entries are client-side tuning knobs that can stay at their defaults. When setting hbase.rootdir, also copy Hadoop's core-site.xml and hdfs-site.xml into HBase's conf (or lib) directory; otherwise the RegionServers cannot resolve the HDFS cluster's logical (nameservice) name:
[root]# cp $HADOOP_HOME/etc/hadoop/{core-site.xml,hdfs-site.xml} $HBASE_HOME/conf/
4) Edit conf/hbase-env.sh
[Optional]
@ Uncomment JAVA_HOME and point it at the Java installation
export JAVA_HOME=/home/java
@ Uncomment HBASE_HEAPSIZE and set the heap size in MB (or keep the default, 1G)
export HBASE_HEAPSIZE=1G
@ Uncomment HBASE_MANAGES_ZK and set it to false (use the standalone ZooKeeper cluster)
export HBASE_MANAGES_ZK=false
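A quick check that all three lines are now active (uncommented):
[root]# grep -E '^export (JAVA_HOME|HBASE_HEAPSIZE|HBASE_MANAGES_ZK)' $HBASE_HOME/conf/hbase-env.sh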
5) Sync to the other nodes
@ Copy the HBase installation to each of the other nodes
[root]# cd /home; scp -r hbase Node-2:$PWD
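With more worker nodes, the same copy can be looped over each hostname (the node list here is illustrative; extend it to match your cluster):
[root]# for n in Node-2; do scp -r /home/hbase $n:/home/; done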
Starting the Cluster
Start the HBase cluster
The components must be started in the following order:
1) Start ZooKeeper (on every ZooKeeper node)
[root]# ./zkServer.sh start
2) Start HDFS and YARN
[root]# ./start-all.sh
3) Start HBase
[root]# ./start-hbase.sh
4) Check the processes on each node with jps
[root]# jps
7709 QuorumPeerMain -- ZOOKEEPER
7905 NameNode -- HADOOP
8010 DataNode
8193 JournalNode
6777 NodeManager
6190 SecondaryNameNode
7756 ResourceManager
8455 HMaster -- HBASE
2341 HRegionServer
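Shut the cluster down in the reverse order (HBase first, then HDFS/YARN, then ZooKeeper):
[root]# ./stop-hbase.sh
[root]# ./stop-all.sh
[root]# ./zkServer.sh stop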
Viewing HBase via the Web UI
@ Confirm the port
Find the HMaster PID with jps, then list its listening ports with netstat:
[root]# netstat -tanlp|grep $(jps|grep HMaster|awk '{print $1}')|grep ":::[0-9]"
@ Change the default port (add this to hbase-site.xml)
[root]# vim hbase-site.xml
<!-- Port for the HBase Master web UI -->
<property>
  <name>hbase.master.info.port</name>
  <value>16010</value>
  <description>The port for the HBase Master web UI. Set to -1 if you do not want a UI instance run.</description>
</property>
@ Open in a browser
http://10.47.85.214:16010/master-status (active Master)
http://10.47.85.213:16010/master-status (backup Master)
Testing HBase
HBase shell test
@ Connect to HBase
# hbase shell
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version: 1.2.0 (the revision and build date shown will match your release)
hbase(main):001:0>
@ Create a table
hbase(main):003:0> create 'test', 'cf'
0 row(s) in 1.2200 seconds
@ Describe the table
hbase(main):003:0> describe 'test'
@ List tables
hbase(main):003:0> list
test
1 row(s) in 0.0550 seconds
@ Write data
hbase(main):004:0> put 'test', 'row1', 'cf:a', 'value1'
0 row(s) in 0.0560 seconds
hbase(main):005:0> put 'test', 'row2', 'cf:b', 'value2'
0 row(s) in 0.0370 seconds
hbase(main):006:0> put 'test', 'row3', 'cf:c', 'value3'
0 row(s) in 0.0450 seconds
@ Scan the table
The scan command returns every row of a table:
hbase(main):007:0> scan 'test'
ROW COLUMN+CELL
row1 column=cf:a, timestamp=1288380727188, value=value1
row2 column=cf:b, timestamp=1288380738440, value=value2
row3 column=cf:c, timestamp=1288380747365, value=value3
3 row(s) in 0.0590 seconds
@ Get a row
The get command retrieves a single row of a table:
hbase(main):008:0> get 'test', 'row1'
COLUMN CELL
cf:a timestamp=1288380727188, value=value1
1 row(s) in 0.0400 seconds
@ Delete data
HBase has two deletion commands, delete and deleteall; the difference is:
delete removes a single cell (the inverse of put);
deleteall removes an entire row.
delete 'test', 'row1', 'cf:a'
deleteall 'test', 'row1'
@ Disable and drop the table
hbase(main):012:0> disable 'test'
0 row(s) in 1.0930 seconds
hbase(main):013:0> drop 'test'
0 row(s) in 0.0770 seconds
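Two other handy built-in shell commands for confirming state: after the drop above, exists should report that the table no longer exists, and status summarizes the live servers:
hbase(main):014:0> exists 'test'
hbase(main):015:0> status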
Java API test
Create a project in IntelliJ IDEA, copy the jars from HBase's lib directory into the project, and add them all to the classpath.
Set the ZooKeeper address (hbase.zookeeper.quorum) to match your cluster.
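Alternatively, skip the IDE: a minimal sketch of compiling and running directly against the jars shipped in $HBASE_HOME/lib (assuming the class below is saved as javaapidemo3.java in the current directory):
[root]# javac -cp "$HBASE_HOME/lib/*" javaapidemo3.java
[root]# java -cp ".:$HBASE_HOME/lib/*" javaapidemo3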
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class javaapidemo3 {
    // Shared configuration and connection, created once for the whole class
    static Configuration conf = null;
    static Connection connection = null;
    static {
        try {
            conf = HBaseConfiguration.create(); // assign the static field; re-declaring conf here would leave the field null
            conf.set("hbase.zookeeper.quorum", "10.47.85.214");
            connection = ConnectionFactory.createConnection(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /*
     * Create a table
     *
     * @tableName table name
     * @family    list of column families
     */
    public static void creatTable(String tableName, String[] family)
            throws Exception {
        Admin admin = connection.getAdmin();
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
        for (int i = 0; i < family.length; i++) {
            desc.addFamily(new HColumnDescriptor(family[i]));
        }
        if (admin.tableExists(TableName.valueOf(tableName))) {
            System.out.println("table Exists!");
            System.exit(0);
        } else {
            admin.createTable(desc);
            System.out.println("create table Success!");
        }
    }
    /*
     * Add data to a table (for a fixed table whose column families are known)
     *
     * @rowKey    row key
     * @tableName table name
     * @column1   columns of the first column family
     * @value1    values for the first family's columns
     * @column2   columns of the second column family
     * @value2    values for the second family's columns
     */
    public static void addData(String rowKey, String tableName,
            String[] column1, String[] value1, String[] column2, String[] value2)
            throws IOException {
        Put put = new Put(Bytes.toBytes(rowKey)); // set the row key
        Table table = connection.getTable(TableName.valueOf(tableName)); // Table handles row-level operations (put/get/delete/scan)
        HColumnDescriptor[] columnFamilies = table.getTableDescriptor() // fetch all column families of the table
                .getColumnFamilies();
        for (int i = 0; i < columnFamilies.length; i++) {
            String familyName = columnFamilies[i].getNameAsString(); // column family name
            if (familyName.equals("article")) { // put data into the article family
                for (int j = 0; j < column1.length; j++) {
                    put.addColumn(Bytes.toBytes(familyName),
                            Bytes.toBytes(column1[j]), Bytes.toBytes(value1[j]));
                }
            }
            if (familyName.equals("author")) { // put data into the author family
                for (int j = 0; j < column2.length; j++) {
                    put.addColumn(Bytes.toBytes(familyName),
                            Bytes.toBytes(column2[j]), Bytes.toBytes(value2[j]));
                }
            }
        }
        table.put(put);
        System.out.println("add data Success!");
    }
    /*
     * Get a row by row key
     *
     * @tableName table name
     * @rowKey    row key
     */
    public static Result getResult(String tableName, String rowKey)
            throws IOException {
        Get get = new Get(Bytes.toBytes(rowKey));
        Table table = connection.getTable(TableName.valueOf(tableName)); // get the table
        Result result = table.get(get);
        for (Cell cell : result.listCells()) {
            System.out.println("family:" + Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
            System.out.println("qualifier:" + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
            System.out.println("value:" + Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
            System.out.println("Timestamp:" + cell.getTimestamp());
            System.out.println("-------------------------------------------");
        }
        return result;
    }
    /*
     * Scan an entire HBase table
     *
     * @tableName table name
     */
    public static void getResultScann(String tableName) throws IOException {
        Scan scan = new Scan();
        ResultScanner rs = null;
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            rs = table.getScanner(scan);
            for (Result r : rs) {
                for (Cell cell : r.listCells()) {
                    System.out.println("row:" + Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
                    System.out.println("family:" + Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength())); // was getValueLength(), which mis-sizes the family name
                    System.out.println("qualifier:" + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
                    System.out.println("value:" + Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
                    System.out.println("timestamp:" + cell.getTimestamp());
                    System.out.println("-------------------------------------------");
                }
            }
        } finally {
            if (rs != null) {
                rs.close();
            }
        }
    }
    /*
     * Scan an HBase table over a row-key range
     *
     * @tableName table name
     */
    public static void getResultScann(String tableName, String start_rowkey,
            String stop_rowkey) throws IOException {
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes(start_rowkey)); // start row is inclusive
        scan.setStopRow(Bytes.toBytes(stop_rowkey));   // stop row is exclusive
        ResultScanner rs = null;
        Table table = connection.getTable(TableName.valueOf(tableName));
        try {
            rs = table.getScanner(scan);
            for (Result r : rs) {
                for (Cell cell : r.listCells()) {
                    System.out.println("row:" + Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
                    System.out.println("family:" + Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
                    System.out.println("qualifier:" + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
                    System.out.println("value:" + Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
                    System.out.println("timestamp:" + cell.getTimestamp());
                    System.out.println("-------------------------------------------");
                }
            }
        } finally {
            if (rs != null) {
                rs.close();
            }
        }
    }
    /*
     * Get a single column of a row
     *
     * @tableName table name
     * @rowKey    row key
     */
    public static void getResultByColumn(String tableName, String rowKey,
            String familyName, String columnName) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Get get = new Get(Bytes.toBytes(rowKey));
        get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName)); // restrict to the given family and qualifier
        Result result = table.get(get);
        for (Cell cell : result.listCells()) {
            System.out.println("family:" + Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
            System.out.println("qualifier:" + Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
            System.out.println("value:" + Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
            System.out.println("timestamp:" + cell.getTimestamp());
            System.out.println("-------------------------------------------");
        }
    }
    /*
     * Update a single column of a row
     *
     * @tableName  table name
     * @rowKey     row key
     * @familyName column family name
     * @columnName column name
     * @value      new value
     */
    public static void updateTable(String tableName, String rowKey,
            String familyName, String columnName, String value)
            throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName),
                Bytes.toBytes(value)); // addColumn replaces the deprecated Put.add()
        table.put(put);
        System.out.println("update table Success!");
    }
    /*
     * Get multiple versions of a column
     *
     * @tableName  table name
     * @rowKey     row key
     * @familyName column family name
     * @columnName column name
     */
    public static void getResultByVersion(String tableName, String rowKey,
            String familyName, String columnName) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Get get = new Get(Bytes.toBytes(rowKey));
        get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
        get.setMaxVersions(5); // only yields several versions if the column family retains more than one
        Result result = table.get(get);
        for (Cell cell : result.listCells()) { // Cell/CellUtil replace the deprecated KeyValue accessors
            System.out.println("family:" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("qualifier:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("value:" + Bytes.toString(CellUtil.cloneValue(cell)));
            System.out.println("Timestamp:" + cell.getTimestamp());
            System.out.println("-------------------------------------------");
        }
    }
    /*
     * Delete a specific column of a row
     *
     * @tableName  table name
     * @rowKey     row key
     * @familyName column family name
     * @columnName column name
     */
    public static void deleteColumn(String tableName, String rowKey,
            String familyName, String columnName) throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Delete deleteColumn = new Delete(Bytes.toBytes(rowKey));
        deleteColumn.addColumn(Bytes.toBytes(familyName),
                Bytes.toBytes(columnName));
        table.delete(deleteColumn);
        System.out.println(familyName + ":" + columnName + " is deleted!");
    }
    /*
     * Delete all columns of a row (the entire row)
     *
     * @tableName table name
     * @rowKey    row key
     */
    public static void deleteAllColumn(String tableName, String rowKey)
            throws IOException {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Delete deleteAll = new Delete(Bytes.toBytes(rowKey));
        table.delete(deleteAll);
        System.out.println("all columns are deleted!");
    }
    /*
     * Delete a table
     *
     * @tableName table name
     */
    public static void deleteTable(String tableName) throws IOException {
        Admin admin = connection.getAdmin();
        admin.disableTable(TableName.valueOf(tableName)); // a table must be disabled before it can be dropped
        admin.deleteTable(TableName.valueOf(tableName));
        System.out.println(tableName + " is deleted!");
    }
    public static void main(String[] args) throws Exception {
        // Create the table
        String tableName = "blog2";
        String[] family = { "article", "author" };
        creatTable(tableName, family);
        // Add data
        String[] column1 = { "title", "content", "tag" };
        String[] value1 = {
                "Head First HBase",
                "HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data.",
                "Hadoop,HBase,NoSQL" };
        String[] column2 = { "name", "nickname" };
        String[] value2 = { "nicholas", "lee" };
        addData("rowkey1", "blog2", column1, value1, column2, value2);
        addData("rowkey2", "blog2", column1, value1, column2, value2);
        addData("rowkey3", "blog2", column1, value1, column2, value2);
        // Full-table scan
        getResultScann("blog2");
        // Scan over a row-key range
        getResultScann("blog2", "rowkey4", "rowkey5");
        // Get a row
        getResult("blog2", "rowkey1");
        // Get one column
        getResultByColumn("blog2", "rowkey1", "author", "name");
        // Update a column
        updateTable("blog2", "rowkey1", "author", "name", "bin");
        // Read the column back
        getResultByColumn("blog2", "rowkey1", "author", "name");
        // Read several versions of the column
        getResultByVersion("blog2", "rowkey1", "author", "name");
        // Delete one column
        deleteColumn("blog2", "rowkey1", "author", "nickname");
        // Delete the whole row
        deleteAllColumn("blog2", "rowkey1");
        // Delete the table
        deleteTable("blog2");
    }
}
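Note that getResultByVersion can only return several versions if the column family retains them; a family created as above keeps the default of VERSIONS => 1. To retain more, alter the family in the HBase shell before testing:
hbase(main):001:0> alter 'blog2', {NAME => 'author', VERSIONS => 5}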
HBase Files on HDFS Explained
1. /hbase/.META.
The storage path of the META catalog table.
2. /hbase/.archive
After a split or compaction completes, HBase moves the old HFiles into the archive directory and deletes the originals; a periodic task on the HMaster cleans this directory out.
3. /hbase/.corrupt
Holds corrupted HBase log files; it is normally empty.
4. /hbase/.hbck
HBase operations occasionally run into metadata inconsistencies; the bundled hbck tool repairs them, using this directory as a temporary staging area during the repair.
5. /hbase/WAL
HBase supports a WAL (Write Ahead Log). On first startup, HBase creates a per-RegionServer directory under the log directory; when a client writes with WAL enabled, each edit is first appended there. If a RegionServer crashes, the logs are replayed to recover unflushed edits, similar in spirit to MySQL's binlog; logs are also rolled once they reach a certain size.
6. /hbase/oldlogs
Once the HLogs under the log directory are no longer needed, they are moved into the old-logs directory, which the HMaster cleans up periodically.
7. /hbase/.snapshot
With the snapshot feature enabled, snapshots of user tables are stored under this directory: taking a snapshot named sp_test of table test creates the folder /hbase/.snapshot/sp_test. The snapshot captures the table's state at that point in time; later writes to the table do not affect it.
8. /hbase/.tmp
When a table is created or deleted, it is first moved into this tmp directory while the operation is processed.
9. /hbase/hbase.id
A file holding the cluster's unique id, a UUID.
10. /hbase/hbase.version
Also a file, holding the cluster's file-layout version number. It is stored in a binary format rather than plain text, so view it through the web UI instead of reading the file directly.
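To see this layout on a live cluster, list the HBase root directory configured in hbase.rootdir (directory names vary slightly between HBase versions; the list above reflects the 1.x-era layout):
[root]# hdfs dfs -ls /hbase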