Hbase pig相关
数据仓库 数据集市
edw odb adb
hadoop ecosystem
分布式搜索引擎「Elasticsearch」、
分布式文件系统「HDFS」、
分布式消息队列「Kafka」、
缓存数据库「Redis」等等…
HBASE 结构
Hbase 读取过程
Hbase 表结构 存储结构 关系型数据库(对比)
关系型数据库对比 增加新的列,占用更多的存储空间,可维护多个版本 version
dfs存储目录
hbase hadoop 版本
chown hadoop:hadoop -R
./hive --service metastore
./schematool -dbType mysql -initSchema
./hive --service metastore
hdfs namenode -format
create database wk110;
show databases;
hbase-site.xml(单机/伪分布式,使用HBase自带的zookeeper)
<!-- hbase-site.xml for a single-host setup whose ZooKeeper quorum is the host "master". -->
<configuration>
<!-- Path on HDFS (NameNode at master:9000) under which HBase stores its data. -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
</property>
<!-- NOTE(review): presumably turns on replication for region replicas; confirm against the HBase reference guide. -->
<property>
<name>hbase.region.replica.replication.enabled</name>
<value>true</value>
</property>
<!-- NOTE(review): presumably the global replication switch; confirm it is still honoured by the HBase version in use. -->
<property>
<name>hbase.replication</name>
<value>true</value>
</property>
<!-- Run HBase in distributed mode. -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- Temporary directory; the value is a relative path, so it presumably resolves against the process working directory (TODO confirm). -->
<property>
<name>hbase.tmp.dir</name>
<value>./tmp</value>
</property>
<!-- NOTE(review): looks like this disables the filesystem stream-capability check at startup; verify before using on a real cluster. -->
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
<!-- ZooKeeper quorum host(s); multiple hosts would be comma-separated. -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>master</value>
</property>
<!-- NOTE(review): presumably the data directory handed to the ZooKeeper instance; confirm. -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/hadoop/app/hbase-1.7.1/zk_data</value>
</property>
<!-- Port of the HMaster web UI. -->
<property>
<name>hbase.master.info.port</name>
<value>60010</value>
</property>
</configuration>
hbase集群搭建
1.上传hbase安装包
2.解压
3.配置hbase集群,要修改3个文件(首先zk集群已经安装好了)
注意:要把hadoop的hdfs-site.xml和core-site.xml 放到hbase/conf下
3.1修改hbase-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_55
# 告诉hbase使用外部的zk(hbase-env.sh是shell脚本,注释用#)
export HBASE_MANAGES_ZK=false
vim hbase-site.xml
<!-- hbase-site.xml for a multi-node cluster that uses an external ZooKeeper ensemble. -->
<configuration>
<!-- Path on HDFS where HBase stores its data. -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://ns1/hbase</value>
</property>
<!-- Run HBase in distributed mode. -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- ZooKeeper addresses; separate multiple entries with ",". -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>weekend04:2181,weekend05:2181,weekend06:2181</value>
</property>
</configuration>
vim regionservers
weekend03
weekend04
weekend05
weekend06
3.2拷贝hbase到其他节点
scp -r /weekend/hbase-0.96.2-hadoop2/ weekend02:/weekend/
scp -r /weekend/hbase-0.96.2-hadoop2/ weekend03:/weekend/
scp -r /weekend/hbase-0.96.2-hadoop2/ weekend04:/weekend/
scp -r /weekend/hbase-0.96.2-hadoop2/ weekend05:/weekend/
scp -r /weekend/hbase-0.96.2-hadoop2/ weekend06:/weekend/
4.将配置好的HBase拷贝到每一个节点并同步时间。
5.启动所有的hbase
分别启动zk
./zkServer.sh start
启动hdfs集群
start-dfs.sh
启动hbase,在主节点上运行:
start-hbase.sh
6.通过浏览器访问hbase管理页面
192.168.1.201:60010
7.为保证集群的可靠性,要启动多个HMaster
hbase-daemon.sh start master
http://192.168.25.129:60010/master-status#baseStats
table mygirls
'mygirls', {NAME => 'base_info', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '3', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}, {NAME => 'extra_info', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
jps
[hadoop@master bin]$ jps
63057 HQuorumPeer
63124 HMaster
63511 Jps
63256 HRegionServer
Hbase shell 命令 get put scan list describe disable delete
进入hbase命令行
./hbase shell
#显示hbase中的表
list
#创建user表(两种写法二选一):第一条创建info1、data1两个列族;第二条只创建info列族并保留3个版本。注意:后面的示例使用的列族名是info和data
create 'user', 'info1', 'data1'
create 'user', {NAME => 'info', VERSIONS => '3'}
#向user表中插入信息,row key为rk0001,列族info中添加name列标示符,值为zhangsan
put 'user', 'rk0001', 'info:name', 'zhangsan'
#向user表中插入信息,row key为rk0001,列族info中添加gender列标示符,值为female
put 'user', 'rk0001', 'info:gender', 'female'
#向user表中插入信息,row key为rk0001,列族info中添加age列标示符,值为20
put 'user', 'rk0001', 'info:age', 20
#向user表中插入信息,row key为rk0001,列族data中添加pic列标示符,值为picture
put 'user', 'rk0001', 'data:pic', 'picture'
#获取user表中row key为rk0001的所有信息
get 'user', 'rk0001'
#获取user表中row key为rk0001,info列族的所有信息
get 'user', 'rk0001', 'info'
#获取user表中row key为rk0001,info列族的name、age列标示符的信息
get 'user', 'rk0001', 'info:name', 'info:age'
#获取user表中row key为rk0001,info、data列族的信息
get 'user', 'rk0001', 'info', 'data'
get 'user', 'rk0001', {COLUMN => ['info', 'data']}
get 'user', 'rk0001', {COLUMN => ['info:name', 'data:pic']}
#获取user表中row key为rk0001,列族为info(或列info:name)的最新N个版本的信息,N由VERSIONS指定,还可以用TIMERANGE限定时间戳范围
get 'user', 'rk0001', {COLUMN => 'info', VERSIONS => 2}
get 'user', 'rk0001', {COLUMN => 'info:name', VERSIONS => 5}
get 'user', 'rk0001', {COLUMN => 'info:name', VERSIONS => 5, TIMERANGE => [1392368783980, 1392380169184]}
#获取people表中row key为rk0001,cell的值为“图片”的信息
get 'people', 'rk0001', {FILTER => "ValueFilter(=, 'binary:图片')"}
#获取people表中row key为rk0001,列标示符中含有a的信息
get 'people', 'rk0001', {FILTER => "(QualifierFilter(=,'substring:a'))"}
put 'user', 'rk0002', 'info:name', 'fanbingbing'
put 'user', 'rk0002', 'info:gender', 'female'
put 'user', 'rk0002', 'info:nationality', '中国'
get 'user', 'rk0002', {FILTER => "ValueFilter(=, 'binary:中国')"}
#查询user表中的所有信息
scan 'user'
#查询user表中列族为info的信息
scan 'user', {COLUMNS => 'info'}
scan 'user', {COLUMNS => 'info', RAW => true, VERSIONS => 5}
scan 'persion', {COLUMNS => 'info', RAW => true, VERSIONS => 3}
#查询user表中列族为info和data的信息
scan 'user', {COLUMNS => ['info', 'data']}
scan 'user', {COLUMNS => ['info:name', 'data:pic']}
#查询user表中列族为info、列标示符为name的信息
scan 'user', {COLUMNS => 'info:name'}
#查询user表中列族为info、列标示符为name的信息,并且版本最新的5个
scan 'user', {COLUMNS => 'info:name', VERSIONS => 5}
#查询user表中列族为info和data且列标示符中含有a字符的信息
scan 'user', {COLUMNS => ['info', 'data'], FILTER => "(QualifierFilter(=,'substring:a'))"}
#查询people表中列族为info,rk范围是[rk0001, rk0003)的数据
scan 'people', {COLUMNS => 'info', STARTROW => 'rk0001', ENDROW => 'rk0003'}
#查询user表中row key以rk字符开头的
scan 'user',{FILTER=>"PrefixFilter('rk')"}
#查询user表中指定范围的数据
scan 'user', {TIMERANGE => [1392368783980, 1392380169184]}
#删除数据
#删除people表row key为rk0001,列标示符为info:name的数据
delete 'people', 'rk0001', 'info:name'
#删除user表row key为rk0001,列标示符为info:name,timestamp为1392383705316的数据
delete 'user', 'rk0001', 'info:name', 1392383705316
#清空people表中的数据
truncate 'people'
#修改表结构
#首先停用user表(新版本不用)
disable 'user'
#添加两个列族f1和f2
alter 'people', NAME => 'f1'
alter 'user', NAME => 'f2'
#启用表
enable 'user'
###disable 'user'(新版本不用)
#删除一个列族:
alter 'user', NAME => 'f1', METHOD => 'delete' 或 alter 'user', 'delete' => 'f1'
#添加列族f1同时删除列族f2
alter 'user', {NAME => 'f1'}, {NAME => 'f2', METHOD => 'delete'}
#将people表的info列族保留的版本数改为5
alter 'people', NAME => 'info', VERSIONS => 5
#启用表
enable 'user'
#删除表
disable 'user'
drop 'user'
get 'person', 'rk0001', {FILTER => "ValueFilter(=, 'binary:中国')"}
get 'person', 'rk0001', {FILTER => "(QualifierFilter(=,'substring:a'))"}
scan 'person', {COLUMNS => 'info:name'}
scan 'person', {COLUMNS => ['info', 'data'], FILTER => "(QualifierFilter(=,'substring:a'))"}
scan 'person', {COLUMNS => 'info', STARTROW => 'rk0001', ENDROW => 'rk0003'}
scan 'person', {COLUMNS => 'info', STARTROW => '20140201', ENDROW => '20140301'}
scan 'person', {COLUMNS => 'info:name', TIMERANGE => [1395978233636, 1395987769587]}
delete 'person', 'rk0001', 'info:name'
alter 'person', NAME => 'ffff'
alter 'person', NAME => 'info', VERSIONS => 10
get 'user', 'rk0002', {COLUMN => ['info:name', 'data:pic']}
java API
HbasePut (1.7.1)
Configuration
HBaseConfiguration
HTableDescriptor
ConnectionFactory
Connection
Admin
HColumnDescriptor
table.addFamily()
admin.createTable(table)
Connection
Table connection.getTable()
Put
Put.addColumn(byte[] family, byte[] qualifier, byte[] value)
table.put(put)
package cn.itcast.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.sql.DriverManager;
import static org.apache.hadoop.hbase.client.ConnectionFactory.createConnection;
/**
 * Minimal HBase 1.x client example: creates a table and writes a single row
 * into its {@code baseinfo} column family.
 */
public class HbasePut {
    /** Column family that {@link #addData} writes into. */
    private static final String TARGET_FAMILY = "baseinfo";

    /** Shared client configuration; the static initializer sets the ZooKeeper quorum on it. */
    public static Configuration config = null;

    static {
        config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "master:2181");
    }

    /**
     * Creates a table with the given column families unless it already exists.
     *
     * <p>An {@link IOException} raised while talking to the cluster is printed,
     * not rethrown.
     *
     * @param tableName name of the table to create
     * @param family    names of the column families to add to the table
     */
    public static void createTable(String tableName, String[] family) {
        TableName name = TableName.valueOf(tableName);
        try (Connection connection = ConnectionFactory.createConnection(config)) {
            System.out.println("成功连接ZK");
            try (Admin admin = connection.getAdmin()) {
                if (admin.tableExists(name)) {
                    System.out.println("Table Exists!!");
                    return;
                }
                // Build the descriptor only when the table really has to be created.
                HTableDescriptor table = new HTableDescriptor(name);
                for (String familyName : family) {
                    table.addFamily(new HColumnDescriptor(familyName));
                }
                admin.createTable(table);
                System.out.println("Create Table Success!!! Table Name :[ " + tableName + " ]");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new HBase connection from the shared {@link #config}.
     *
     * <p>The previous implementation opened a MySQL JDBC connection and cast it
     * to the HBase {@code Connection} type. The two types are unrelated, so that
     * cast could never succeed at runtime. The declared return type is the HBase
     * one, so an HBase connection is what is returned now.
     *
     * @return an open connection; the caller is responsible for closing it
     * @throws Exception if the connection cannot be established
     */
    public static Connection getConnection() throws Exception {
        return ConnectionFactory.createConnection(config);
    }

    /**
     * Writes one row into the {@code baseinfo} column family of a table.
     *
     * <p>{@code column[i]} is stored with {@code value[i]}. Nothing is written
     * when the table has no {@code baseinfo} family or when {@code column} is
     * empty. An {@link IOException} raised while talking to the cluster is
     * printed, not rethrown.
     *
     * @param rowKey    row key of the row to write
     * @param tableName name of the target table
     * @param column    column qualifiers to write
     * @param value     values to store, parallel to {@code column}
     * @throws IllegalArgumentException if {@code column} and {@code value} differ in length
     */
    public static void addData(String rowKey, String tableName, String[] column, String[] value) {
        if (column.length != value.length) {
            throw new IllegalArgumentException(
                    "column/value length mismatch: " + column.length + " vs " + value.length);
        }
        try (Connection connection = createConnection(config);
             Table table = connection.getTable(TableName.valueOf(tableName))) {
            boolean familyFound = false;
            for (HColumnDescriptor descriptor : table.getTableDescriptor().getColumnFamilies()) {
                if (TARGET_FAMILY.equals(descriptor.getNameAsString())) {
                    familyFound = true;
                    break;
                }
            }
            if (!familyFound || column.length == 0) {
                // An empty Put must not be submitted, so stop here.
                System.out.println("Nothing written: family [" + TARGET_FAMILY
                        + "] missing or no columns given for table [" + tableName + "]");
                return;
            }

            // HBase stores everything as byte arrays.
            byte[] familyBytes = Bytes.toBytes(TARGET_FAMILY);
            Put put = new Put(Bytes.toBytes(rowKey));
            for (int i = 0; i < column.length; i++) {
                put.addColumn(familyBytes, Bytes.toBytes(column[i]), Bytes.toBytes(value[i]));
            }
            // Submit once, after all cells are collected (previously done once per column family).
            table.put(put);
            System.out.println("Add Data Success!!!-");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the {@code testapi} table and inserts one sample row.
     *
     * @param args unused
     * @throws IOException declared for callers; the helpers print I/O errors themselves
     */
    public static void main(String[] args) throws IOException {
        String[] family = {"baseinfo"};
        String tablename = "testapi";
        HbasePut.createTable(tablename, family);

        String[] column = {"name", "age", "email", "phone"};
        String[] value = {"zengxuefeng", "24", "1564665679@qq.com", "18463101815"};
        String rowkey = "student";
        HbasePut.addData(rowkey, tablename, column, value);
    }
}
HbaseDemo 旧版api 0.96.2
HBaseAdmin
HTable
Put table.put(put);
Get
Result table.get(get)
KeyValue getFamily getQualifier getValue
HTablePool
HTableInterface
Scan
ResultScanner table.getScanner(scan)
Result
Delete table.delete(del)
package cn.itcast.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Before;
import org.junit.Test;
public class HbaseDemo {
private Configuration conf = null;
@Before
public void init(){
conf = HBaseConfiguration.create();
//conf.set("hbase.zookeeper.quorum", "master");
}
@Test
public void testDrop() throws Exception{
HBaseAdmin admin = new HBaseAdmin(conf);
admin.disableTable("account");
admin.deleteTable("account");
admin.close();
}
@Test
public void testPut() throws Exception{
HTable table = new HTable(conf, "user");
Put put = new Put(Bytes.toBytes("rk0003"));
put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("liuyan"));
table.put(put);
table.close();
}
@Test
public void testGet() throws Exception{
//HTablePool pool = new HTablePool(conf, 10);
//HTable table = (HTable) pool.getTable("user");
HTable table = new HTable(conf, "user");
Get get = new Get(Bytes.toBytes("rk0001"));
//get.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
get.setMaxVersions(5);
Result result = table.get(get);
//result.getValue(family, qualifier)
for(KeyValue kv : result.list()){
String family = new String(kv.getFamily());
System.out.println(family);
String qualifier = new String(kv.getQualifier());
System.out.println(qualifier);
System.out.println(new String(kv.getValue()));
}
table.close();
}
@Test
public void testScan() throws Exception{
HTablePool pool = new HTablePool(conf, 10);
HTableInterface table = pool.getTable("user");
Scan scan = new Scan(Bytes.toBytes("rk0001"), Bytes.toBytes("rk0002"));
scan.addFamily(Bytes.toBytes("info"));
ResultScanner scanner = table.getScanner(scan);
for(Result r : scanner){
/**
for(KeyValue kv : r.list()){
String family = new String(kv.getFamily());
System.out.println(family);
String qualifier = new String(kv.getQualifier());
System.out.println(qualifier);
System.out.println(new String(kv.getValue()));
}
*/
byte[] value = r.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"));
System.out.println(new String(value));
}
pool.close();
}
@Test
public void testDel() throws Exception{
HTable table = new HTable(conf, "user");
Delete del = new Delete(Bytes.toBytes("rk0001"));
del.deleteColumn(Bytes.toBytes("data"), Bytes.toBytes("pic"));
table.delete(del);
table.close();
}
public static void main(String[] args) throws Exception {
createTable();
//createTable1();
}
public static void createTable() throws Exception {
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "master");
conf.set("hbase.zookeeper.property.clientPort", "2181");
HBaseAdmin admin = new HBaseAdmin(conf);
//TableName name = TableName.valueOf("uperstar");
//HTableDescriptor td = new HTableDescriptor(name);
HTableDescriptor table_desc = new HTableDescriptor("uperstar");
HColumnDescriptor base_info = new HColumnDescriptor("base_info");
base_info.setMaxVersions(10);
table_desc.addFamily(base_info);
admin.createTable(table_desc);
admin.close();
}
public static void createTable1(){
String talbeName = "uperstar1";
String columnFamily = "base_info";
String columnFamily1 = "advance_info";
Connection connection = null;
Admin admin = null;
try {
Configuration config = HBaseConfiguration.create();
config.set("hbase.zookeeper.quorum", "master");
connection = ConnectionFactory.createConnection(config);
admin = connection.getAdmin();
boolean ifexists = admin.tableExists(TableName.valueOf(talbeName));
if (!ifexists) {
// 不存在则创建
HTableDescriptor tableDesc = new HTableDescriptor(
TableName.valueOf(talbeName));
HColumnDescriptor columnDesc = new HColumnDescriptor(
columnFamily);
HColumnDescriptor columnDesc1 = new HColumnDescriptor(
columnFamily1);
tableDesc.addFamily(columnDesc);
tableDesc.addFamily(columnDesc1);
admin.createTable(tableDesc);
System.out.println(talbeName + " create success!");
} else {
System.out.println(talbeName + " is exists:" + ifexists);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (admin != null) {
try {
admin.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (connection != null) {
try {
connection.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}