HBase relies on HDFS for underlying data storage, while BigTable relies on Google GFS;
HBase relies on Hadoop MapReduce for data computation, while BigTable relies on Google MapReduce;
HBase relies on ZooKeeper for service coordination, while BigTable relies on Google Chubby.
1. Shell operations
Make good use of help to look things up.
help 'command' shows the help for a specific command.
Running help by itself prints many commands, grouped by name; the namespace, ddl, and dml groups are the ones we focus on (a sample session follows the listing below).
Group name: ddl
Commands: alter, alter_async, alter_status, create, describe, disable, disable_all, drop,
drop_all, enable, enable_all, exists, get_table, is_disabled, is_enabled, list,
locate_region, show_filters
Group name: namespace
Commands: alter_namespace, create_namespace, describe_namespace,
drop_namespace, list_namespace, list_namespace_tables
Group name: dml
Commands: append, count, delete, deleteall, get, get_counter, get_splits, incr, put,
scan, truncate, truncate_preserve
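To see the shell in action, a minimal session touching all three groups might look like the following (the namespace, table, and values here are made up for illustration):

hbase(main):001:0> create_namespace 'test_ns'
hbase(main):002:0> create 'test_ns:t1', 'f1'
hbase(main):003:0> put 'test_ns:t1', 'rk001', 'f1:name', 'zhangsan'
hbase(main):004:0> get 'test_ns:t1', 'rk001'
hbase(main):005:0> scan 'test_ns:t1'
hbase(main):006:0> disable 'test_ns:t1'
hbase(main):007:0> drop 'test_ns:t1'

Note that drop only works on a disabled table, which is why disable comes first; the same rule applies to deleteTable in the Java API below.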
DDL
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HBaseAdmin;
public class HbaseDDL {
    static Configuration conf = null;
    static HBaseAdmin admin = null;
    static Connection conn = null;

    // create a namespace
    public static void create_namespace() throws IOException {
        NamespaceDescriptor ns = NamespaceDescriptor.create("test_api_1901").build();
        // createNamespace takes a NamespaceDescriptor
        admin.createNamespace(ns);
    }

    // list namespaces
    public static void list_namespace() throws IOException {
        NamespaceDescriptor[] nsDescriptors = admin.listNamespaceDescriptors();
        for (NamespaceDescriptor ns : nsDescriptors) {
            System.out.println(ns.getName());
        }
    }

    // delete a namespace
    public static void delete_namespace(String name) throws IOException {
        admin.deleteNamespace(name);
        System.out.println(name + " deleted successfully!");
    }

    // create a table
    public static void create_table(String name, String... familys) throws IOException {
        // check whether the table already exists
        if (admin.tableExists(name)) {
            System.out.println(name + " already exists, please choose another table name");
        } else {
            TableName tn = TableName.valueOf(name);
            // the table descriptor takes a TableName object
            HTableDescriptor table = new HTableDescriptor(tn);
            // a table needs at least one column family;
            // wrap each family name in a column family descriptor
            for (String f : familys) {
                HColumnDescriptor family = new HColumnDescriptor(f);
                table.addFamily(family);
            }
            admin.createTable(table);
            System.out.println("table created successfully");
        }
    }

    // list tables
    public static void list_tables() throws IOException {
        TableName[] tNames = admin.listTableNames();
        for (TableName t : tNames) {
            System.out.println(t.getNameAsString());
        }
    }

    // delete a table
    public static void delete_table(String name) throws IOException {
        if (admin.tableExists(name)) {
            // a table must be disabled before it can be dropped
            if (admin.isTableEnabled(name)) {
                admin.disableTable(name);
            }
            admin.deleteTable(name);
        } else {
            System.out.println(name + " does not exist, please check and try again");
        }
    }

    public static void main(String[] args) throws IOException {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        conn = ConnectionFactory.createConnection(conf);
        // get the admin handle used for all DDL operations
        admin = (HBaseAdmin) conn.getAdmin();
        //create_namespace();
        //list_namespace();
        //create_table("test_api_1901:table1", "f1", "f2");
        //list_tables();
        delete_table("test1");
        admin.close();
        conn.close();
    }
}
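The explicit close() calls at the end of main above are easy to skip if an operation throws. Connection and Admin are both Closeable, so a more defensive variant (the class name here is assumed for illustration) scopes them with try-with-resources:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class HbaseDDLClosing {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        // both handles are closed automatically, even if an operation throws
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            for (TableName t : admin.listTableNames()) {
                System.out.println(t.getNameAsString());
            }
        }
    }
}

The Admin interface is the un-cast form of the getAdmin() result; casting to HBaseAdmin as above also works in the 1.x client but ties the code to a concrete class.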
DML
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
public class HbaseDML {
    static Configuration conf = null;
    static HBaseAdmin admin = null;
    static Connection conn = null;
    static HTable table = null;

    /*
     * Data insertion; shell equivalent: put 'table', 'rowkey', 'family:column', 'value', ts
     */
    // insert a single row
    public static void insertOneData() throws IOException {
        // a Put object wraps the data to insert: one Put per row
        Put put = new Put("rk001".getBytes());
        put.addColumn("info1".getBytes(), "name".getBytes(), "zhangsan".getBytes());
        table.put(put);
    }

    // insert multiple rows, one put (and one round trip) at a time
    public static void insertManyDatas() throws IOException {
        for (int i = 0; i < 10000; i++) {
            Put put = new Put(("rk" + i).getBytes());
            put.addColumn("info1".getBytes(), ("name" + i).getBytes(), ("zhangsan" + i).getBytes());
            table.put(put);
        }
    }

    // buffer the Puts in an in-memory list first, then submit them all in one batch
    public static void insertListDatas() throws IOException {
        ArrayList<Put> list = new ArrayList<Put>();
        for (int i = 0; i < 10000; i++) {
            Put p = new Put(("rk" + i).getBytes());
            p.addColumn("info1".getBytes(), "name".getBytes(), "zhangsan".getBytes());
            list.add(p);
        }
        table.put(list);
    }

    // bulk import through the client-side write buffer (held in client memory);
    // see the BufferedMutator sketch after this class for the newer equivalent
    public static void insertBufferDatas() throws IOException {
        // autoflush (default true) submits every put immediately; with false,
        // puts accumulate locally and are only sent when a flush is triggered
        table.setAutoFlushTo(false);
        // flush once the buffer reaches 10 MB; this condition is OR'd with the
        // row counter below, so whichever is hit first triggers a flush
        table.setWriteBufferSize(10 * 1024 * 1024);
        for (int i = 0; i < 10000; i++) {
            Put put = new Put(("rk" + i).getBytes());
            put.addColumn("info1".getBytes(), ("name" + i).getBytes(), ("zhangsan" + i).getBytes());
            // goes into the local buffer; no call to the HBase server yet
            table.put(put);
            if (i % 3000 == 0) {
                // explicit flush every 3000 rows
                table.flushCommits();
            }
        }
        // force out whatever is left in the buffer (the final partial batch)
        table.flushCommits();
    }

    // delete a single cell
    public static void deleteOneData() throws IOException {
        Delete delete = new Delete("rk1".getBytes());
        delete.addColumn("info1".getBytes(), "name".getBytes());
        table.delete(delete);
    }

    // delete multiple cells: batch the Delete objects and submit them once
    public static void deleteMultiDatas() throws IOException {
        ArrayList<Delete> deletes = new ArrayList<Delete>();
        Delete delete1 = new Delete("rk1".getBytes());
        delete1.addColumn("info1".getBytes(), "name".getBytes());
        deletes.add(delete1);
        Delete delete2 = new Delete("rk2".getBytes());
        delete2.addColumn("info1".getBytes(), "name".getBytes());
        deletes.add(delete2);
        table.delete(deletes);
    }

    public static void main(String[] args) throws IOException {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        conn = ConnectionFactory.createConnection(conf);
        // one HTable object corresponds to one table
        table = (HTable) conn.getTable(TableName.valueOf("test_api_1901:table1"));
        insertManyDatas();
        table.close();
        conn.close();
    }
}
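setAutoFlushTo and flushCommits are HTable-specific and deprecated in newer client versions; the replacement is BufferedMutator, which maintains the client-side write buffer itself. A minimal sketch, reusing the table name from above (the class name and buffer size are assumptions for illustration):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;

public class HbaseBufferedWrite {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        // size the client-side write buffer at 10 MB (an example value)
        BufferedMutatorParams params =
                new BufferedMutatorParams(TableName.valueOf("test_api_1901:table1"))
                        .writeBufferSize(10 * 1024 * 1024);
        try (Connection conn = ConnectionFactory.createConnection(conf);
             BufferedMutator mutator = conn.getBufferedMutator(params)) {
            for (int i = 0; i < 10000; i++) {
                Put put = new Put(("rk" + i).getBytes());
                put.addColumn("info1".getBytes(), "name".getBytes(), ("zhangsan" + i).getBytes());
                // buffered locally; the mutator flushes automatically when the buffer fills
                mutator.mutate(put);
            }
            // close() (via try-with-resources) flushes any remaining buffered mutations
        }
    }
}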
DML: Query
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
public class HbaseDMLQuery {
    static Configuration conf = null;
    static HBaseAdmin admin = null;
    static Connection conn = null;
    static HTable table = null;

    public static void main(String[] args) throws IOException {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        conn = ConnectionFactory.createConnection(conf);
        // one HTable object corresponds to one table
        table = (HTable) conn.getTable(TableName.valueOf("test_api_1901:table1"));
        getOneData();
        //getDatas();
        //scanData01();
        //scanData02();
        //scanDataWithFilter();
        table.close();
        conn.close();
    }

    // get: query a single row
    public static void getOneData() throws IOException {
        // wrap the row to query in a Get object; the constructor argument is the rowkey
        Get get = new Get("zhangsan_20150701_0001".getBytes());
        // Result wraps the outcome of the get: one row, which may span
        // multiple column families, columns, and cells
        Result result = table.get(get);
        List<Cell> cells = result.listCells();
        // iterate over every cell; a cell is located by
        // rowkey + column family + column + timestamp
        for (Cell cell : cells) {
            System.out.print(new String(cell.getRow()) + "\t");
            System.out.print(new String(cell.getFamily()) + "\t");
            System.out.print(new String(cell.getQualifier()) + "\t");
            System.out.print(new String(cell.getValue()) + "\t");
            System.out.print(cell.getTimestamp());
            System.out.println();
        }
    }

    // batch get
    public static void getDatas() throws IOException {
        ArrayList<Get> list = new ArrayList<Get>();
        Get get1 = new Get("zhangsan_20150701_0001".getBytes());
        list.add(get1);
        Get get2 = new Get("zhangsan_20150701_0002".getBytes());
        // arg 1: column family, arg 2: column qualifier
        get2.addColumn("base_info".getBytes(), "name".getBytes());
        list.add(get2);
        Result[] results = table.get(list);
        for (Result r : results) { // one Result per submitted Get
            List<Cell> cells = r.listCells();
            for (Cell cell : cells) { // each cell in the row
                System.out.print(new String(cell.getRow()) + "\t");
                System.out.print(new String(cell.getFamily()) + "\t");
                System.out.print(new String(cell.getQualifier()) + "\t");
                System.out.print(new String(cell.getValue()) + "\t");
                System.out.print(cell.getTimestamp());
                System.out.println();
            }
        }
    }

    /*
     * scan: table scans
     */
    public static void scanData01() throws IOException {
        // full table scan
        Scan scan = new Scan();
        // the scan result set
        ResultScanner results = table.getScanner(scan);
        // standard iterator over the results
        Iterator<Result> rit = results.iterator();
        while (rit.hasNext()) {
            Result next = rit.next();
            List<Cell> cells = next.listCells();
            for (Cell cell : cells) {
                System.out.print(new String(cell.getRow()) + "\t");
                System.out.print(new String(cell.getFamily()) + "\t");
                System.out.print(new String(cell.getQualifier()) + "\t");
                System.out.print(new String(cell.getValue()) + "\t");
                System.out.print(cell.getTimestamp());
                System.out.println();
            }
        }
    }

    // scan a range bounded by start and stop rowkeys
    public static void scanData02() throws IOException {
        Scan scan = new Scan();
        scan.setStartRow("rk01".trim().getBytes());
        scan.setStopRow("zhangsan_20150701_0005".trim().getBytes());
        // scan by timestamp range (both bounds are long timestamps):
        //scan.setTimeRange(minStamp, maxStamp);
        // the scan result set
        ResultScanner results = table.getScanner(scan);
        // standard iterator over the results
        Iterator<Result> rit = results.iterator();
        while (rit.hasNext()) {
            Result next = rit.next();
            List<Cell> cells = next.listCells();
            for (Cell cell : cells) {
                System.out.print(new String(cell.getRow()) + "\t");
                System.out.print(new String(cell.getFamily()) + "\t");
                System.out.print(new String(cell.getQualifier()) + "\t");
                System.out.print(new String(cell.getValue()) + "\t");
                System.out.print(cell.getTimestamp());
                System.out.println();
            }
        }
    }

    public static void scanDataWithFilter() throws IOException {
        Scan scan = new Scan();
        /*
         * FamilyFilter(CompareOp familyCompareOp, ByteArrayComparable familyComparator):
         * the CompareOp is the comparison rule, the comparator supplies the
         * comparison mechanism
         */
        // filters on column family: GREATER keeps families that sort after "base_info"
        Filter f1 = new FamilyFilter(CompareOp.GREATER, new BinaryComparator("base_info".getBytes()));
        scan.setFilter(f1);
        ResultScanner results = table.getScanner(scan);
        Iterator<Result> rit = results.iterator();
        while (rit.hasNext()) {
            Result next = rit.next();
            List<Cell> cells = next.listCells();
            for (Cell cell : cells) {
                System.out.print(new String(cell.getRow()) + "\t");
                System.out.print(new String(cell.getFamily()) + "\t");
                System.out.print(new String(cell.getQualifier()) + "\t");
                System.out.print(new String(cell.getValue()) + "\t");
                System.out.print(cell.getTimestamp());
                System.out.println();
            }
        }
    }
}
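Filters can also be combined with FilterList. The sketch below is illustrative rather than from the original: it ANDs a row-prefix condition with an exact family match (the class name and filter values are assumptions), using the same connection setup as above:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;

public class HbaseCombinedFilterDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("test_api_1901:table1"))) {
            // MUST_PASS_ALL = logical AND of the wrapped filters
            FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL,
                    new PrefixFilter("zhangsan_".getBytes()),
                    new FamilyFilter(CompareOp.EQUAL, new BinaryComparator("base_info".getBytes())));
            Scan scan = new Scan();
            scan.setFilter(filters);
            try (ResultScanner results = table.getScanner(scan)) {
                for (Result r : results) {
                    List<Cell> cells = r.listCells();
                    for (Cell cell : cells) {
                        System.out.println(new String(cell.getRow()) + "\t"
                                + new String(cell.getFamily()) + ":"
                                + new String(cell.getQualifier()) + "\t"
                                + new String(cell.getValue()));
                    }
                }
            }
        }
    }
}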
This article covered HBase basics: HBase relies on HDFS for storage, MapReduce for computation, and ZooKeeper for service coordination. It then walked through HBase shell operations (using help, plus the namespace, DDL, and DML command groups) and the Java client API for DDL, DML, and query operations.