版本信息:
Hadoop2.7.7+HBase2.1.3
maven引入相关依赖:
<!-- Hadoop dependencies (all pinned to 2.7.7 to match the cluster version stated above) -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.7</version>
</dependency>
<!-- HBase dependencies (all pinned to 2.1.3 to match the cluster version stated above) -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>2.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>2.1.3</version>
</dependency>
初始化资源:
package com.cn.util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import java.io.IOException;
/**
 * Singleton holder for the shared HBase {@link Connection}.
 *
 * <p>The first call to {@link #getInstance()} opens the connection and makes sure the
 * {@code t_target} table exists. {@link Table} and {@link Admin} handles obtained from this
 * class must be closed by the caller after use.
 */
public class HBaseUtils {
    /** Connection shared by every caller; stays {@code null} if the initial connect failed. */
    public static Connection conn = null;

    /** Address used for {@code hbase.zookeeper.quorum}. */
    String hbhost = "192.168.4.238";

    private static HBaseUtils instance = null;

    /**
     * Returns the single instance, creating it on first use.
     *
     * <p>This is a lazily-initialised singleton; the method is {@code synchronized} so that
     * concurrent first calls cannot construct two instances.
     *
     * @return the shared {@code HBaseUtils} instance
     */
    public static synchronized HBaseUtils getInstance() {
        if (instance == null) {
            instance = new HBaseUtils();
        }
        return instance;
    }

    /**
     * Opens the shared connection (if not already open) and creates the {@code t_target}
     * table when it is missing. An {@link IOException} is reported via its stack trace and
     * does not propagate.
     */
    private HBaseUtils() {
        Configuration configuration = new Configuration();
        // ZooKeeper quorum host used to locate the HBase cluster
        configuration.set("hbase.zookeeper.quorum", hbhost);
        // ZooKeeper client port
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        try {
            if (conn == null) {
                conn = ConnectionFactory.createConnection(configuration);
            }
            // try-with-resources: the Admin is closed even when table creation fails
            try (Admin admin = conn.getAdmin()) {
                createTargetTableIfAbsent(admin);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the pre-split {@code t_target} table unless it already exists.
     *
     * @param admin open admin handle; not closed by this method
     * @throws IOException if the existence check or the table creation fails
     */
    private static void createTargetTableIfAbsent(Admin admin) throws IOException {
        String tname1 = "t_target";
        TableName tableName = TableName.valueOf(tname1);
        if (admin.tableExists(tableName)) {
            System.out.println(tname1 + " 表已存在!");
            return;
        }

        // Cell time-to-live in seconds: CodeUtil.delBmpDay days
        int expireTime = 60 * 60 * 24 * Integer.parseInt(CodeUtil.delBmpDay);

        TableDescriptorBuilder tdb = TableDescriptorBuilder.newBuilder(tableName);
        // Column families; names are kept short because the family name is stored with every cell.
        String[] columnFamilys = {"t"};
        for (String columnFamily : columnFamilys) {
            // setTimeToLive: expiry in seconds; setBloomFilterType: row-level bloom filter.
            ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder
                    .newBuilder(Bytes.toBytes(columnFamily))
                    .setTimeToLive(expireTime)
                    .setBloomFilterType(BloomType.ROW)
                    .build();
            tdb.setColumnFamily(cfd);
        }
        TableDescriptor td = tdb.build();

        // Pre-split points chosen for this table's row-key layout
        byte[][] splitKeys = new byte[][] {
            Bytes.toBytes("1000000000"),
            Bytes.toBytes("2000000000"),
            Bytes.toBytes("3000000000"),
            Bytes.toBytes("4000000000"),
            Bytes.toBytes("5000000000"),
            Bytes.toBytes("6000000000"),
            Bytes.toBytes("7000000000"),
            Bytes.toBytes("8000000000"),
            Bytes.toBytes("9000000000")
        };
        admin.createTable(td, splitKeys);
        System.out.println(tname1 + " 表创建成功!");
    }

    /**
     * Obtains a handle for the named table from the shared connection.
     *
     * @param tableName name of the table
     * @return a new {@link Table} that the caller must close, or {@code null} if the lookup
     *     raised an {@link IOException}
     * @throws IllegalStateException if the shared connection was never established
     */
    public Table getTable(String tableName) {
        requireConnection();
        try {
            return conn.getTable(TableName.valueOf(tableName));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Obtains an admin handle from the shared connection.
     *
     * @return a new {@link Admin} that the caller must close, or {@code null} if the lookup
     *     raised an {@link IOException}
     * @throws IllegalStateException if the shared connection was never established
     */
    public Admin getAdmin() {
        requireConnection();
        try {
            return conn.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Fails with a descriptive message instead of a bare NullPointerException. */
    private static void requireConnection() {
        if (conn == null) {
            throw new IllegalStateException("HBase connection has not been established");
        }
    }
}
由于Connection是线程安全的,所以使用单例获取,一个进程共用一个Connection对象。在该类中还加入了获取Table和Admin对象的方法,由于Table和Admin不是线程安全的,所以在不同的线程中应使用单独的Table和Admin对象,随用随取,用完后必须close。
插入数据:
// Single-row insert
// try-with-resources closes the Table even if put() throws
try (Table table1 = HBaseUtils.getInstance().getTable("t_target")) {
    // Row key is 1547436588
    Put putOne = new Put(Bytes.toBytes("1547436588"));
    // "t" is the column family given at table creation, "aaa" is a qualifier in it, "bbb" is its value
    putOne.addColumn(Bytes.toBytes("t"), Bytes.toBytes("aaa"), Bytes.toBytes("bbb"));
    table1.put(putOne);
}
// Batch insert
// try-with-resources closes the Table even if put() throws
try (Table table2 = HBaseUtils.getInstance().getTable("t_target")) {
    Put firstPut = new Put(Bytes.toBytes("1547436586"));
    Put secondPut = new Put(Bytes.toBytes("1547436587"));
    Put thirdPut = new Put(Bytes.toBytes("1547436588"));
    firstPut.addColumn(Bytes.toBytes("t"), Bytes.toBytes("aaa"), Bytes.toBytes("bbb"));
    secondPut.addColumn(Bytes.toBytes("t"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd"));
    thirdPut.addColumn(Bytes.toBytes("t"), Bytes.toBytes("eee"), Bytes.toBytes("fff"));

    List<Put> putList = new ArrayList<>();
    putList.add(firstPut);
    putList.add(secondPut);
    putList.add(thirdPut);
    // All puts are submitted in one call
    table2.put(putList);
}
根据rowkey查询对应的值:
// Lookup by a single row key
// try-with-resources closes the Table even if get() throws
try (Table table1 = HBaseUtils.getInstance().getTable("t_target")) {
    // 1547436588 is the row key
    Get get = new Get(Bytes.toBytes("1547436588"));
    Result set = table1.get(get);
    for (Cell cell : set.rawCells()) {
        String targetStr = Bytes.toString(CellUtil.cloneValue(cell));
        logger.info("获取到此行的value==" + targetStr);
    }
}
// Batch lookup
// try-with-resources closes the Table even if get() throws
try (Table table2 = HBaseUtils.getInstance().getTable("t_target")) {
    List<String> rowkeyList = new ArrayList<>();
    rowkeyList.add("1547436586");
    rowkeyList.add("1547436587");
    rowkeyList.add("1547436588");

    // Wrap each row key in a Get and collect the Gets into one list
    List<Get> getList = new ArrayList<>();
    for (String rowkey : rowkeyList) {
        getList.add(new Get(Bytes.toBytes(rowkey)));
    }

    // Key point: the whole List<Get> is submitted in a single call
    Result[] results = table2.get(getList);
    // Walk every returned row and log each cell value
    for (Result result : results) {
        for (Cell kv : result.rawCells()) {
            String value = Bytes.toString(CellUtil.cloneValue(kv));
            logger.info("value==" + value);
        }
    }
}
删除某个rowkey及对应的值:
// Delete one row (the row key and all of its cells)
// try-with-resources closes the Table even if delete() throws
try (Table table = HBaseUtils.getInstance().getTable("t_target")) {
    Delete delete = new Delete(Bytes.toBytes("1547436588"));
    table.delete(delete);
}