For quite a while, everyone implemented HBase access by hand inside their own Storm components, whether in a plain topology or in Trident. At some point (I am not sure from which release onwards), the Storm distribution started shipping several extra jars that already provide ready-made support for HBase, MySQL, MongoDB and other stores, so we no longer have to write that plumbing ourselves. This post walks through how the storm-hbase jar is implemented for a plain (non-Trident) topology:
package org.apache.storm.hbase.bolt;
import org.apache.storm.Config;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.commons.lang.Validate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
import org.apache.storm.hbase.common.HBaseClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
* @ClassName: AbstractHBaseBolt
* @Description: an abstract HBase bolt that provides no functionality of its own and only initializes the state shared by its subclasses; the actual work is done by the two subclasses, one of which writes to HBase and the other of which queries it
*/
public abstract class AbstractHBaseBolt extends BaseRichBolt {//extends Storm's BaseRichBolt
private static final Logger LOG = LoggerFactory.getLogger(AbstractHBaseBolt.class);
protected OutputCollector collector;
//hBaseClient is transient and excluded from serialization; it is storm-hbase's own HBaseClient, used to connect to and access HBase, and is covered later in this post
protected transient HBaseClient hBaseClient;
//name of the HBase table this bolt operates on
protected String tableName;
//HBaseMapper is an interface defined by storm-hbase; its job is to map an incoming tuple to the corresponding HBase rowkey and columns
protected HBaseMapper mapper;
protected String configKey;//assigned by the subclasses; it is the key under which the HBase settings are stored in the Storm conf map
/*when using storm-hbase, the HBase-related settings are put into a map inside the Storm conf when the topology is initialized, and the bolt fetches that map with this key*/
public AbstractHBaseBolt(String tableName, HBaseMapper mapper) {
Validate.notEmpty(tableName, "Table name can not be blank or null");
Validate.notNull(mapper, "mapper can not be null");
this.tableName = tableName;
this.mapper = mapper;
}
@Override
public void prepare(Map map, TopologyContext topologyContext, OutputCollector collector) {
this.collector = collector;
final Configuration hbConfig = HBaseConfiguration.create();
Map<String, Object> conf = (Map<String, Object>)map.get(this.configKey);
if(conf == null) {//when using storm-hbase, a map holding the HBase settings must be placed into the Storm conf; fail if it is missing
throw new IllegalArgumentException("HBase configuration not found using key '" + this.configKey + "'");
}
if(conf.get("hbase.rootdir") == null) {
LOG.warn("No 'hbase.rootdir' value found in configuration! Using HBase defaults.");
}
for(String key : conf.keySet()) {//copy the HBase settings into the HBaseConfiguration
hbConfig.set(key, String.valueOf(conf.get(key)));
}
Map<String, Object> hbaseConfMap = new HashMap<String, Object>(conf);//conf is a persistent (read-only) map, so a mutable copy is made here for the HBaseClient
//for backward compatibility, TOPOLOGY_AUTO_CREDENTIALS is copied into the HBase config map (with this setting Nimbus hands each worker its own credentials, which the worker then uses to access HBase; presumably this is used when Kerberos authentication is enabled)
hbaseConfMap.put(Config.TOPOLOGY_AUTO_CREDENTIALS, map.get(Config.TOPOLOGY_AUTO_CREDENTIALS));
this.hBaseClient = new HBaseClient(hbaseConfMap, hbConfig, tableName);//create the HBaseClient
}
@Override
public void cleanup() {//close the HBaseClient properly
try {
hBaseClient.close();
} catch (IOException e) {
LOG.error("HBase Client Close Failed ", e);
}
}
}
The code above is the abstract HBase bolt. Just like the bolts we write ourselves, it extends BaseRichBolt (although I occasionally extend BaseBasicBolt instead). Its prepare method mainly does the following:
1. It initializes the collector, because the collector instance only becomes available once prepare runs.
2. It initializes hBaseClient. This has to happen here because the field is marked transient and is not serialized, so when Nimbus distributes the topology the field arrives as null and can only be instantiated in prepare. The reason is that an HBaseClient holds a lot of state that cannot be deserialized after travelling over the network. To give one concrete example: the client sets up proxy objects for RPC communication between the client and the cluster, and Nimbus cannot possibly hand an established RPC connection to a worker over the wire; even if the worker received the client object, no RPC connection would actually exist on its side.
The constructor, by contrast, only assigns tableName and mapper, both of which can safely be shipped across the network;
the cleanup method is responsible for closing the HBaseClient safely.
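To make the configKey mechanism used by prepare() concrete, here is a minimal sketch of the topology-side setup; the key name "hbase.conf" and the concrete property values are placeholders of mine, not something prescribed by storm-hbase:

Config conf = new Config();
Map<String, Object> hbConf = new HashMap<String, Object>();
hbConf.put("hbase.rootdir", "hdfs://namenode:8020/hbase");       // placeholder value
hbConf.put("hbase.zookeeper.quorum", "zk1,zk2,zk3");              // placeholder value
conf.put("hbase.conf", hbConf);                                   // prepare() will look this map up under the same key
// ... later, when building the bolt:
// new HBaseBolt("WordCount", mapper).withConfigKey("hbase.conf");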
Now let's look at its two subclasses, which write to and query HBase respectively; the main logic of each lives in its execute method.
(1) The bolt that writes data to HBase; source and comments below:
package org.apache.storm.hbase.bolt;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.utils.BatchHelper;
import org.apache.storm.utils.TupleUtils;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
import org.apache.storm.hbase.common.ColumnList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.List;
import java.util.LinkedList;
/**
* Basic bolt for writing to HBase.
*
* Note: Each HBaseBolt defined in a topology is tied to a specific table.
*
*/
public class HBaseBolt extends AbstractHBaseBolt {
private static final long serialVersionUID = 5638715724596618454L;
private static final Logger LOG = LoggerFactory.getLogger(HBaseBolt.class);
private static final int DEFAULT_FLUSH_INTERVAL_SECS = 1;//the default flush interval is 1 second
boolean writeToWAL = true;//whether to write to the write-ahead log (in HBase the WAL ensures that data is not lost after a power failure or similar crash, at the cost of some throughput)
List<Mutation> batchMutations;//the data waiting to be written to HBase, either regular puts or counter increments
int flushIntervalSecs = DEFAULT_FLUSH_INTERVAL_SECS;
int batchSize;//the batch size used by the BatchHelper, 0 by default
BatchHelper batchHelper;//Storm's built-in batching helper
public HBaseBolt(String tableName, HBaseMapper mapper) {
super(tableName, mapper);
this.batchMutations = new LinkedList<>();
}
public HBaseBolt writeToWAL(boolean writeToWAL) {
this.writeToWAL = writeToWAL;
return this;
}
public HBaseBolt withConfigKey(String configKey) {
this.configKey = configKey;
return this;
}
public HBaseBolt withBatchSize(int batchSize) {
this.batchSize = batchSize;
return this;
}
public HBaseBolt withFlushIntervalSecs(int flushIntervalSecs) {
this.flushIntervalSecs = flushIntervalSecs;
return this;
}
@Override
public Map<String, Object> getComponentConfiguration() {//ask Storm to send this bolt a tick tuple every flushIntervalSecs seconds
return TupleUtils.putTickFrequencyIntoComponentConfig(super.getComponentConfiguration(), flushIntervalSecs);
}
@Override
public void execute(Tuple tuple) {
try {
if (batchHelper.shouldHandle(tuple)) {//the tuple is a regular data (business) tuple that needs processing
byte[] rowKey = this.mapper.rowKey(tuple);//use the mapper to derive the HBase rowkey
ColumnList cols = this.mapper.columns(tuple);//map the tuple's fields to HBase columns (ColumnList is storm-hbase's own wrapper covering both standard and counter columns; more on it later)
/*here the wrapped columns are turned into a list of Mutations, which is later written in one batch through the HBase API*/
List<Mutation> mutations = hBaseClient.constructMutationReq(rowKey, cols, writeToWAL? Durability.SYNC_WAL : Durability.SKIP_WAL);
batchMutations.addAll(mutations);//append to the overall batchMutations list
batchHelper.addBatch(tuple);//buffer the tuple in the batch so it can be acked later
}
if (batchHelper.shouldFlush()) {//time to write to HBase (either a tick tuple arrived, or the code above pushed the number of buffered tuples up to batchSize)
this.hBaseClient.batchMutate(batchMutations);//write the whole batch through the HBaseClient
LOG.debug("acknowledging tuples after batchMutate");
batchHelper.ack();//ack all buffered tuples
batchMutations.clear();//clear the pending mutations
}
} catch(Exception e){
batchHelper.fail(e);//fail all buffered tuples
batchMutations.clear();//clear the pending mutations (I am not completely sure here, but since the whole batch of tuples is failed and will be replayed, the buffered mutations have to be discarded as well)
}
}
@Override
public void prepare(@SuppressWarnings("rawtypes") Map map, TopologyContext topologyContext, OutputCollector collector) {
super.prepare(map, topologyContext, collector);//this call is required: the parent's prepare does most of the setup work
this.batchHelper = new BatchHelper(batchSize, collector);//initialize a BatchHelper with the configured batchSize
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
}
}
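To see the fluent setters in action, this is roughly how a write bolt is assembled and wired into a topology (the table name, field names, the "hbase.conf" key and WordSpout are all illustrative, not from the storm-hbase source):

SimpleHBaseMapper mapper = new SimpleHBaseMapper()   // SimpleHBaseMapper is shown later in this post
        .withRowKeyField("word")
        .withColumnFields(new Fields("word"))
        .withCounterFields(new Fields("count"))
        .withColumnFamily("cf");

HBaseBolt hbaseBolt = new HBaseBolt("WordCount", mapper)
        .withConfigKey("hbase.conf")        // must match the key put into the topology Config
        .withBatchSize(100)                 // flush once 100 tuples are buffered...
        .withFlushIntervalSecs(2);          // ...or at the latest every 2 seconds, via the tick tuple

TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("word-spout", new WordSpout());                  // WordSpout is a stand-in for your own spout
builder.setBolt("hbase-bolt", hbaseBolt, 2).fieldsGrouping("word-spout", new Fields("word"));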
(2) The bolt that reads data from HBase:
package org.apache.storm.hbase.bolt;
import org.apache.commons.lang.Validate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
import org.apache.storm.hbase.bolt.mapper.HBaseProjectionCriteria;
import org.apache.storm.hbase.bolt.mapper.HBaseValueMapper;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.TupleUtils;
import com.google.common.collect.Lists;
/**
* Basic bolt for querying from HBase.
*
* Note: Each HBaseBolt defined in a topology is tied to a specific table.
*
*/
public class HBaseLookupBolt extends AbstractHBaseBolt {
private static final long serialVersionUID = 8253062226922790455L;
/*HBaseValueMapper turns the Result returned by HBase into tuples and declares the output fields; you implement it yourself; it is introduced later*/
private HBaseValueMapper rowToTupleMapper;
/*HBaseProjectionCriteria narrows down the Get: it wraps the column families and the family:qualifier pairs to fetch, so a lookup can request a whole column family or only specific columns; it is also introduced later*/
private HBaseProjectionCriteria projectionCriteria;
public HBaseLookupBolt(String tableName, HBaseMapper mapper, HBaseValueMapper rowToTupleMapper){
super(tableName, mapper);
Validate.notNull(rowToTupleMapper, "rowToTupleMapper can not be null");
this.rowToTupleMapper = rowToTupleMapper;
}
public HBaseLookupBolt withConfigKey(String configKey){
this.configKey = configKey;
return this;
}
public HBaseLookupBolt withProjectionCriteria(HBaseProjectionCriteria projectionCriteria) {
this.projectionCriteria = projectionCriteria;
return this;
}
@Override
public void execute(Tuple tuple) {
if (TupleUtils.isTick(tuple)) {//tick tuples carry no data, so they are simply skipped
collector.ack(tuple);//just ack the tick tuple
return;
}
byte[] rowKey = this.mapper.rowKey(tuple);
Get get = hBaseClient.constructGetRequests(rowKey, projectionCriteria);//build the Get request
try {
/*the operation is really just table.get(get); the slightly odd-looking part is that a single Get goes through the batch-get method, but that is simply to reuse the one interface the client already exposes instead of adding another wrapper*/
Result result = hBaseClient.batchGet(Lists.newArrayList(get))[0];
for(Values values : rowToTupleMapper.toValues(tuple, result)) {//convert the Result into values and emit them one by one, anchored to the input tuple
this.collector.emit(tuple, values);
}
this.collector.ack(tuple);//ack the input tuple
} catch (Exception e) {
//error handling
this.collector.reportError(e);
this.collector.fail(tuple);
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
rowToTupleMapper.declareOutputFields(outputFieldsDeclarer);//declare the output fields
}
}
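Continuing the wiring sketch from above, a lookup bolt is assembled the same way; MyValueMapper stands for your own HBaseValueMapper implementation (the interface is covered at the end of this post), and all names are again illustrative:

SimpleHBaseMapper rowKeyMapper = new SimpleHBaseMapper().withRowKeyField("word");   // only the rowkey is needed for a lookup

HBaseLookupBolt lookupBolt = new HBaseLookupBolt("WordCount", rowKeyMapper, new MyValueMapper())
        .withConfigKey("hbase.conf");

builder.setBolt("lookup-bolt", lookupBolt, 1).shuffleGrouping("word-spout");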
Next, the annotated HBaseClient code:
package org.apache.storm.hbase.common;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.storm.hbase.bolt.mapper.HBaseProjectionCriteria;
import org.apache.storm.hbase.security.HBaseSecurityUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.Map;
public class HBaseClient implements Closeable{
private static final Logger LOG = LoggerFactory.getLogger(HBaseClient.class);
private HTable table;//the HBase table handle
public HBaseClient(Map<String, Object> map , final Configuration configuration, final String tableName) {
try {
UserProvider provider = HBaseSecurityUtil.login(map, configuration);//log in through HBase's secure-login mechanism
this.table = provider.getCurrent().getUGI().doAs(new PrivilegedExceptionAction<HTable>() {
@Override
public HTable run() throws IOException {
return new HTable(configuration, tableName);//create the HBase table handle
}
});
} catch(Exception e) {
throw new RuntimeException("HBase bolt preparation failed: " + e.getMessage(), e);
}
}
public List<Mutation> constructMutationReq(byte[] rowKey, ColumnList cols, Durability durability) {//build the list of Mutations used for inserts/updates
List<Mutation> mutations = Lists.newArrayList();
if (cols.hasColumns()) {//there are standard columns to write
Put put = new Put(rowKey);//create the Put
put.setDurability(durability);//set the durability (WAL behaviour)
for (ColumnList.Column col : cols.getColumns()) {
if (col.getTs() > 0) {//columns with an explicit timestamp include it
put.add(
col.getFamily(),
col.getQualifier(),
col.getTs(),
col.getValue()
);
} else {//columns without a timestamp are added without one
put.add(
col.getFamily(),
col.getQualifier(),
col.getValue()
);
}
}
mutations.add(put);//queue the Put
}
if (cols.hasCounters()) {//there are counter columns to write
Increment inc = new Increment(rowKey);//create an Increment
inc.setDurability(durability);//set the durability
for (ColumnList.Counter cnt : cols.getCounters()) {
inc.addColumn(
cnt.getFamily(),
cnt.getQualifier(),
cnt.getIncrement()
);
}
mutations.add(inc);
}
if (mutations.isEmpty()) {//neither standard columns nor counters: add an empty Put, i.e. store only the rowkey
mutations.add(new Put(rowKey));
}
return mutations;
}
public void batchMutate(List<Mutation> mutations) throws Exception {//submit all inserts/updates in the mutation list as one batch
Object[] result = new Object[mutations.size()];
try {
table.batch(mutations, result);
} catch (InterruptedException e) {
LOG.warn("Error performing a mutation to HBase.", e);
throw e;
} catch (IOException e) {
LOG.warn("Error performing a mutation to HBase.", e);
throw e;
}
}
public Get constructGetRequests(byte[] rowKey, HBaseProjectionCriteria projectionCriteria) {//build the lookup request
Get get = new Get(rowKey);//create the HBase Get
if (projectionCriteria != null) {
for (byte[] columnFamily : projectionCriteria.getColumnFamilies()) {//column families to fetch in full
get.addFamily(columnFamily);
}
for (HBaseProjectionCriteria.ColumnMetaData columnMetaData : projectionCriteria.getColumns()) {//specific columns to fetch
get.addColumn(columnMetaData.getColumnFamily(), columnMetaData.getQualifier());
}
}
return get;
}
public Result[] batchGet(List<Get> gets) throws Exception {//execute several Gets in one batch
try {
return table.get(gets);
} catch (Exception e) {
LOG.warn("Could not perform HBASE lookup.", e);
throw e;
}
}
@Override
public void close() throws IOException {//close the table
table.close();
}
}
HBaseClient uses HBaseSecurityUtil, whose annotated code follows:
package org.apache.storm.hbase.security;
import static org.apache.storm.Config.TOPOLOGY_AUTO_CREDENTIALS;
import java.io.IOException;
import java.net.InetAddress;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class provides util methods for storm-hbase connector communicating
* with secured HBase.
*/
public class HBaseSecurityUtil {
private static final Logger LOG = LoggerFactory.getLogger(HBaseSecurityUtil.class);
public static final String STORM_KEYTAB_FILE_KEY = "storm.keytab.file";
public static final String STORM_USER_NAME_KEY = "storm.kerberos.principal";
private static UserProvider legacyProvider = null;
@SuppressWarnings("rawtypes")
public static UserProvider login(Map conf, Configuration hbaseConfig) throws IOException {
//Allowing keytab based login for backward compatibility.
if (UserGroupInformation.isSecurityEnabled() && (conf.get(TOPOLOGY_AUTO_CREDENTIALS) == null ||
!(((List) conf.get(TOPOLOGY_AUTO_CREDENTIALS)).contains(AutoHBase.class.getName())))) {//security is enabled and AutoHBase auto-credentials are not configured, so log in with the keytab
LOG.info("Logging in using keytab as AutoHBase is not specified for " + TOPOLOGY_AUTO_CREDENTIALS);
//insure that if keytab is used only one login per process executed
if(legacyProvider == null) {
synchronized (HBaseSecurityUtil.class) {
if(legacyProvider == null) {
//instantiate a UserProvider only once per process
legacyProvider = UserProvider.instantiate(hbaseConfig);
/*what follows is just the keytab-based login code; read through it if you are interested*/
String keytab = (String) conf.get(STORM_KEYTAB_FILE_KEY);
if (keytab != null) {
hbaseConfig.set(STORM_KEYTAB_FILE_KEY, keytab);
}
String userName = (String) conf.get(STORM_USER_NAME_KEY);
if (userName != null) {
hbaseConfig.set(STORM_USER_NAME_KEY, userName);
}
legacyProvider.login(STORM_KEYTAB_FILE_KEY, STORM_USER_NAME_KEY,
InetAddress.getLocalHost().getCanonicalHostName());
}
}
}
return legacyProvider;
} else {//security is disabled, or automatic credentials are used, so simply instantiate a UserProvider
return UserProvider.instantiate(hbaseConfig);
}
}
}
Then the annotated HBaseMapper code:
package org.apache.storm.hbase.bolt.mapper;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.hbase.common.ColumnList;
import java.io.Serializable;
/**
* Maps a <code>org.apache.storm.tuple.Tuple</code> object
* to a row in an HBase table.
* This is an interface you implement yourself. It declares two methods: one derives the rowkey from a tuple, the other derives the columns.
* storm-hbase ships a simple implementation, shown in the next listing.
*/
public interface HBaseMapper extends Serializable {
/**
* Given a tuple, return the HBase rowkey.
*
* @param tuple
* @return
*/
byte[] rowKey(Tuple tuple);
/**
* Given a tuple, return a list of HBase columns to insert.
*
* @param tuple
* @return
*/
ColumnList columns(Tuple tuple);
}
Its simple implementation looks like this:
package org.apache.storm.hbase.bolt.mapper;
import static org.apache.storm.hbase.common.Utils.toBytes;
import static org.apache.storm.hbase.common.Utils.toLong;
import org.apache.storm.hbase.common.ColumnList;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
public class SimpleHBaseMapper implements HBaseMapper {
private static final long serialVersionUID = 1L;
private String rowKeyField;//the tuple field that holds the HBase rowkey
private byte[] columnFamily;//the column family
private Fields columnFields;//the tuple fields that map to standard HBase columns
private Fields counterFields;//the tuple fields that map to HBase counter columns
public SimpleHBaseMapper(){
}
public SimpleHBaseMapper withRowKeyField(String rowKeyField){//just a group of setters, though chaining them feels rather nice...
this.rowKeyField = rowKeyField;
return this;
}
public SimpleHBaseMapper withColumnFields(Fields columnFields){
this.columnFields = columnFields;
return this;
}
public SimpleHBaseMapper withCounterFields(Fields counterFields){
this.counterFields = counterFields;
return this;
}
public SimpleHBaseMapper withColumnFamily(String columnFamily){
this.columnFamily = columnFamily.getBytes();
return this;
}
@Override
public byte[] rowKey(Tuple tuple) {//return the rowkey value
Object objVal = tuple.getValueByField(this.rowKeyField);
return toBytes(objVal);
}
@Override
public ColumnList columns(Tuple tuple) {
//ColumnList is also a storm-hbase class, see below
ColumnList cols = new ColumnList();
if(this.columnFields != null){//if standard column fields are configured, add a standard column {family -> qualifier -> value} for each of them
for(String field : this.columnFields){
cols.addColumn(this.columnFamily, field.getBytes(), toBytes(tuple.getValueByField(field)));
}
}
if(this.counterFields != null){//if counter fields are configured, add a counter {family -> qualifier -> increment (must be a long)} for each of them
for(String field : this.counterFields){
cols.addCounter(this.columnFamily, field.getBytes(), toLong(tuple.getValueByField(field)));
}
}
return cols;
}
}
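SimpleHBaseMapper covers the common case; if you need something it cannot express, such as a composite rowkey, you simply implement HBaseMapper yourself. A minimal sketch (class name, column family and tuple field names are all made up for the example):

import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
import org.apache.storm.hbase.common.ColumnList;
import org.apache.storm.tuple.Tuple;

public class OrderHBaseMapper implements HBaseMapper {
    private static final long serialVersionUID = 1L;

    @Override
    public byte[] rowKey(Tuple tuple) {
        // composite rowkey: userId + "_" + orderId, both assumed to be fields of the incoming tuple
        return (tuple.getStringByField("userId") + "_" + tuple.getStringByField("orderId")).getBytes();
    }

    @Override
    public ColumnList columns(Tuple tuple) {
        ColumnList cols = new ColumnList();
        cols.addColumn("info".getBytes(), "amount".getBytes(), tuple.getStringByField("amount").getBytes());
        cols.addCounter("stat".getBytes(), "orders".getBytes(), 1L);   // bump a counter column by 1 for every tuple
        return cols;
    }
}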
ColumnList is used in many places; its annotated code follows:
package org.apache.storm.hbase.common;
import java.util.ArrayList;
import java.util.List;
/**
* Represents a list of HBase columns.
*
* There are two types of columns, <i>standard</i> and <i>counter</i>.
*
* Standard columns have <i>column family</i> (required), <i>qualifier</i> (optional),
* <i>timestamp</i> (optional), and a <i>value</i> (optional) values.
*
* Counter columns have <i>column family</i> (required), <i>qualifier</i> (optional),
* and an <i>increment</i> (optional, but recommended) values.
*
* Inserts/Updates can be added via the <code>addColumn()</code> and <code>addCounter()</code>
* methods.
*
*/
public class ColumnList {
public static abstract class AbstractColumn {//an abstract column providing just the family and qualifier shared by both types
byte[] family, qualifier;
AbstractColumn(byte[] family, byte[] qualifier){
this.family = family;
this.qualifier = qualifier;
}
public byte[] getFamily() {
return family;
}
public byte[] getQualifier() {
return qualifier;
}
}
public static class Column extends AbstractColumn {//a standard column
byte[] value;
long ts = -1;//the timestamp defaults to -1 (i.e. no explicit timestamp)
Column(byte[] family, byte[] qualifier, long ts, byte[] value){
super(family, qualifier);
this.value = value;
this.ts = ts;
}
public byte[] getValue() {
return value;
}
public long getTs() {
return ts;
}
}
public static class Counter extends AbstractColumn {//a counter column
long incr = 0;//the increment defaults to 0
Counter(byte[] family, byte[] qualifier, long incr){
super(family, qualifier);
this.incr = incr;
}
public long getIncrement() {
return incr;
}
}
private ArrayList<Column> columns;
private ArrayList<Counter> counters;
private ArrayList<Column> columns(){
if(this.columns == null){
this.columns = new ArrayList<Column>();
}
return this.columns;
}
private ArrayList<Counter> counters(){
if(this.counters == null){
this.counters = new ArrayList<Counter>();
}
return this.counters;
}
/**
* Add a standard HBase column.
*
* @param family
* @param qualifier
* @param ts
* @param value
* @return
*/
public ColumnList addColumn(byte[] family, byte[] qualifier, long ts, byte[] value){//add a standard column to the list
columns().add(new Column(family, qualifier, ts, value));
return this;
}
/**
* Add a standard HBase column
* @param family
* @param qualifier
* @param value
* @return
*/
public ColumnList addColumn(byte[] family, byte[] qualifier, byte[] value){
columns().add(new Column(family, qualifier, -1, value));
return this;
}
/**
* Add a standard HBase column given an instance of a class that implements
* the <code>IColumn</code> interface.
* @param column
* @return
*/
public ColumnList addColumn(IColumn column){
return this.addColumn(column.family(), column.qualifier(), column.timestamp(), column.value());
}
/**
* Add an HBase counter column.
*
* @param family
* @param qualifier
* @param incr
* @return
*/
public ColumnList addCounter(byte[] family, byte[] qualifier, long incr){//add a counter column to the list
counters().add(new Counter(family, qualifier, incr));
return this;
}
/**
* Add an HBase counter column given an instance of a class that implements the
* <code>ICounter</code> interface.
* @param counter
* @return
*/
public ColumnList addCounter(ICounter counter){
return this.addCounter(counter.family(), counter.qualifier(), counter.increment());
}
/**
* Query to determine if we have column definitions.
*
* @return
*/
public boolean hasColumns(){
return this.columns != null;
}
/**
* Query to determine if we have counter definitions.
*
* @return
*/
public boolean hasCounters(){
return this.counters != null;
}
/**
* Get the list of column definitions.
*
* @return
*/
public List<Column> getColumns(){
return this.columns;
}
/**
* Get the list of counter definitions.
* @return
*/
public List<Counter> getCounters(){
return this.counters;
}
}
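One practical note on the timestamp variant of addColumn(): if the cell timestamp is taken from the tuple itself rather than assigned by the HBase server, a replayed tuple overwrites the same cell version instead of creating a new one, which keeps the Put part of the write replay-safe (counter increments are not). A small sketch, where the "eventTime" and "value" field names are assumptions of mine:

long eventTs = tuple.getLongByField("eventTime");                 // assumed field carrying the event time
byte[] valueBytes = tuple.getStringByField("value").getBytes();   // assumed field carrying the payload
ColumnList cols = new ColumnList()
        .addColumn("cf".getBytes(), "value".getBytes(), eventTs, valueBytes)  // explicit timestamp -> replays overwrite the same version
        .addCounter("cf".getBytes(), "hits".getBytes(), 1L);                  // increments are still not idempotent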
ColumnList also accepts standard and counter columns through the IColumn and ICounter interfaces; they are defined as follows:
package org.apache.storm.hbase.common;
/**
* Interface definition for classes that support being written to HBase as
* a regular column.
*
*/
public interface IColumn {
byte[] family();
byte[] qualifier();
byte[] value();
long timestamp();
}
package org.apache.storm.hbase.common;
/**
* Interface definition for classes that support being written to HBase as
* a counter column.
*
*/
public interface ICounter {
byte[] family();
byte[] qualifier();
long increment();
}
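As a quick illustration of the interface route, this is what a tiny ICounter implementation could look like (all names are invented for the example):

public class PageViewCounter implements ICounter {   // illustrative implementation, not part of storm-hbase
    private final byte[] qualifier;
    private final long increment;

    public PageViewCounter(String page, long increment) {
        this.qualifier = page.getBytes();
        this.increment = increment;
    }

    @Override
    public byte[] family() { return "stat".getBytes(); }

    @Override
    public byte[] qualifier() { return qualifier; }

    @Override
    public long increment() { return increment; }
}

// used inside an HBaseMapper.columns() implementation:
// cols.addCounter(new PageViewCounter("/index.html", 1L));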
As for HBaseProjectionCriteria, which HBaseLookupBolt and HBaseClient.constructGetRequests() consume above, it is only a small serializable holder: it keeps a list of column families to fetch in full and a list of ColumnMetaData entries (column family plus qualifier) for individual columns, and exposes them through getColumnFamilies() and getColumns().
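Judging from how constructGetRequests() reads it, using it looks roughly like this; note that I am recalling the adder methods addColumnFamily() and addColumn() from the storm-hbase documentation rather than from code shown in this post, so treat them as an assumption:

HBaseProjectionCriteria projection = new HBaseProjectionCriteria();
projection.addColumnFamily("info");                                                  // fetch the whole "info" family
projection.addColumn(new HBaseProjectionCriteria.ColumnMetaData("stat", "hits"));    // plus one specific column
lookupBolt.withProjectionCriteria(projection);                                        // lookupBolt is the HBaseLookupBolt built earlier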
Finally, the annotated HBaseValueMapper code:
package org.apache.storm.hbase.bolt.mapper;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.ITuple;
import org.apache.storm.tuple.Values;
import org.apache.hadoop.hbase.client.Result;
import java.io.Serializable;
import java.util.List;
//This interface defines how the Result of an HBase lookup is mapped into tuple values and how the lookup bolt's output fields are declared
public interface HBaseValueMapper extends Serializable {
/**
*
* @param input tuple.
* @param result HBase lookup result instance.
* @return list of values that should be emitted by the lookup bolt.
* @throws Exception
*/
public List<Values> toValues(ITuple input, Result result) throws Exception;
/**
* declares the output fields for the lookup bolt.
* @param declarer
*/
void declareOutputFields(OutputFieldsDeclarer declarer);
}
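To close the loop, here is a minimal HBaseValueMapper sketch in the spirit of the WordCountValueMapper that ships with the storm-hbase examples; the class name, the "cf:count" family/qualifier and the output field names are assumptions made for this example:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.storm.hbase.bolt.mapper.HBaseValueMapper;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.ITuple;
import org.apache.storm.tuple.Values;

public class CountValueMapper implements HBaseValueMapper {
    private static final long serialVersionUID = 1L;

    @Override
    public List<Values> toValues(ITuple input, Result result) throws Exception {
        List<Values> values = new ArrayList<Values>();
        byte[] raw = result.getValue("cf".getBytes(), "count".getBytes());   // assumed family:qualifier
        if (raw != null) {
            // counter columns are stored by HBase as 8-byte longs, hence Bytes.toLong
            values.add(new Values(input.getStringByField("word"), Bytes.toLong(raw)));
        }
        return values;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}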