除了以上几种分组方式之外,我们还可以自定义分组,自己DIY去控制哪些Bolt来接收Tuple;
实现自定义分组需要实现 backtype.storm.grouping.CustomStreamGrouping 接口
我们现在业务中遇到一个问题:想让用户的uid按照分段的规则grouping到对应的task上面,于是采用uid%k的方法将相同模值的记录交给同一个task进行业务处理,自己实现了ModStreamGrouping,
代码如下:
package storm.starter;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import backtype.storm.grouping.CustomStreamGrouping;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Fields;
/**
 * Custom stream grouping that routes each tuple to a target task by the
 * modulus of its first field (the uid): all tuples with the same
 * {@code uid % taskCount} are delivered to the same task.
 */
public class ModStreamGrouping implements CustomStreamGrouping {

    /** Output fields declared by the source component (kept for reference). */
    private Fields _fields;
    /** Task ids of the consuming component; the routing target set. */
    private List<Integer> _targetTasks;

    public ModStreamGrouping() {
    }

    /**
     * Captures the declared output fields and the list of consumer task ids.
     *
     * @param context     topology context of the running topology
     * @param outFields   fields emitted by the source component
     * @param targetTasks task ids this grouping may route tuples to
     */
    @Override
    public void prepare(TopologyContext context, Fields outFields,
            List<Integer> targetTasks) {
        _fields = outFields;
        _targetTasks = targetTasks;
    }

    /**
     * Chooses the target task for one emitted tuple.
     *
     * @param values the emitted values; the first value must parse as a long uid
     * @return a single-element list with the chosen task id
     */
    @Override
    public List<Integer> chooseTasks(List<Object> values) {
        long groupingKey = Long.parseLong(values.get(0).toString());
        // Math.floorMod keeps the index in [0, size) even for negative keys;
        // a plain % would produce a negative index and an out-of-bounds access.
        int index = (int) Math.floorMod(groupingKey, (long) _targetTasks.size());
        return Arrays.asList(_targetTasks.get(index));
    }
}
测试:
package storm.starter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
/**
 * Local-cluster smoke test for {@link ModStreamGrouping}: a spout emits random
 * uids and a 10-task bolt prints which task received each uid, so the
 * uid-modulus routing can be observed in the log output.
 */
public class ModGroupingTest {

    /** Spout that emits one random uid (field "uid") roughly every 100 ms. */
    public static class TestUidSpout extends BaseRichSpout {
        boolean _isDistributed;
        SpoutOutputCollector _collector;
        // One Random per spout instance instead of a new one per tuple.
        private final Random _rand = new Random();

        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            _collector = collector;
        }

        public void close() {
        }

        public void nextTuple() {
            Utils.sleep(100);
            final int uid = _rand.nextInt(100000000);
            _collector.emit(new Values(uid));
        }

        public void ack(Object msgId) {
        }

        public void fail(Object msgId) {
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("uid"));
        }
    }

    /** Bolt that logs the receiving task id and forwards the uid downstream. */
    public static class modGroupBolt extends BaseRichBolt {
        OutputCollector _collector;
        String _ComponentId;
        int _TaskId;

        @Override
        public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
            _collector = collector;
            _ComponentId = context.getThisComponentId();
            _TaskId = context.getThisTaskId();
        }

        @Override
        public void execute(Tuple tuple) {
            System.out.println(_ComponentId + ":" + _TaskId + " receive :" + tuple.getInteger(0));
            // Re-emit the uid value itself; emitting new Values(tuple) would
            // wrap the whole Tuple object as the "uid" field.
            _collector.emit(new Values(tuple.getInteger(0)));
            _collector.ack(tuple);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("uid"));
        }
    }

    public static void main(String args[]) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("uid", new TestUidSpout());
        // 10 bolt tasks, wired with the custom uid-modulus grouping under test.
        builder.setBolt("process", new modGroupBolt(), 10).customGrouping("uid", new ModStreamGrouping());

        Config config = new Config();
        config.setDebug(true);
        config.setNumWorkers(3);

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", config, builder.createTopology());
        // Let the topology run long enough to observe the routing, then
        // tear the local cluster down so the test process terminates.
        Utils.sleep(30000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}