import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseAggregator;
import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.CombinerAggregator;
import org.apache.storm.trident.operation.ReducerAggregator;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.operation.TridentOperationContext;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
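/**
 * Demo Trident topology: splits sentences into words, runs them through
 * pass-through filters to illustrate partitioning and parallelism hints,
 * and persists a word count with persistentAggregate.
 */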
public class TopologyTrident {
/**
* @param args
*/
@SuppressWarnings("unchecked")
public static void main(String[] args) {
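        // FixedBatchSpout emits the sentences below in batches of at most 3 tuples;
        // setCycle(true) makes it replay them indefinitely.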
FixedBatchSpout spout = new FixedBatchSpout(
new Fields("sentence"), 3,
new Values("the cow jumped over the moon"),
// new Values("the man went to the store and bought some candy"),
// new Values("four score and seven years ago"),
new Values("the cow how many apples"));
spout.setCycle(true);
TridentTopology topology = new TridentTopology();
topology.newStream("spout",spout)
.each(new Fields("sentence"), new Split(), new Fields("word"))
.parallelismHint(2)
.partitionBy(new Fields("word")) //.shuffle()
.each(new Fields("word"),new Filter1())
            .parallelismHint(5) // partition
.shuffle()
.each(new Fields("word"),new Filter2())
// .aggregate(new Fields("word"), new CombinerCount() , new Fields("combaggr"));
// .aggregate(new Fields("word"), new ReducerCount() , new Fields("combaggr"));
            /*
             * aggregate paired with groupBy achieves partitioned aggregation; add a
             * parallelismHint after the aggregate to run it concurrently, e.g.:
             *   .groupBy(new Fields("word")).aggregate(new Fields("word"), new Agg1(), new Fields("aggr1")).parallelismHint(2)
             * partitionAggregate needs no groupBy; simply follow it with a parallelismHint
             * to get concurrent per-partition aggregation.
             */
// .parallelismHint(4)
// .groupBy(new Fields("word"))
// .aggregate(new Fields("word"),new Agg1(),new Fields("aggr1"));
// .partitionAggregate(new Fields("word"),new Agg1(),new Fields("aggr1")).parallelismHint(2);
// .persistentAggregate(new MySqlStateFactory(), new ReducerCount(),new Fields("aggr1"));
.groupBy(new Fields("word")).persistentAggregate(new MySqlStateFactory(), new ReducerCount(), new Fields("aggr2"));
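        // Build the Trident topology into a plain Storm topology and run it
        // in an in-process LocalCluster for local testing.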
StormTopology stormTopology = topology.build();
LocalCluster cluster = new LocalCluster();
Config conf = new Config();
conf.setDebug(false);
cluster.submitTopology("test", conf,stormTopology);
}
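    /**
     * Aggregator that logs which partition and thread handles each word and, at the
     * end of every batch, emits the words seen by this partition as one concatenated string.
     */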
@SuppressWarnings("rawtypes")
public static class Agg1 extends BaseAggregator<String>{
        public String testStr = " ";
        /**
         * The batch this aggregation belongs to.
         */
        private Object batchId;
        /**
         * Index of the partition this instance runs in.
         */
        private int partitionId;
        /**
         * Total number of partitions.
         */
        private int numPartitions;
        /**
         * Scratch map for per-word counts (initialized in prepare, unused in this demo).
         */
        private Map<String, Integer> state;
@SuppressWarnings("rawtypes")
@Override
public void prepare(Map conf, TridentOperationContext context) {
state = new HashMap<String,Integer>();
partitionId = context.getPartitionIndex();
numPartitions = context.numPartitions();
}
        @Override
        public String init(Object batchId, TridentCollector collector) {
            // Remember which batch is being aggregated; the running state itself
            // lives in the testStr field, so no per-batch state object is returned.
            this.batchId = batchId;
            return null;
        }
        @Override
        public void aggregate(String val, TridentTuple tuple, TridentCollector collector) {
            System.out.println("[partitionId " + partitionId + "] " + Thread.currentThread().getId() + " " + tuple.getString(0));
            // Append the word to this partition's buffer.
            testStr = testStr + tuple.getString(0);
        }
        @Override
        public void complete(String val, TridentCollector collector) {
            System.out.println(testStr + Thread.currentThread().getId() + " end");
            // Emit the words accumulated for this batch, then reset the buffer.
            collector.emit(new Values(testStr));
            testStr = " ";
        }
}
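    /**
     * Pass-through filter; it records its partition index so the commented-out
     * logging can show which partition each tuple reaches after partitionBy.
     */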
public static class Filter1 extends BaseFilter {
public int partitionIndex = 0;
@Override
public void prepare(Map conf, TridentOperationContext context) {
this.partitionIndex = context.getPartitionIndex();
super.prepare(conf, context);
}
@Override
public boolean isKeep(TridentTuple arg0) {
// System.out.println("["+partitionIndex+"]"+arg0.getString(0));
return true;
}
}
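    /**
     * Identical pass-through filter, placed after shuffle() to observe how tuples
     * are redistributed across partitions.
     */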
public static class Filter2 extends BaseFilter {
public int partitionIndex = 0;
@Override
public void prepare(Map conf, TridentOperationContext context) {
this.partitionIndex = context.getPartitionIndex();
super.prepare(conf, context);
}
@Override
public boolean isKeep(TridentTuple arg0) {
// System.out.println("["+partitionIndex+"]"+arg0.getString(0));
return true;
}
}
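    /**
     * Splits each incoming sentence on spaces and emits one tuple per word.
     */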
public static class Split extends BaseFunction {
public void execute(TridentTuple tuple, TridentCollector collector) {
String sentence = tuple.getString(0);
for(String word: sentence.split(" ")) {
collector.emit(new Values(word));
}
}
}
    /**
     * @author yanjd
     *
     * zero() runs first to supply the initial value (val1); init() is then called for
     * each tuple, and combine() iteratively folds the per-tuple results (val2) into it.
     */
public static class CombinerCount implements CombinerAggregator<String>{
@Override
public String zero() {
return "start";
}
@Override
public String init(TridentTuple tuple) {
// System.out.println(tuple);
return "init";
}
@Override
public String combine(String val1, String val2) {
System.out.println("val1:" + val1);
System.out.println("val2:" + val2);
String s = val1 + val2;
System.out.println("val2 + val2:" + s);
System.out.println("=============== line ==================");
return s;
}
}
    /**
     * @author yanjd
     *
     * init() runs first, then reduce() is called once per tuple; curr always holds
     * the current running total.
     */
public static class ReducerCount implements ReducerAggregator<Long>{
@Override
public Long init() {
return (long) 0;
}
@Override
public Long reduce(Long curr, TridentTuple tuple) {
System.out.println("tuple:"+ tuple);
System.out.println("str:"+ curr);
return curr+1 ;
}
}
}