Trident 统计
可参考
主要使用 TupleCollectionGet(),但还有一个问题:每次 DRPC 访问只能返回一个 partition 的数据;如何在一次访问中返回所有 partition 的数据,目前还没解决。
/**
 * Entry point for the word-count topology.
 *
 * <p>With no arguments the topology is run in-process on a {@code LocalCluster}
 * under the name "wordCounter": the "print" DRPC function is invoked ten times,
 * one second apart, then the topology is deactivated and killed and the local
 * cluster and local DRPC server are shut down.
 *
 * <p>With at least one argument the topology is submitted through
 * {@code StormSubmitter} using {@code args[0]} as the topology name and three
 * workers.
 *
 * @param args optional; {@code args[0]} is the topology name for a remote submit
 * @throws Exception if submission, the DRPC call, or the sleep fails
 */
public static void main(String[] args) throws Exception {
    Config conf = new Config();
    conf.setMaxSpoutPending(20);
    if (args.length == 0) {
        LocalDRPC drpc = new LocalDRPC();
        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology("wordCounter", conf, buildTopology(drpc));
            for (int i = 0; i < 10; i++) {
                // The result is intentionally ignored: the Print filter in the
                // topology writes the queried tuples to stderr.
                drpc.execute("print", "");
                Thread.sleep(1000);
            }
            cluster.deactivate("wordCounter");
            cluster.killTopology("wordCounter");
        } finally {
            // Always release the in-process cluster and DRPC server; without
            // this their background threads keep the JVM alive.
            try {
                cluster.shutdown();
            } finally {
                drpc.shutdown();
            }
        }
    } else {
        conf.setNumWorkers(3);
        // No LocalDRPC when running on a real cluster.
        StormSubmitter.submitTopology(args[0], conf, buildTopology(null));
    }
}
/**
 * Builds the Trident word-count topology.
 *
 * <p>A cycling {@code FixedBatchSpout} emits five fixed sentences in batches of
 * up to three. Each sentence is split into words, grouped by word, and counted
 * into an in-memory map state ({@code MemoryMapState}).
 *
 * <p>A DRPC stream named "print" reads the (word, count) tuples back out of
 * that state with {@code TupleCollectionGet} and passes them through the
 * {@code Print} filter.
 *
 * @param drpc the local DRPC server to bind the "print" function to; the
 *             caller in this file passes {@code null} when submitting to a
 *             real cluster
 * @return the assembled topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat"),
            new Values("to be or not to be the person"));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    TridentState wordCounts = topology.newStream("spout1", spout)
            .parallelismHint(16)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(16);

    topology.newDRPCStream("print", drpc)
            // Replicate the request to every partition of wordCounts. Without
            // this repartitioning a DRPC call is answered by a single
            // partition and only that partition's tuples are returned.
            .broadcast()
            .stateQuery(wordCounts, new TupleCollectionGet(), new Fields("word", "count"))
            .each(new Fields("word", "count"), new Print());

    return topology.build();
}
/**
 * Printing filter: writes one line to stderr for every tuple it sees and keeps
 * the tuple unchanged.
 *
 * <p>The line contains this operation's partition index, the total number of
 * partitions, and the string forms of the tuple's first two values.
 */
class Print extends BaseFilter {
    /** Index of the partition this instance runs in, captured in prepare(). */
    private int partitionIndex;
    /** Total partition count reported by the operation context. */
    private int numPartitions;

    /** Records the partition index and partition count from the context. */
    @Override
    public void prepare(Map conf, TridentOperationContext context) {
        numPartitions = context.numPartitions();
        partitionIndex = context.getPartitionIndex();
    }

    /**
     * Logs the tuple's first two values and accepts the tuple.
     *
     * @param tuple the tuple to log; values at positions 0 and 1 are read
     * @return always {@code true}, so no tuple is dropped
     */
    @Override
    public boolean isKeep(TridentTuple tuple) {
        String first = tuple.get(0).toString();
        String second = tuple.get(1).toString();
        String line = String.format("Partition idx: %s out of %s partitions got %s/%s",
                partitionIndex, numPartitions, first, second);
        System.err.println(line);
        return true;
    }
}
修改
1.local drpc 2 drpc
- 分流
stream1 to print
stream2 to hdfs
// Tuple fields handed to the record format and to partitionPersist below.
Fields hdfsFields = new Fields("field1", "field2");

// File size rotation policy configured with 5.0 MB.
FileRotationPolicy rotationPolicy =
        new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

// Delimited record format over the fields declared above.
RecordFormat recordFormat = new DelimitedRecordFormat().withFields(hdfsFields);

// File naming: path "/trident", prefix "trident", extension ".txt".
FileNameFormat fileNameFormat =
        new DefaultFileNameFormat().withPath("/trident").withPrefix("trident").withExtension(".txt");

// Bundle the pieces into the HDFS state options, targeting the given file system URL.
HdfsState.Options options = new HdfsState.HdfsFileOptions()
        .withFileNameFormat(fileNameFormat)
        .withRecordFormat(recordFormat)
        .withRotationPolicy(rotationPolicy)
        .withFsUrl("hdfs://localhost:54310");

StateFactory factory = new HdfsStateFactory().withOptions(options);

// Persist the stream's hdfsFields through HdfsUpdater; the last argument is an empty Fields.
// NOTE(review): `stream` is not defined in this snippet; it must be supplied by the surrounding code.
TridentState state =
        stream.partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields());