6. Projection - projection
project keeps only the fields listed and discards everything else. In the example below, each() first adds a "gender" field computed by GenderFunc, and project(new Fields("name")) then drops both "sentence" and "gender", leaving only "name".
package com.liming.projection;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import storm.trident.Stream;
import storm.trident.TridentTopology;
public class TridentDemo {
public static void main(String[] args) {
//-- create the topology
TridentTopology topology = new TridentTopology();
Stream s = topology.newStream("xx", new SentenceSpout())
.each(new Fields("name"), new GenderFunc(),new Fields("gender"))
.project(new Fields("name"));
s.each(s.getOutputFields(), new PrintFilter());
//-- submit to a local cluster and run
Config conf = new Config();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("MyTopology", conf, topology.build());
//-- after 10 seconds, kill the topology and shut down the cluster
Utils.sleep(1000 * 10);
cluster.killTopology("MyTopology");
cluster.shutdown();
}
}
package com.liming.projection;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class SentenceSpout extends BaseRichSpout{
private SpoutOutputCollector collector = null;
private Values [] values = {
new Values("xiaoming","i am so shuai"),
new Values("xiaoming","do you like me"),
new Values("xiaohua","i do not like you"),
new Values("xiaohua","you look like fengjie"),
new Values("xiaoming","are you sure you do not like me"),
new Values("xiaohua","yes i am"),
new Values("xiaoming","ok i am sure")
};
private int index = 0;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
@Override
public void nextTuple() {
collector.emit(values[index]);
index = index+1 == values.length ? 0 : index+1;
Utils.sleep(100);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
Fields fields = new Fields("name","sentence");
declarer.declare(fields);
}
}
package com.liming.projection;
import backtype.storm.tuple.Values;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
public class GenderFunc extends BaseFunction{
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
String name = tuple.getStringByField("name");
if("xiaoming".equals(name)){
collector.emit(new Values("male"));
}else if("xiaohua".equals(name)){
collector.emit(new Values("female"));
}
//-- emitting nothing for any other name drops the tuple from the stream
}
}
package com.liming.projection;
import java.util.Iterator;
import java.util.Map;
import backtype.storm.tuple.Fields;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
public class PrintFilter extends BaseFilter{
private TridentOperationContext context = null;
@Override
public void prepare(Map conf, TridentOperationContext context) {
super.prepare(conf, context);
this.context = context;
}
@Override
public boolean isKeep(TridentTuple tuple) {
StringBuffer buf = new StringBuffer();
Fields fields = tuple.getFields();
Iterator<String> it = fields.iterator();
while(it.hasNext()){
String key = it.next();
Object value = tuple.getValueByField(key);
buf.append("---"+key+":"+value+"---");
}
System.out.println(buf.toString());
return true;
}
}
The test output is as follows:

II. Repartitioning operations
A repartition operation changes how tuples are divided across tasks.
It can also change the number of partitions.
Repartitioning requires network transfer.
Setting parallelism around a repartition:
parallelismHint: sets the parallelism of a repartitioned section. The method searches upstream for the most recent repartition operation and applies the given value to every operation between that repartition and the hint; if it is not set, the parallelism of every repartitioned section defaults to 1.
The repartitioning operations are listed below (a combined usage sketch follows the list):
** If a repartitioning method is not followed by a parallelismHint, the operations after it default to a parallelism of 1.
1. shuffle: randomly and evenly distributes tuples across the target partitions.
2. broadcast: every tuple is replicated to all target partitions. This is useful in DRPC - you can run a stateQuery on every partition.
3. partitionBy: chooses the partition for each tuple as (hash of the specified fields) mod (number of target partitions), which guarantees that tuples with the same values for those fields go to the same partition. (A single partition may still hold tuples with different field values.)
4. global: all tuples are sent to the same partition.
5. batchGlobal: all tuples in the same batch are sent to the same partition.
6. partition: takes a custom partitioning function (an implementation of backtype.storm.grouping.CustomStreamGrouping).
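As a quick illustration, here is a minimal sketch (reusing the SentenceSpout and PrintFilter of this section; the fragment belongs in a main method like the ones below) of how these calls attach to a stream. Each of them only changes how tuples are routed, never their contents:
TridentTopology topology = new TridentTopology();
Stream s = topology.newStream("spout", new SentenceSpout());
//-- (hash of "name") mod (partition count): equal names land in the same partition
s.partitionBy(new Fields("name"))
.each(s.getOutputFields(), new PrintFilter())
.parallelismHint(2); //-- two partitions for everything back to the partitionBy
//-- random, even distribution across the target partitions
s.shuffle().each(s.getOutputFields(), new PrintFilter());
//-- every tuple copied to every target partition
s.broadcast().each(s.getOutputFields(), new PrintFilter());
//-- all tuples funneled into a single partition
s.global().each(s.getOutputFields(), new PrintFilter());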
parallelismHint in action: in the topology below there is no repartition between the hint and the spout, so the hint reaches all the way back and runs the spout with a parallelism of 2. For example:
package com.liming.Repartition;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import storm.trident.Stream;
import storm.trident.TridentTopology;
public class TridentDemo {
public static void main(String[] args) {
//-- create the topology
TridentTopology topology = new TridentTopology();
Stream s = topology.newStream("xx", new SentenceSpout())
.parallelismHint(2);
s.each(s.getOutputFields(), new PrintFilter());
//-- submit to a local cluster and run
Config conf = new Config();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("MyTopology", conf, topology.build());
//-- after 10 seconds, kill the topology and shut down the cluster
Utils.sleep(1000 * 10);
cluster.killTopology("MyTopology");
cluster.shutdown();
}
}
package com.liming.Repartition;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class SentenceSpout extends BaseRichSpout{
private SpoutOutputCollector collector = null;
private Values [] values = {
new Values("xiaoming","i am so shuai"),
new Values("xiaoming","do you like me"),
new Values("xiaohua","i do not like you"),
new Values("xiaohua","you look like fengjie"),
new Values("xiaoming","are you sure you do not like me"),
new Values("xiaohua","yes i am"),
new Values("xiaoming","ok i am sure")
};
private int index = 0;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
@Override
public void nextTuple() {
if(index<values.length){
collector.emit(values[index]);
index++;
}
Utils.sleep(100);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
Fields fields = new Fields("name","sentence");
declarer.declare(fields);
}
}
package com.liming.Repartition;
import java.util.Iterator;
import java.util.Map;
import backtype.storm.tuple.Fields;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
public class PrintFilter extends BaseFilter{
private TridentOperationContext context = null;
@Override
public void prepare(Map conf, TridentOperationContext context) {
super.prepare(conf, context);
this.context = context;
}
@Override
public boolean isKeep(TridentTuple tuple) {
StringBuffer buf = new StringBuffer();
Fields fields = tuple.getFields();
Iterator<String> it = fields.iterator();
while(it.hasNext()){
String key = it.next();
Object value = tuple.getValueByField(key);
buf.append("---"+key+":"+value+"---");
}
System.out.println(buf.toString());
return true;
}
}
Each sentence now appears twice in the output: the spout runs as two instances, and each instance emits the whole data set once:

Case 9 - extend the example above: handle xiaoming's and xiaohua's sentences in different partitions and count how many times each person speaks (PrintFilter is also updated below to print the partition index):
package com.liming.Repartition;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;
/**
* Repartitioning example: partitionBy + partitionAggregate
*/
public class TridentDemo2 {
public static void main(String[] args) {
//-- create the topology
TridentTopology topology = new TridentTopology();
Stream s = topology.newStream("xx", new SentenceSpout())
.partitionBy(new Fields("name"))
.partitionAggregate(new Fields("name"),new SentenceAggerator(),new Fields("name","count"))
.parallelismHint(2)
;
s.each(s.getOutputFields(), new PrintFilter());
//-- submit to a local cluster and run
Config conf = new Config();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("MyTopology", conf, topology.build());
//-- after 10 seconds, kill the topology and shut down the cluster
Utils.sleep(1000 * 10);
cluster.killTopology("MyTopology");
cluster.shutdown();
}
}
package com.liming.Repartition;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class SentenceSpout extends BaseRichSpout{
private SpoutOutputCollector collector = null;
private Values [] values = {
new Values("xiaoming","i am so shuai"),
new Values("xiaoming","do you like me"),
new Values("xiaohua","i do not like you"),
new Values("xiaohua","you look like fengjie"),
new Values("xiaoming","are you sure you do not like me"),
new Values("xiaohua","yes i am"),
new Values("xiaoming","ok i am sure")
};
private int index = 0;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
@Override
public void nextTuple() {
if(index<values.length){
collector.emit(values[index]);
index++;
}
Utils.sleep(100);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
Fields fields = new Fields("name","sentence");
declarer.declare(fields);
}
}
package com.liming.Repartition;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import backtype.storm.tuple.Values;
import storm.trident.operation.BaseAggregator;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
//-- per-batch aggregator: counts, within each partition, how many times each name occurs in the batch
public class SentenceAggerator extends BaseAggregator<ConcurrentHashMap<String,Integer>> {
@Override
public ConcurrentHashMap<String,Integer> init(Object batchId, TridentCollector collector) {
return new ConcurrentHashMap<String,Integer>();
}
@Override
public void aggregate(ConcurrentHashMap<String,Integer> val, TridentTuple tuple, TridentCollector collector) {
String name = tuple.getStringByField("name");
val.put(name, val.containsKey(name) ? val.get(name)+1 : 1);
}
@Override
public void complete(ConcurrentHashMap<String,Integer> val, TridentCollector collector) {
for(Map.Entry<String, Integer> entry : val.entrySet()){
collector.emit(new Values(entry.getKey(),entry.getValue()));
}
}
}
package com.liming.Repartition;
import java.util.Iterator;
import java.util.Map;
import backtype.storm.tuple.Fields;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
public class PrintFilter extends BaseFilter{
private TridentOperationContext context = null;
@Override
public void prepare(Map conf, TridentOperationContext context) {
super.prepare(conf, context);
this.context = context;
}
@Override
public boolean isKeep(TridentTuple tuple) {
StringBuffer buf = new StringBuffer();
buf.append("---partition_id:"+context.getPartitionIndex());
Fields fields = tuple.getFields();
Iterator<String> it = fields.iterator();
while(it.hasNext()){
String key = it.next();
Object value = tuple.getValueByField(key);
buf.append("---"+key+":"+value+"---");
}
System.out.println(buf.toString());
return true;
}
}
The output is as follows; the partition ids show that two different partitions did the processing:

III. Aggregation operations
Trident provides the aggregate() and persistentAggregate() methods for aggregating a stream.
1. aggregate()
Runs independently on each batch and performs a global aggregation over it.
With a ReducerAggregator or an Aggregator, the stream is first repartitioned into a single partition, and the aggregation then runs over that one partition;
with a CombinerAggregator, Trident first aggregates locally on each partition, then repartitions the partial results into a single partition to finish the global aggregation.
The CombinerAggregator is therefore more efficient and is the recommended choice.
It follows that aggregate() implicitly repartitions the stream, leaving a single partition.
Example:
use aggregate() to compute a global count over each batch:
mystream.aggregate(new Count(), new Fields("count"))
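To see why a CombinerAggregator permits the local-aggregation optimization, here is a minimal sketch of a hypothetical Sum combiner (the package, class, and field names are illustrative, not from the original code). Because combine() merges two partial results, every partition can be reduced locally before the single network transfer:
package com.liming.aggregation;
import storm.trident.operation.CombinerAggregator;
import storm.trident.tuple.TridentTuple;
public class Sum implements CombinerAggregator<Long>{
@Override
public Long init(TridentTuple tuple) {
//-- one partial value per input tuple
return tuple.getLong(0);
}
@Override
public Long combine(Long val1, Long val2) {
//-- merges two partials: runs inside each partition first, then once globally
return val1 + val2;
}
@Override
public Long zero() {
//-- identity value, used when a partition contributes no tuples
return 0L;
}
}
It would be used like Count: mystream.aggregate(new Fields("amount"), new Sum(), new Fields("sum")), where "amount" is an assumed numeric field.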
2. persistentAggregate()
Aggregates the tuples of all batches and stores the result in a state source. A sketch follows.
With a ReducerAggregator or an Aggregator, the stream is first repartitioned into a single partition and the aggregation runs over it;
with a CombinerAggregator, Trident first aggregates locally on each partition, then repartitions the partial results into a single partition to complete the global aggregation.
Again, the CombinerAggregator is more efficient and recommended.
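A minimal sketch of persistentAggregate, assuming the in-memory MapState that Trident ships for testing (storm.trident.testing.MemoryMapState). Unlike aggregate(), the count keeps growing across batches, because it lives in the state rather than in the batch:
import storm.trident.TridentState;
import storm.trident.operation.builtin.Count;
import storm.trident.testing.MemoryMapState;
//-- a running, cross-batch count of all tuples, kept in the state source
TridentState total = topology.newStream("xx", new SentenceSpout())
.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));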
** As with partitionAggregate, the aggregators passed to aggregate() can be chained. However, if you chain a CombinerAggregator onto a non-CombinerAggregator, Trident cannot apply the local-aggregation optimization.
IV. Operations on grouped streams
groupBy first partitions the stream by the specified fields (a partitionBy), so tuples with equal values for those fields reach the same partition. Within each partition it then groups the tuples by the values of those fields.
!! Note: batches are not ignored - the grouping happens inside each batch, aggregating the tuples of that batch that share the specified field values.
See the diagram: how groupBy works (figure not reproduced).
If you run an aggregation on a grouped stream, it runs within each group rather than over the whole batch.
GroupedStream also offers a persistentAggregate method; its result is stored in a MapState keyed by the grouping fields (the fields passed to groupBy). This is still part of Trident state - a short sketch follows.
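A minimal sketch of the grouped variant, again assuming the testing-only MemoryMapState; the resulting MapState maps each value of the grouping field to its own running count:
//-- MapState keyed by the groupBy field: name -> running count across all batches
TridentState perName = topology.newStream("xx", new SentenceSpout())
.groupBy(new Fields("name"))
.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));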

package com.liming.groupby;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;
/**
* GroupBy example
*/
public class TridentDemo4 {
public static void main(String[] args) {
//-- create the topology
TridentTopology topology = new TridentTopology();
Stream s = topology.newStream("xx", new SentenceSpout())
.groupBy(new Fields("name"))
.aggregate(new Count(),new Fields("count"))
;
s.each(s.getOutputFields(), new PrintFilter());
//-- submit to a local cluster and run
Config conf = new Config();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("MyTopology", conf, topology.build());
//-- after 10 seconds, kill the topology and shut down the cluster
Utils.sleep(1000 * 10);
cluster.killTopology("MyTopology");
cluster.shutdown();
}
}
package com.liming.groupby;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class SentenceSpout extends BaseRichSpout{
private SpoutOutputCollector collector = null;
private Values [] values = {
new Values("xiaoming","i am so shuai"),
new Values("xiaoming","do you like me"),
new Values("xiaohua","i do not like you"),
new Values("xiaohua","you look like fengjie"),
new Values("xiaoming","are you sure you do not like me"),
new Values("xiaohua","yes i am"),
new Values("xiaoming","ok i am sure")
};
private int index = 0;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
@Override
public void nextTuple() {
if(index<values.length){
collector.emit(values[index]);
index++;
}
Utils.sleep(100);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
Fields fields = new Fields("name","sentence");
declarer.declare(fields);
}
}
package com.liming.groupby;
import java.util.Iterator;
import java.util.Map;
import backtype.storm.tuple.Fields;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
public class PrintFilter extends BaseFilter{
private TridentOperationContext context = null;
@Override
public void prepare(Map conf, TridentOperationContext context) {
super.prepare(conf, context);
this.context = context;
}
@Override
public boolean isKeep(TridentTuple tuple) {
StringBuffer buf = new StringBuffer();
buf.append("---partition_id:"+context.getPartitionIndex());
Fields fields = tuple.getFields();
Iterator<String> it = fields.iterator();
while(it.hasNext()){
String key = it.next();
Object value = tuple.getValueByField(key);
buf.append("---"+key+":"+value+"---");
}
System.out.println(buf.toString());
return true;
}
}
The output is as follows:

V. Merges and joins
Several streams can be combined into one. The simplest way is to merge them into a single stream, using the merge method of TridentTopology.
For example:
topology.merge(stream1, stream2, stream3);
Trident names the fields of the merged stream after the fields of stream1.
All the streams must have matching fields for merge to work.
The other way to combine streams is join (analogous to a SQL join).
Consider stream1 with fields ["key", "val1", "val2"] and stream2 with fields ["x", "val1"].
For example:
topology.join(stream1, new Fields("key"), stream2, new Fields("x"), new Fields("key", "a", "b", "c"));
This join uses "key" and "x" as the join fields. Because the input streams have overlapping field names ("val1" appears in both stream1 and stream2), Trident requires you to name every field of the output stream. An output tuple contains:
1. The join fields: here the output field "key" corresponds to "key" from stream1 and "x" from stream2.
2. The non-join fields of all input streams, in the order the streams were passed to join: here "a" and "b" correspond to "val1" and "val2" from stream1, and "c" corresponds to "val1" from stream2.
** Note: the streams being joined must belong to the same topology.
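Laid out as code, the field mapping looks like this (stream1 and stream2 are the streams described above and are assumed to already exist):
//-- output fields:  "key"   "a"     "b"     "c"
//-- from stream1:   "key"   "val1"  "val2"
//-- from stream2:   "x"                     "val1"
Stream joined = topology.join(
stream1, new Fields("key"), //-- join field of stream1
stream2, new Fields("x"), //-- join field of stream2
new Fields("key","a","b","c")); //-- every output field must be named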
Merging streams with the same fields:
package com.liming.merge;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import storm.trident.Stream;
import storm.trident.TridentTopology;
/**
* Merge example
*/
public class TridentDemo5 {
public static void main(String[] args) {
//-- create the topology
TridentTopology topology = new TridentTopology();
/**
* merge
*/
Stream s1 = topology.newStream("xx", new SentenceSpout());
Stream s2 = topology.newStream("yy", new SentenceSpout());
Stream s3 = topology.newStream("zz", new SentenceSpout());
Stream s = topology.merge(s1,s2,s3);
s.each(s.getOutputFields(), new PrintFilter());
//-- submit to a local cluster and run
Config conf = new Config();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("MyTopology", conf, topology.build());
//-- after 10 seconds, kill the topology and shut down the cluster
Utils.sleep(1000 * 10);
cluster.killTopology("MyTopology");
cluster.shutdown();
}
}
package com.liming.merge;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class SentenceSpout extends BaseRichSpout{
private SpoutOutputCollector collector = null;
private Values [] values = {
new Values("xiaoming","i am so shuai"),
new Values("xiaoming","do you like me"),
new Values("xiaohua","i do not like you"),
new Values("xiaohua","you look like fengjie"),
new Values("xiaoming","are you sure you do not like me"),
new Values("xiaohua","yes i am"),
new Values("xiaoming","ok i am sure")
};
private int index = 0;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
@Override
public void nextTuple() {
if(index<values.length){
collector.emit(values[index]);
index++;
}
Utils.sleep(100);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
Fields fields = new Fields("name","sentence");
declarer.declare(fields);
}
}
package com.liming.merge;
import java.util.Iterator;
import java.util.Map;
import backtype.storm.tuple.Fields;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
public class PrintFilter extends BaseFilter{
private TridentOperationContext context = null;
@Override
public void prepare(Map conf, TridentOperationContext context) {
super.prepare(conf, context);
this.context = context;
}
@Override
public boolean isKeep(TridentTuple tuple) {
StringBuffer buf = new StringBuffer();
buf.append("---partition_id:"+context.getPartitionIndex());
Fields fields = tuple.getFields();
Iterator<String> it = fields.iterator();
while(it.hasNext()){
String key = it.next();
Object value = tuple.getValueByField(key);
buf.append("---"+key+":"+value+"---");
}
System.out.println(buf.toString());
return true;
}
}
The output is as follows:

Joining streams with different fields:
package com.liming.merge;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import storm.trident.Stream;
import storm.trident.TridentTopology;
/**
* Join example
*/
public class TridentDemo5 {
public static void main(String[] args) {
//-- create the topology
TridentTopology topology = new TridentTopology();
Stream s1 = topology.newStream("xx", new SentenceSpout());
Stream s2 = topology.newStream("yy", new GenderSpout());
Stream s = topology.join(s1, new Fields("name"), s2, new Fields("name"),new Fields("name","sentence","gender"));
s.each(s.getOutputFields(), new PrintFilter());
//-- submit to a local cluster and run
Config conf = new Config();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("MyTopology", conf, topology.build());
//-- after 10 seconds, kill the topology and shut down the cluster
Utils.sleep(1000 * 10);
cluster.killTopology("MyTopology");
cluster.shutdown();
}
}
package com.liming.merge;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class SentenceSpout extends BaseRichSpout{
private SpoutOutputCollector collector = null;
private Values [] values = {
new Values("xiaoming","i am so shuai"),
new Values("xiaoming","do you like me"),
new Values("xiaohua","i do not like you"),
new Values("xiaohua","you look like fengjie"),
new Values("xiaoming","are you sure you do not like me"),
new Values("xiaohua","yes i am"),
new Values("xiaoming","ok i am sure")
};
private int index = 0;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
@Override
public void nextTuple() {
if(index<values.length){
collector.emit(values[index]);
index++;
}
Utils.sleep(100);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
Fields fields = new Fields("name","sentence");
declarer.declare(fields);
}
}
package com.liming.merge;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class GenderSpout extends BaseRichSpout{
private SpoutOutputCollector collector = null;
private Values [] values = {
new Values("xiaoming","male"),
new Values("xiaohua","female"),
};
private int index = 0;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
@Override
public void nextTuple() {
if(index<values.length){
collector.emit(values[index]);
index++;
}
Utils.sleep(100);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
//-- this spout emits ("name","gender"), so declare "gender", not "sentence"
Fields fields = new Fields("name","gender");
declarer.declare(fields);
}
}
package com.liming.merge;
import java.util.Iterator;
import java.util.Map;
import backtype.storm.tuple.Fields;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
public class PrintFilter extends BaseFilter{
private TridentOperationContext context = null;
@Override
public void prepare(Map conf, TridentOperationContext context) {
super.prepare(conf, context);
this.context = context;
}
@Override
public boolean isKeep(TridentTuple tuple) {
StringBuffer buf = new StringBuffer();
buf.append("---partition_id:"+context.getPartitionIndex());
Fields fields = tuple.getFields();
Iterator<String> it = fields.iterator();
while(it.hasNext()){
String key = it.next();
Object value = tuple.getValueByField(key);
buf.append("---"+key+":"+value+"---");
}
System.out.println(buf.toString());
return true;
}
}
The output is as follows:
