先写个 Java 版的，近期会对照实现 Clojure 版，并提供 Clojure 实现中宏的介绍。
入口类
package jvm.storm.starter; import jvm.storm.starter.wordcount.SplitSentence; import jvm.storm.starter.wordcount.WordCount; import jvm.storm.starter.wordcount.WordCountSpout; import backtype.storm.Config; import backtype.storm.StormSubmitter; import backtype.storm.generated.AlreadyAliveException; import backtype.storm.generated.InvalidTopologyException; import backtype.storm.topology.InputDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.tuple.Fields; /** * @author guiqiangl E-mail:larry.lv.word@gmail.com * @version 创建时间:2011-11-24 下午04:40:26 * */ public class WordCountStart { public static void main (String[] args){ TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("1", new WordCountSpout(""), 5);//发射器 InputDeclarer fieldsGrouping = builder.setBolt("2", new SplitSentence(), 5); fieldsGrouping.fieldsGrouping("1", new Fields("word")); builder.setBolt("3", new WordCount(), 5) .fieldsGrouping("2", new Fields("word")); Config conf = new Config(); conf.setDebug(false); // 本地模式 // LocalCluster cluster = new LocalCluster(); // cluster.submitTopology("rolling-demo", conf, builder.createTopology()); //远程启动 conf.setNumWorkers(20); conf.setMaxSpoutPending(5000); try { StormSubmitter.submitTopology("rolling-demo", conf, builder.createTopology()); } catch (AlreadyAliveException e) { e.printStackTrace(); } catch (InvalidTopologyException e) { e.printStackTrace(); } //结束 // cluster.killTopology("rolling-demo"); // cluster.shutdown(); } }
发射器
package jvm.storm.starter.wordcount;

import java.util.Map;
import java.util.Random;

import org.apache.log4j.Logger;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

/**
 * Spout that emits one random word (from a fixed five-word list) roughly once
 * per second on the single output field {@code "word"}.
 *
 * Tuples are emitted without a message id, so Storm does not track or replay
 * them; {@link #ack(Object)} and {@link #fail(Object)} are therefore no-ops.
 *
 * @author guiqiangl E-mail:larry.lv.word@gmail.com
 * @version 创建时间:2011-11-24 下午04:41:34
 */
public class WordCountSpout implements IRichSpout {
    private static final long serialVersionUID = -620768344883063619L;

    public static Logger LOG = Logger.getLogger(WordCountSpout.class);

    /** Fixed vocabulary sampled uniformly at random on each emit. */
    private static final String[] WORDS =
            new String[] {"nathan", "mike", "jackson", "golda", "bertels"};

    SpoutOutputCollector _collector;

    // FIX: previously a new Random was allocated on every nextTuple() call;
    // one instance per task is enough. Created in open() (worker side) rather
    // than at construction, so it is never serialized with the topology.
    private Random _rand;

    /**
     * @param string unused; kept for compatibility with existing callers.
     */
    public WordCountSpout(String string) {
    }

    public void open(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, SpoutOutputCollector collector) {
        _collector = collector;
        _rand = new Random();
    }

    public void close() {
    }

    /**
     * Emits one random word, then sleeps ~1s to throttle the emit rate.
     */
    public void nextTuple() {
        String word = WORDS[_rand.nextInt(WORDS.length)];
        _collector.emit(new Values(word));
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // FIX: restore the interrupt flag instead of swallowing it, so the
            // executor thread can observe a shutdown request.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    public void ack(Object msgId) {
    }

    public void fail(Object msgId) {
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    @Override
    public boolean isDistributed() {
        return false;
    }
}
单词拆分:
package jvm.storm.starter.wordcount;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Bolt that splits the incoming tuple's first field on single spaces and
 * emits one tuple per token on the output field {@code "word"}.
 *
 * Each emitted tuple is anchored to its input tuple, so if a downstream
 * tuple fails, Storm can replay the original sentence.
 *
 * @author guiqiangl E-mail:larry.lv.word@gmail.com
 * @version 创建时间:2011-11-24 下午04:48:29
 */
public class SplitSentence implements IRichBolt {
    private static final long serialVersionUID = -424523368294777576L;

    OutputCollector _collector;

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
        _collector = collector;
    }

    public void execute(Tuple tuple) {
        // Tokenize on a single space and emit each token anchored to the
        // input tuple (use the unanchored overload to opt out of replay).
        String[] tokens = tuple.getString(0).split(" ");
        for (String token : tokens) {
            _collector.emit(tuple, new Values(token));
        }
        // Ack only after every token has been emitted.
        _collector.ack(tuple);
    }

    public void cleanup() {
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
计数:
package jvm.storm.starter.wordcount;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;

/**
 * Bolt that keeps a per-task running count of the words it receives and,
 * after each tuple, dumps the whole counter table to stdout and (best-effort)
 * to a fixed file.
 *
 * Note: each task holds its own in-memory map, so counts are per-task, not
 * global; the topology must fields-group on "word" for counts to be correct.
 *
 * @author guiqiangl E-mail:larry.lv.word@gmail.com
 * @version 创建时间:2011-11-24 下午04:56:13
 */
public class WordCount implements IRichBolt {
    private static final long serialVersionUID = -6706714875516091987L;

    /** Running word -> occurrence count for this task. */
    public Map<String, Integer> counterMap = new HashMap<String, Integer>();

    OutputCollector _collector;

    // Optional file sink; null when the file could not be opened, in which
    // case execute() degrades to console-only output.
    BufferedWriter output = null;

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) {
        _collector = collector;
        try {
            output = new BufferedWriter(new FileWriter("/home/hadoop/桌面/wordcount.txt", true));
        } catch (IOException e) {
            // FIX: the old code called output.close() here, but output is
            // still null when the FileWriter constructor throws, which was a
            // guaranteed NullPointerException. Log and continue without the
            // file sink instead.
            e.printStackTrace();
            output = null;
        }
    }

    public void execute(Tuple tuple) {
        String word = tuple.getString(0);
        // Increment the counter for this word (first occurrence -> 1).
        Integer count = counterMap.get(word);
        counterMap.put(word, count == null ? 1 : count + 1);

        // Dump the full table after every tuple (demo-style output).
        for (Map.Entry<String, Integer> entry : counterMap.entrySet()) {
            String item = entry.getKey() + ":" + entry.getValue() + "   ";
            System.out.print(item);
            // FIX: guard against a missing file sink (prepare() may have
            // failed to open the file); previously this NPE'd on every tuple.
            if (output != null) {
                try {
                    output.write(item);
                    output.flush();
                } catch (IOException e) {
                    e.printStackTrace();
                    // Stop writing to a broken sink; keep counting/printing.
                    try {
                        output.close();
                    } catch (IOException e1) {
                        e1.printStackTrace();
                    }
                    output = null;
                }
            }
        }
        System.out.println();
        _collector.ack(tuple);
    }

    public void cleanup() {
        // FIX: the writer was never closed on shutdown, leaking the file
        // handle and potentially losing buffered output.
        if (output != null) {
            try {
                output.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            output = null;
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // NOTE(review): this bolt never emits, so the declared "word" field is
        // unused by downstream consumers — kept for interface compatibility.
        declarer.declare(new Fields("word"));
    }
}