1. 第一个简单例子: Spout 将随机发送目前流行的编程语言, Console 输出 及 使用 File 保存出现的语言日志。

2. Topology 流程图

3. 程序结构

4. 程序示例
1. PrintWriterSpout: fieldsDeclarer.declare(new Fields("PrintWord"));
package com.john.learn.storm.ch01.spout;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
/**
 * Spout that emits one randomly chosen programming-language name per
 * {@link #nextTuple()} call, under the output field name "PrintWord"
 * (read downstream via tuple.getStringByField("PrintWord") in PrintBolt).
 */
public class PrintWriterSpout extends BaseRichSpout {

	private static final long serialVersionUID = 1L;

	// Candidate words; array initializer replaces the old static-init block.
	private static final String[] PRINT_WORDS = { "Java", "C", "Php", "VB", "Python",
			"Scala", "Groovy", "Go", "Javascript", "Clojure" };

	private static final Random RANDOM = new Random(System.currentTimeMillis());

	private SpoutOutputCollector spoutOutputCollector;

	// FIX: was 'static', so only the first spout task in a worker JVM performed
	// the startup delay. Each task instance should delay once before its first emit.
	private boolean init = false;

	@Override
	public void open(Map config, TopologyContext context, SpoutOutputCollector collector) {
		this.spoutOutputCollector = collector;
	}

	@Override
	public void nextTuple() {
		if (!init) {
			init = true;
			try {
				// Give the topology a moment to finish starting before the first emit.
				Thread.sleep(5000);
			} catch (InterruptedException e) {
				// FIX: restore the interrupt flag instead of swallowing it.
				Thread.currentThread().interrupt();
			}
		}
		String selectWord = PRINT_WORDS[RANDOM.nextInt(PRINT_WORDS.length)];
		// Emit one word value per call.
		this.spoutOutputCollector.emit(new Values(selectWord));
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer fieldsDeclarer) {
		// Declares the field name for emitted values; downstream components look the
		// value up by this name, similar to HttpRequest.getParameter(String name).
		// Refer to PrintBolt: tuple.getStringByField("PrintWord").
		fieldsDeclarer.declare(new Fields("PrintWord"));
	}
}
2. PrintBolt: outputFieldsDeclarer.declare(new Fields("WriteWord"));
package com.john.learn.storm.ch01.bolt;
import org.apache.storm.shade.org.eclipse.jetty.util.log.Log;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Bolt that logs each incoming word (field "PrintWord") and re-emits it
 * unchanged under the field name "WriteWord" for the downstream WriterBolt.
 */
public class PrintBolt extends BaseBasicBolt {

	private static final long serialVersionUID = 1L;

	// FIX: lowercase field name per SLF4J convention; the old name 'Logger'
	// shadowed the org.slf4j.Logger type.
	private static final Logger LOG = LoggerFactory.getLogger(PrintBolt.class);

	@Override
	public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
		String printWord = tuple.getStringByField("PrintWord");
		// Parameterized logging: no string concatenation when INFO is disabled.
		LOG.info("【Print】 {}", printWord);
		// Pass the word on to the next bolt.
		basicOutputCollector.emit(new Values(printWord));
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
		// Downstream WriterBolt reads this via tuple.getStringByField("WriteWord").
		outputFieldsDeclarer.declare(new Fields("WriteWord"));
	}
}
3. WriterBolt
package com.john.learn.storm.ch01.bolt;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import org.apache.storm.shade.org.eclipse.jetty.util.log.Log;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Terminal bolt that appends each incoming word (field "WriteWord") as one
 * line to a per-task file. Declares no output fields.
 */
public class WriterBolt extends BaseBasicBolt {

	private static final long serialVersionUID = 1L;

	// FIX: lowercase field name per SLF4J convention (old name shadowed the type).
	private static final Logger LOG = LoggerFactory.getLogger(WriterBolt.class);

	// Lazily opened on the first tuple; one file per bolt task
	// (the file name incorporates this instance's toString()).
	private Writer writer;

	@Override
	public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
		try {
			String writeWord = tuple.getStringByField("WriteWord");
			if (writer == null) {
				File file = getFile();
				file.getParentFile().mkdirs();
				// append == false: overwrite any previous run's output.
				writer = new FileWriter(file, false);
			}
			LOG.info("【Write 】 {}", writeWord);
			writer.write(writeWord);
			writer.write("\n");
			writer.flush();
		} catch (IOException e) {
			// FIX: was silently swallowed — log so lost records are visible.
			LOG.error("Failed to write word to file", e);
		}
	}

	/**
	 * Closes the output file when the topology shuts down.
	 * FIX: the writer was previously never closed (resource leak).
	 */
	@Override
	public void cleanup() {
		if (writer != null) {
			try {
				writer.close();
			} catch (IOException e) {
				LOG.warn("Failed to close writer", e);
			} finally {
				writer = null;
			}
		}
	}

	/**
	 * Resolves the per-task output file: C:/Storm/WriterBolt/&lt;task&gt; on Windows,
	 * /user/local/temp/&lt;task&gt; elsewhere.
	 * NOTE(review): "/user/local" looks like a typo for "/usr/local" — confirm
	 * before changing, since that alters the output location.
	 */
	private File getFile() {
		if (System.getProperty("os.name").toLowerCase().contains("windows")) {
			return new File("C:/Storm/WriterBolt/" + this);
		}
		return new File("/user/local/temp/" + this);
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
		// Terminal bolt: nothing is emitted downstream.
	}
}
4. BoltGroupType
package com.john.learn.storm.ch01.topology;
/**
 * Stream-grouping strategies that the topology builder supports when wiring
 * the WriterBolt to the PrintBolt.
 */
public enum BoltGroupType {
	FieldsGrouping,
	ShuffleGrouping,
	AllGrouping,
	GlobalGrouping
}
5. PrintWriterTopology
package com.john.learn.storm.ch01.topology;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.topology.BoltDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import com.john.learn.storm.ch01.bolt.PrintBolt;
import com.john.learn.storm.ch01.bolt.WriterBolt;
import com.john.learn.storm.ch01.spout.PrintWriterSpout;
/**
 * Assembles and submits the Spout -> PrintBolt -> WriterBolt topology.
 * Construct instances through {@link PrintWriterTopologyBuilder}, which
 * enforces that all three components are configured before submission.
 */
public class PrintWriterTopology {

	/**
	 * Builder that tracks which components have been configured and refuses to
	 * create a topology until spout, print bolt and writer bolt are all set.
	 */
	public static class PrintWriterTopologyBuilder {

		private final PrintWriterTopology printWriterTopology;
		private boolean hasSpout = false;
		private boolean hasPrintBolt = false;
		private boolean hasWriterBolt = false;

		public PrintWriterTopologyBuilder(String topologyName) {
			printWriterTopology = new PrintWriterTopology(topologyName);
		}

		public void setNumWorkers(int numWorkers) {
			printWriterTopology.setNumWorkers(numWorkers);
		}

		public void setDebug(boolean debug) {
			printWriterTopology.setDebug(debug);
		}

		/** Configures the spout; tasks = parallelismSize (executors) * 1. */
		public void setSpout(int parallelismSize) {
			hasSpout = true;
			printWriterTopology.setSpout(parallelismSize, parallelismSize * 1);
		}

		public void setPrintBolt(int parallelismSize, int numTasks) {
			hasPrintBolt = true;
			printWriterTopology.setPrintBolt(parallelismSize, numTasks);
		}

		public void setWriterBolt(int parallelismSize, int numTasks, BoltGroupType boltGroupType) {
			this.hasWriterBolt = true;
			printWriterTopology.setWriterBolt(parallelismSize, numTasks, boltGroupType);
		}

		/** Convenience overload defaulting to shuffle grouping. */
		protected void setWriterBolt(int parallelismSize, int numTasks) {
			setWriterBolt(parallelismSize, numTasks, BoltGroupType.ShuffleGrouping);
		}

		/**
		 * @return the fully configured topology
		 * @throws IllegalStateException if any component was not configured
		 */
		public PrintWriterTopology createTopology() {
			// FIX: messages previously referenced internal flag names
			// ("Please set hasPrintBolt!"); use IllegalStateException, the
			// conventional type for "object not ready".
			if (!this.hasSpout) {
				throw new IllegalStateException("Please set spout!");
			}
			if (!this.hasPrintBolt) {
				throw new IllegalStateException("Please set PrintBolt!");
			}
			if (!this.hasWriterBolt) {
				throw new IllegalStateException("Please set WriterBolt!");
			}
			return printWriterTopology;
		}
	}

	private final Config config = new Config();
	private final TopologyBuilder topologyBuilder = new TopologyBuilder();
	private LocalCluster localCluster;
	private final String topologyName;

	protected PrintWriterTopology(String topologyName) {
		this.topologyName = topologyName;
	}

	protected void setNumWorkers(int numOfWorker) {
		this.config.setNumWorkers(numOfWorker);
	}

	protected void setDebug(boolean debug) {
		config.setDebug(debug);
	}

	protected void setSpout(int parallelismSize, int numTasks) {
		topologyBuilder.setSpout("PrintWriterSpout", new PrintWriterSpout(), parallelismSize).setNumTasks(numTasks);
	}

	protected void setPrintBolt(int parallelismSize, int numTasks) {
		topologyBuilder.setBolt("PrintBolt", new PrintBolt(), parallelismSize).setNumTasks(numTasks)
				.shuffleGrouping("PrintWriterSpout");
	}

	/** Wires WriterBolt to PrintBolt using the requested grouping strategy. */
	protected void setWriterBolt(int parallelismSize, int numTasks, BoltGroupType boltGroupType) {
		BoltDeclarer boltDeclarer = topologyBuilder.setBolt("WriterBolt", new WriterBolt(), parallelismSize)
				.setNumTasks(numTasks);
		// Switch over the enum replaces the old if/return chain.
		switch (boltGroupType) {
		case FieldsGrouping:
			boltDeclarer.fieldsGrouping("PrintBolt", new Fields("WriteWord"));
			break;
		case AllGrouping:
			boltDeclarer.allGrouping("PrintBolt");
			break;
		case GlobalGrouping:
			boltDeclarer.globalGrouping("PrintBolt");
			break;
		default:
			boltDeclarer.shuffleGrouping("PrintBolt");
			break;
		}
	}

	/** Submits to a remote Storm cluster. */
	public void submitTopology() {
		try {
			StormSubmitter.submitTopology(topologyName, config, topologyBuilder.createTopology());
		} catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
			// FIX: was printStackTrace() with the failure silently ignored;
			// rethrow with the cause preserved so callers see the failure.
			throw new RuntimeException("Failed to submit topology: " + topologyName, e);
		}
	}

	/** Submits to an in-process local cluster (for development). */
	public void submitTopologyOnLocal() {
		localCluster = new LocalCluster();
		localCluster.submitTopology(topologyName, config, topologyBuilder.createTopology());
	}

	/** Kills the locally running topology and shuts the local cluster down. */
	public void killTopology() {
		// FIX: guard against NPE when submitTopologyOnLocal() was never called.
		if (localCluster == null) {
			throw new IllegalStateException("Topology was not submitted on a local cluster");
		}
		localCluster.killTopology(topologyName);
		localCluster.shutdown();
	}
}
6. PrintWriterToplogyOnSimpleConfig
package com.john.learn.storm.ch01.topology;
import com.john.learn.storm.ch01.topology.PrintWriterTopology.PrintWriterTopologyBuilder;
/**
 * Entry point that runs the Print/Writer topology on an in-process local
 * cluster with the simplest configuration: one worker, parallelism 1 for
 * every component, and the default shuffle grouping for the WriterBolt.
 */
public class PrintWriterToplogyOnSimpleConfig {

	public static void main(String[] args) throws InterruptedException {
		PrintWriterTopologyBuilder builder = new PrintWriterTopologyBuilder("PrintWriterTopology1");
		builder.setNumWorkers(1);
		builder.setSpout(1);
		builder.setPrintBolt(1, 1);
		builder.setWriterBolt(1, 1);

		PrintWriterTopology topology = builder.createTopology();
		topology.submitTopologyOnLocal();

		// Let the local cluster process tuples for 20 seconds, then tear it down.
		Thread.sleep(20000);
		topology.killTopology();
	}
}

运行: 默认并行度都是1, StreamGrouping 是 Shuffle 随机模式
Print: 1 个 Worker, 1 个 Executor

WriterBolt


本文介绍了一个基于Apache Storm的简单流处理实例,通过Spout组件发送流行编程语言名称,随后使用Bolt组件进行打印和文件记录操作。该示例涵盖了Topology构建、Spout与Bolt的实现、Stream Grouping配置等内容。

1万+

被折叠的 条评论
为什么被折叠?



