一、Storm模型
重要概念:nimbus、supervisor、worker、executor、task
二、安装Storm
部署一个storm集群
(1)安装Java 7和Python 2.6.6
(2)下载storm安装包,解压缩,重命名,配置环境变量
(3)修改storm配置文件
mkdir /var/storm
conf/storm.yaml
storm.zookeeper.servers:
- "ip"
- "ip"
storm.local.dir: "/var/storm"
nimbus.seeds: ["ip"]
supervisor.slots.ports,指定每个机器上可以启动多少个worker,一个端口号代表一个worker
supervisor.slots.ports:
- 6700
- 6701
- 6702
- 6703
(4)启动storm集群和ui界面
一个节点,storm nimbus >/dev/null 2>&1 &
三个节点,storm supervisor >/dev/null 2>&1 &
三个节点,storm logviewer >/dev/null 2>&1 &
一个节点,storm ui >/dev/null 2>&1 &
(5)访问一下ui界面,8080端口
三、java编程模型
重要概念:topology、spout、bolt、Tuple、stream、trident
1)topology是一个虚拟概念,包含spout和bolt,被worker处理
2)spout 用来接收数据来源,作为storm流式处理中的开始端,负责数据的读入与发射到下游处理器上。
3)bolt 用来做数据逻辑处理,位于spout的下游
4)tuple 作为流式处理中单条数据的传输对象,spout----->bolt 中间的数据用tuple来传输
5)stream 也是一个虚拟概念,是tuple的集合,一条一条的tuple组成流
6)trident 以Batch的形式处理Stream
四、流分组的策略
1、fieldsGrouping(指定参数的分组策略)
2、globalGrouping
3、shuffleGrouping(负载均衡的分组策略)
4、noneGrouping
5、localOrShuffleGrouping
6、allGrouping
7、directGrouping
五、简单实现代码
package org.example;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;
public class StormTest {
    /**
     * Builds and submits a simple topology: a spout that emits "hello world"
     * once per second, wired to a bolt over a shuffle grouping.
     *
     * @param args when non-empty, {@code args[0]} is used as the topology name
     *             and the topology is submitted to a remote Storm cluster;
     *             otherwise it runs in an in-process LocalCluster for ~6 seconds.
     * @throws Exception if topology submission fails in local mode
     */
    public static void main(String[] args) throws Exception {
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // Third argument is the number of executors for this component.
        // NOTE(review): "Spolt" is a typo for "Spout"; the id and class name are
        // kept unchanged so existing references keep working.
        topologyBuilder.setSpout("helloSpolt", new StringSpolt(), 2);
        // Shuffle grouping load-balances tuples from the spout across bolt tasks.
        topologyBuilder.setBolt("helloworld", new StringBolt(), 2)
                .setNumTasks(3)
                .shuffleGrouping("helloSpolt");
        Config config = new Config();
        if (args != null && args.length > 0) {
            // Command-line arguments present: submit to the remote Storm cluster.
            config.setNumWorkers(3);
            try {
                StormSubmitter.submitTopology(args[0], config, topologyBuilder.createTopology());
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            // No arguments: run locally for a few seconds, then shut down.
            config.setMaxTaskParallelism(20);
            LocalCluster cluster = new LocalCluster();
            try {
                cluster.submitTopology("helloworldTopology", config, topologyBuilder.createTopology());
                TimeUnit.SECONDS.sleep(6);
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of swallowing it.
                Thread.currentThread().interrupt();
            } finally {
                // Always release local-cluster resources, even on interrupt
                // (the original skipped shutdown only by luck of catch placement).
                cluster.shutdown();
            }
        }
    }

    /**
     * Spout that emits the string "hello world" roughly once per second on the
     * single output field "count".
     */
    static class StringSpolt extends BaseRichSpout {
        private SpoutOutputCollector spoutOutputCollector;
        private Random random;

        /**
         * One-time initialization; called once per task before nextTuple().
         *
         * @param conf                 topology configuration
         * @param topologyContext      task/topology context (unused here)
         * @param spoutOutputCollector collector used to emit tuples downstream
         */
        @Override
        public void open(Map<String, Object> conf, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.spoutOutputCollector = spoutOutputCollector;
            this.random = new Random();
        }

        /**
         * Called in a loop by the worker; throttled to roughly one emit per second.
         */
        @Override
        public void nextTuple() {
            try {
                TimeUnit.SECONDS.sleep(1);
            } catch (InterruptedException e) {
                // Re-interrupt and skip this round so the framework can observe
                // the interrupt (bug fix: original swallowed it and emitted anyway).
                Thread.currentThread().interrupt();
                return;
            }
            String data = "hello world";
            this.spoutOutputCollector.emit(new Values(data));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            // Downstream bolts read this tuple via the field name "count".
            outputFieldsDeclarer.declare(new Fields("count"));
        }
    }

    /**
     * Bolt that inspects the "count" field of each incoming tuple and forwards
     * a "success" marker downstream.
     */
    static class StringBolt extends BaseRichBolt {
        private OutputCollector outputCollector;

        @Override
        public void prepare(Map<String, Object> map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.outputCollector = outputCollector;
        }

        /**
         * Processes one tuple: logs a match, emits "success" anchored to the
         * input, and acks the input.
         *
         * @param tuple incoming tuple carrying a "count" string field
         */
        @Override
        public void execute(Tuple tuple) {
            String count = tuple.getStringByField("count");
            if ("hello world".equals(count)) {
                System.err.println("已经成功拿到: " + count);
            }
            // Anchor the emitted tuple to the input so downstream failures are
            // replayed, then ack it. Bug fix: BaseRichBolt does NOT auto-ack
            // (unlike BaseBasicBolt) — the original never acked, so with acking
            // enabled every tuple would time out and be replayed forever.
            outputCollector.emit(tuple, new Values("success"));
            outputCollector.ack(tuple);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("success"));
        }
    }
}