最近开始学 Storm,模仿 Storm 入门教程中给出的单词计数例子,改写了一个进行矩阵乘法运算的 Storm 程序,用以入门 Storm。下面是代码及注释。
首先构造一个Topology
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
/**
 * Entry point that wires the matrix-multiplication topology together and runs
 * it on an in-process {@link LocalCluster} for a fixed amount of time.
 *
 * <p>The topology has one spout ({@code MatrixReader}) feeding one bolt
 * ({@code MatrixDealer}) through a shuffle grouping.
 */
public class TopologyGo {
    public static void main(String[] args) throws InterruptedException {
        TopologyBuilder builder = new TopologyBuilder();
        // Spout that reads the file holding the matrices.
        builder.setSpout("matrix-spout", new MatrixReader());
        // Bolt that performs the matrix computation, connected to the spout
        // with a shuffle (random) grouping.
        builder.setBolt("matrix-dealer", new MatrixDealer())
               .shuffleGrouping("matrix-spout");

        Config conf = new Config();
        // Path of the input file; the spout looks it up under the same key.
        conf.put("matrixFile", "/home/nfs/input/storm/matrixFile");
        conf.setDebug(false);
        conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, 1);

        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology("deal-matrix", conf, builder.createTopology());
            // Let the topology run for 100000 ms before stopping (test run only).
            Thread.sleep(100000);
        } finally {
            // Always stop the local cluster, even if submission fails or the
            // sleep is interrupted, so the in-process cluster is not leaked.
            cluster.shutdown();
        }
    }
}
spout负责读取数据,下面是作为spout的MatrixReader
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
public class MatrixReader extends BaseRichSpout{
private SpoutOutputCollector collector;
private FileReader fileReader;
private boolean completed = false;
public void ack(Object msgId) { //定义处理成功的应答方式
System.out.println("OK:"+msgId);
}
public void close() {}
public void fail(Object msgId) { //定义处理失败的应答方式
System.out.println("FAIL:"+msgId);
}
public void nextTuple() { //反复调用以读取数据
/**
* The nextuple it is called forever, so if we have been readed the file
* we will wait and then return
*/
if(completed){
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
}
return;
}
String str;
BufferedReader reader = new BufferedReader(fileReader);//打开文件
try{
while((str = reader.readLine()) != null){
this.collector.emit(new Values(str),str);//发送数据给bolt
}
}catch(Exception e){
throw new RuntimeException("Error reading tuple",e);
}finally{
completed = true;
}
}
public void open(Map conf, TopologyContext context,
SpoutOutputCollector collector) { //在nextTuple开始循环前调用,获取数据或者做一些准备工作
try {
this.fileReader = new FileReader(conf.get("matrixFile").toString()); //创建要读取文件的fileReader
} catch (FileNotFoundException e) {
throw new RuntimeException("Error reading file ["+conf.get("wordFile")+"]");
}
this.collector = collector;
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("matrix"));
}
}
注:读取的文件格式如下:每行字符串存储两个矩阵,矩阵间用“|”分割,矩阵行之间用“;”分割,同一矩阵同一行的不同元素间用“,”分割
示例如下:1,3,4,5;4,4,5,7;1,3,6,8;2,4,5,7;3,5,6,3|1,2;2,3;5,6;5,4 表示一个 5x4 的矩阵和一个 4x2 的矩阵
使用bolt进行数据处理,在这个例子中MatrixDealer作为bolt
import java.util.*;
import java.io.*;
import java.util.regex.*;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
/**
 * Bolt that parses two matrices out of each incoming line, multiplies them and
 * keeps the product in memory; the collected products are printed by
 * {@link #cleanup()}.
 *
 * <p>Line format: {@code A|B}, where rows of a matrix are separated by
 * {@code ;} and the elements of a row by {@code ,}, for example
 * {@code 1,2;3,4|5,6;7,8}.
 */
public class MatrixDealer extends BaseBasicBolt {
    ArrayList<double[][]> record;

    /** Prints every product collected so far, one tab-separated row per line. */
    @Override
    public void cleanup() {
        System.out.println("matrix result output");
        for (double[][] matrix : this.record) {
            for (double[] row : matrix) {
                StringBuilder line = new StringBuilder();
                for (double value : row) {
                    line.append(value).append("\t");
                }
                System.out.println(line);
            }
            System.out.println();
        }
    }

    /** Creates the empty result list before any tuple is processed. */
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        this.record = new ArrayList<double[][]>();
    }

    /**
     * Reads the line from the first field of the tuple, multiplies the two
     * matrices it encodes and stores the product.
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String sentence = input.getString(0);
        double[][] product = this.calc_multi(sentence);
        this.record.add(product);
    }

    /** This bolt emits nothing, so no output fields are declared. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }

    /**
     * Extracts the first matrix (the part before {@code |}) from a line.
     *
     * @param m_txt line in {@code A|B} format
     * @return the parsed matrix A
     * @throws IllegalArgumentException if the matrix is missing, ragged or
     *         contains a non-numeric element
     */
    public double[][] getMatrixa(String m_txt) {
        return parseMatrix(matrixPart(m_txt, 0));
    }

    /**
     * Extracts the second matrix (the part after {@code |}) from a line.
     *
     * @param m_txt line in {@code A|B} format
     * @return the parsed matrix B
     * @throws IllegalArgumentException if the matrix is missing, ragged or
     *         contains a non-numeric element
     */
    public double[][] getMatrixb(String m_txt) {
        return parseMatrix(matrixPart(m_txt, 1));
    }

    /**
     * Multiplies two matrices.
     *
     * @param m_a left operand, r x n
     * @param m_b right operand, n x c
     * @return the r x c product
     * @throws IllegalArgumentException if {@code m_b} is empty or ragged, or if
     *         a row of {@code m_a} does not have as many elements as
     *         {@code m_b} has rows
     */
    public double[][] m_multi(double[][] m_a, double[][] m_b) {
        if (m_b.length == 0) {
            throw new IllegalArgumentException("Second matrix has no rows");
        }
        int inner = m_b.length;
        int cols = m_b[0].length;
        for (double[] rowB : m_b) {
            if (rowB.length != cols) {
                throw new IllegalArgumentException("Second matrix is not rectangular");
            }
        }
        int rows = m_a.length;
        double[][] result = new double[rows][cols];
        for (int i = 0; i < rows; i++) {
            double[] rowA = m_a[i];
            if (rowA.length != inner) {
                throw new IllegalArgumentException(
                        "Dimension mismatch: row " + i + " of the first matrix has "
                        + rowA.length + " elements but the second matrix has "
                        + inner + " rows");
            }
            for (int j = 0; j < cols; j++) {
                // Dot product of row i of A with column j of B.
                double sum = 0;
                for (int k = 0; k < inner; k++) {
                    sum = sum + rowA[k] * m_b[k][j];
                }
                result[i][j] = sum;
            }
        }
        return result;
    }

    /**
     * Parses both matrices from a line and returns their product.
     *
     * @param m_line line in {@code A|B} format
     * @return the product A x B
     * @throws IllegalArgumentException if the line is malformed or the
     *         dimensions do not allow multiplication
     */
    public double[][] calc_multi(String m_line) {
        double[][] m_a = this.getMatrixa(m_line);
        double[][] m_b = this.getMatrixb(m_line);
        return this.m_multi(m_a, m_b);
    }

    /** Returns the index-th {@code |}-separated part of the line, or fails with a clear message. */
    private static String matrixPart(String line, int index) {
        String[] parts = line.split("\\|");
        if (parts.length <= index) {
            throw new IllegalArgumentException(
                    "Expected matrix #" + (index + 1) + " in line [" + line + "]");
        }
        return parts[index];
    }

    /** Parses one matrix written as rows separated by ';' and elements separated by ','. */
    private static double[][] parseMatrix(String text) {
        String[] rows = text.split(";");
        if (rows.length == 0) {
            throw new IllegalArgumentException("Matrix has no rows: [" + text + "]");
        }
        // The first row fixes the expected number of columns.
        int width = rows[0].split(",").length;
        double[][] matrix = new double[rows.length][width];
        for (int i = 0; i < rows.length; i++) {
            String[] cells = rows[i].split(",");
            if (cells.length != width) {
                throw new IllegalArgumentException(
                        "Row " + i + " has " + cells.length + " elements, expected "
                        + width + ": [" + text + "]");
            }
            for (int j = 0; j < width; j++) {
                matrix[i][j] = Double.parseDouble(cells[j]);
            }
        }
        return matrix;
    }
}