【Storm】Storm Transactional Topology in Practice

I. Storm Transactional Topology in Practice: ITransactionalSpout

Case background: count a website's daily visits.

1. MyTxSpout, the transactional spout class, which implements ITransactionalSpout.

package transaction1;

import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.transactional.ITransactionalSpout;
import backtype.storm.tuple.Fields;

/**
 * Transactional spout class; implements ITransactionalSpout.
 */
public class MyTxSpout implements ITransactionalSpout<MyMeta>{
	
	private static final long serialVersionUID = 1L;
	
	// simulated data source (row id -> log line)
	private Map<Long, String> dbMap = null;
	
	public MyTxSpout() {
		
		dbMap = new HashMap<Long, String>();
		Random random = new Random();
		
		// the site being visited: taobao
		String hosts = "www.taobao.com";
		// session id of each visit
		String[] sessionId = {"5GFBAT3D3100A7A7255027A70", "5X16BCA8823AC4BD9CD196A5D", 
				              "5CFBA5BD76BACF436ACA9DCC8", "5D16C3E0209C16DEAA28L1824",
				              "5I16CB309251CCF6CE6223BA1", "5C16BC4MB91B85661FE22F413",
				              "5D16C1F5191CF9371Y32B58CF", "5D16C7A886E2P2AE3EA29FC3E",
				              "5C3FBA728FD7D264B80769B23", "5B16C0F7215109AG43528BA2D",
				              "5N16C2FE51E5619C2A1244215", "5D16C1EB1C7A751AE03201C3F"};
		
		// times of the visits
		String[] times = {"2019-06-29 08:01:36", "2019-06-29 08:11:37", "2019-06-29 08:31:38", 
				          "2019-06-29 09:23:07", "2019-06-29 10:51:27", "2019-06-29 10:51:56",
				          "2019-06-29 11:01:07", "2019-06-29 11:01:20", "2019-06-29 11:45:30",
				          "2019-06-29 12:31:49", "2019-06-29 12:41:51", "2019-06-29 12:51:37", 
				          "2019-06-29 13:11:27", "2019-06-29 13:20:40", "2019-06-29 13:31:38",
				          };
		
		// generate 15 rows of data and store them in dbMap
		for (long i = 0; i < 15; i++) {
			dbMap.put(i, hosts + "\t" + sessionId[random.nextInt(12)] + "\t" + times[random.nextInt(15)] + "\n");
		}
	}
	
	@Override
	public Coordinator getCoordinator(Map conf, TopologyContext context) {
		return new MyCoordinator();
	}

	@Override
	public Emitter getEmitter(Map conf, TopologyContext context) {
		return new MyEmitter(this.dbMap);
	}
	
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// the first field is tx, the second is log
		declarer.declare(new Fields("tx", "log"));
	}

	@Override
	public Map<String, Object> getComponentConfiguration() {
		// TODO Auto-generated method stub
		return null;
	}	
}
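Each generated row is a tab-separated log line ending in a newline, which is what the downstream bolt later reads with tuple.getString(1). As a minimal illustration (RowFormatDemo is a hypothetical helper, not part of the original example), this is the shape of one row:

package transaction1;

// Hypothetical demo class: prints one row in the same format the constructor above generates.
public class RowFormatDemo {
	public static void main(String[] args) {
		String row = "www.taobao.com" + "\t" + "5GFBAT3D3100A7A7255027A70" + "\t" + "2019-06-29 08:01:36" + "\n";
		System.out.print(row);   // host <TAB> session id <TAB> visit time
	}
}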

2. MyMeta, the metadata class. The metadata records the point from which the current transaction can re-send data. It is stored in ZooKeeper, and the spout serializes/deserializes it to and from ZooKeeper via Kryo.

package transaction1;

import java.io.Serializable;

/**
 * Metadata class.
 * Must implement Serializable.
 */
public class MyMeta implements Serializable{
	
	private static final long serialVersionUID = 1L;
	
	private long beginPoint;   // start offset of this transaction's batch
	
	private int num;           // number of tuples in the batch

	public long getBeginPoint() {
		return beginPoint;
	}

	public void setBeginPoint(long beginPoint) {
		this.beginPoint = beginPoint;
	}

	public int getNum() {
		return num;
	}

	public void setNum(int num) {
		this.num = num;
	}

	@Override
	public String toString() {
		return "MyMeta [beginPoint=" + beginPoint + ", num=" + num + "]";
	}
	
}
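Storm keeps this metadata in ZooKeeper and (de)serializes it with Kryo, so MyMeta should stick to simple fields and a no-arg constructor. The sketch below only illustrates that kind of round trip; it assumes the Kryo library is on the classpath, and MyMetaKryoDemo is a hypothetical helper, not something Storm requires you to write.

package transaction1;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

// Hypothetical demo: a Kryo serialize/deserialize round trip of MyMeta,
// similar in spirit to what Storm does when it stores batch metadata in ZooKeeper.
public class MyMetaKryoDemo {
	public static void main(String[] args) {
		Kryo kryo = new Kryo();
		kryo.register(MyMeta.class);

		MyMeta meta = new MyMeta();
		meta.setBeginPoint(5);
		meta.setNum(5);

		// serialize to bytes
		ByteArrayOutputStream bytes = new ByteArrayOutputStream();
		Output output = new Output(bytes);
		kryo.writeObject(output, meta);
		output.close();

		// deserialize back into a MyMeta instance
		Input input = new Input(new ByteArrayInputStream(bytes.toByteArray()));
		MyMeta restored = kryo.readObject(input, MyMeta.class);
		input.close();

		System.out.println(restored);   // MyMeta [beginPoint=5, num=5]
	}
}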

3. The Coordinator is responsible for starting a transaction and generating its metadata. initializeTransaction receives two parameters:
txid: the transaction id generated by Storm, the unique identifier of the batch
prevMetadata: the metadata object the coordinator produced for the previous transaction
Once the metadata is returned, Storm saves it together with the txid in ZooKeeper, so that after a failure Storm can have the emitter re-send the same batch.

There can be only one Coordinator instance.

package transaction1;

import java.math.BigInteger;

import backtype.storm.transactional.ITransactionalSpout.Coordinator;
import backtype.storm.utils.Utils;

/**
 * Coordinator class
 */
public class MyCoordinator implements Coordinator<MyMeta>{
	
	private final static int BATCH_NUM = 5;        // constant: each transaction (batch) contains 5 tuples
	
	// Start a transaction and generate its metadata.
	// txid is the transaction id generated by Storm (1 for the first batch in the log below); prevMetadata is the previous transaction's metadata.
	@Override
	public MyMeta initializeTransaction(BigInteger txid, MyMeta prevMetadata) {
		
		long beginPoint = 0;
		if (null == prevMetadata) {
			// prevMetadata == null means this is the first transaction, so start at position 0
			beginPoint = 0;
		} else {
			// start position of this transaction = previous start + previous batch size
			beginPoint = prevMetadata.getBeginPoint() + prevMetadata.getNum();
		}
		
		MyMeta myMeta = new MyMeta();         // metadata for this transaction
		myMeta.setBeginPoint(beginPoint);
		myMeta.setNum(BATCH_NUM);
		
		System.err.println("MyCoordinator - initializeTransaction, 启动一个事务: " + myMeta.toString());
		
		return myMeta;
	}

	@Override
	public boolean isReady() {
		Utils.sleep(200);         // sleep 0.2 seconds so the output is easier to follow
		System.err.println("MyCoordinator - isReady");
		return true;              // the transaction is emitted only when this returns true
	}

	@Override
	public void close() {
		// TODO Auto-generated method stub
		
	}
	
}
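To make the offset bookkeeping concrete, here is a minimal sketch (CoordinatorWalkthrough is a hypothetical helper, not part of the topology) that calls initializeTransaction by hand for three consecutive transactions and shows how beginPoint advances by BATCH_NUM each time:

package transaction1;

import java.math.BigInteger;

// Hypothetical demo: drive MyCoordinator directly to show how each transaction's
// metadata is derived from the previous one (beginPoint: 0, then 5, then 10).
public class CoordinatorWalkthrough {
	public static void main(String[] args) {
		MyCoordinator coordinator = new MyCoordinator();
		MyMeta previous = null;
		for (int txid = 1; txid <= 3; txid++) {
			MyMeta current = coordinator.initializeTransaction(BigInteger.valueOf(txid), previous);
			System.out.println("txid " + txid + " -> " + current);
			previous = current;
		}
	}
}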

4. The Emitter class: the emitter reads data from the data source and emits it into the stream. For a given transaction id and metadata it must always emit the same batch; if a failure occurs, Storm re-sends the same transaction id and metadata, and the attempt id carried in the TransactionAttempt object lets the topology recognize that the batch is a replay. There can be multiple Emitter instances.

package transaction1;

import java.math.BigInteger;
import java.util.Map;

import backtype.storm.coordination.BatchOutputCollector;
import backtype.storm.transactional.ITransactionalSpout.Emitter;
import backtype.storm.transactional.TransactionAttempt;
import backtype.storm.tuple.Values;

public class MyEmitter implements Emitter<MyMeta>{
	
	private Map<Long, String> dbMap = null;
	
	public MyEmitter(Map<Long, String> dbMap) {
		this.dbMap = dbMap;
	}
	
	@Override
	public void emitBatch(TransactionAttempt tx, MyMeta coordinatorMeta, BatchOutputCollector collector) {
		long beginPoint = coordinatorMeta.getBeginPoint();    // start offset of this batch
		int num = coordinatorMeta.getNum();                   // number of tuples to emit
		System.err.println("MyEmitter - emitBatch");
		for (long i = beginPoint; i < beginPoint + num; i++) {
			// skip missing rows (nothing to emit)
			if (null == dbMap.get(i)) {
				continue;
			}
			// emit the tuple; the first field must be the TransactionAttempt (tx)
			collector.emit(new Values(tx, dbMap.get(i)));
			System.err.println("MyEmitter - emitBatch: transactionId = " + tx.getTransactionId() + ", attemptId = " + tx.getAttemptId());
		}
	}

	@Override
	public void cleanupBefore(BigInteger txid) {
		
	}

	@Override
	public void close() {
		
	}

}

5. The MyTransactionBolt class extends BaseTransactionalBolt. execute processes each received tuple; once all tuples of the batch have been processed, finishBatch is called. There can be multiple BaseTransactionalBolt instances.

package transaction1;

import java.util.Map;

import backtype.storm.coordination.BatchOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseTransactionalBolt;
import backtype.storm.transactional.TransactionAttempt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

// Processing phase: multiple batches can be computed in parallel
public class MyTransactionBolt extends BaseTransactionalBolt{
	
	private static final long serialVersionUID = 1L;
	
	BatchOutputCollector collector;
	TransactionAttempt id;
	private Integer count = 0;
	
	@Override
	public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, TransactionAttempt id) {
		this.collector = collector;
		this.id = id;
		// print the transaction id and attempt id for easier observation
		System.err.println("MyTransactionBolt - prepare: transactionId = " + id.getTransactionId() + ", attemptId = " + id.getAttemptId());
		System.err.println("MyTransactionBolt - prepare: count = " + count);
	}

	@Override
	public void execute(Tuple tuple) {
		// each line emitted by the emitter is processed here; finishBatch runs after the whole batch
		
		// getValue(0) is the TransactionAttempt
		TransactionAttempt tx = (TransactionAttempt) tuple.getValue(0);
		
		// tx contains a transactionId and an attemptId:
		// transactionId identifies the transaction; all tuples of the same batch share it, and it stays the same on replay
		// attemptId identifies this attempt of the batch; it changes when the batch is replayed
		System.err.println("MyTransactionBolt - execute: transactionId = " + tx.getTransactionId() + ", attemptId = " + tx.getAttemptId());
		
		// getString(1) is the log line
		String log = tuple.getString(1);
		
		if (null != log && 0 != log.length()) {
			count ++;
		}
		System.err.println("MyTransactionBolt - execute: count = " + count);
	}
	
	@Override
	public void finishBatch() {
		collector.emit(new Values(id, count));
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// declare the output fields
		declarer.declare(new Fields("tx", "count"));
	}
	
}

6. The committer class, MyCommitter, extends BaseTransactionalBolt and also implements ICommitter. finishBatch is called only after every tuple of the batch has been processed, and it acts as the commit; for committers, Storm calls finishBatch in strict transaction-id order.

package transaction1;

import java.math.BigInteger;
import java.util.HashMap;
import java.util.Map;

import backtype.storm.coordination.BatchOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseTransactionalBolt;
import backtype.storm.transactional.ICommitter;
import backtype.storm.transactional.TransactionAttempt;
import backtype.storm.tuple.Tuple;

/**
 * Committer class (implements ICommitter)
 */
public class MyCommitter extends BaseTransactionalBolt implements ICommitter{
	
	private static final long serialVersionUID = 1L;
	
	private static Map<String, DbValue> dbMap = new HashMap<>();
	
	private final static String GLOBAL_KEY = "GLOBAL_KEY"; 
	
	private TransactionAttempt id;
	BatchOutputCollector collector;
	
	private int sum = 0;
	
	@Override
	public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, TransactionAttempt id) {
		this.collector = collector;
		this.id = id;
		System.err.println("MyCommitter - prepare: sum = " + sum);
	}

	@Override
	public void execute(Tuple tuple) {
		sum += tuple.getInteger(1);
		System.err.println("MyCommitter - execute: sum = " + sum);
	}

	@Override
	public void finishBatch() {
		System.err.println("MyICommitter - finishBatch, transactionId = " + id.getTransactionId() + ", attemptId = " + id.getAttemptId());
		DbValue value = dbMap.get(GLOBAL_KEY);
		DbValue newValue;
		if (null == value || !value.getTxId().equals(id.getTransactionId())) {
			newValue = new DbValue();
			newValue.setTxId(id.getTransactionId());
			
			if (null == value) {
				newValue.setCount(sum);
			} else {
				newValue.setCount(value.getCount() + sum);
			}
			dbMap.put(GLOBAL_KEY, newValue);
		} else {
			newValue = value;
		}
		
		System.err.println("MyICommitter - finishBatch, total ====================================== " + dbMap.get(GLOBAL_KEY).getCount());
//		collector.emit(tuple);       // 如果有需要,发送到下一级
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}
	
	public class DbValue{
		BigInteger txId;
		int count = 0;
		public BigInteger getTxId() {
			return txId;
		}
		public void setTxId(BigInteger txId) {
			this.txId = txId;
		}
		public int getCount() {
			return count;
		}
		public void setCount(int count) {
			this.count = count;
		}
	}
}
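The txid check in finishBatch is what makes the commit idempotent: the stored value remembers the transaction id of the last commit, so a replayed batch with the same id is not added twice. The sketch below (ReplayDemo and StoredValue are hypothetical names, independent of the Storm API) isolates that branch logic:

package transaction1;

import java.math.BigInteger;
import java.util.HashMap;
import java.util.Map;

// Hypothetical demo: the same "skip if the stored txid matches" logic as MyCommitter.finishBatch(),
// shown outside Storm so the effect of a replayed batch is easy to see.
public class ReplayDemo {

	static class StoredValue {
		BigInteger txId;
		int count;
	}

	private static final Map<String, StoredValue> store = new HashMap<>();
	private static final String KEY = "GLOBAL_KEY";

	static void commit(BigInteger txId, int batchSum) {
		StoredValue old = store.get(KEY);
		if (old == null || !old.txId.equals(txId)) {
			StoredValue updated = new StoredValue();
			updated.txId = txId;
			updated.count = (old == null ? 0 : old.count) + batchSum;
			store.put(KEY, updated);
		}
		// else: the same txid was committed already (a replay); keep the stored value unchanged
		System.out.println("after tx " + txId + ": total = " + store.get(KEY).count);
	}

	public static void main(String[] args) {
		commit(BigInteger.valueOf(1), 5);   // total = 5
		commit(BigInteger.valueOf(1), 5);   // replay of tx 1, skipped: total stays 5
		commit(BigInteger.valueOf(2), 5);   // total = 10
	}
}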

7. The topology driver class.

package transaction1;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.transactional.TransactionalTopologyBuilder;

public class MyTopo {
	
	public static void main(String[] args) {
		
		// TransactionalTopologyBuilder is deprecated; Trident is the recommended replacement
		// parameters: transactional topology id, spout id, the spout instance, and the spout parallelism
		TransactionalTopologyBuilder builder = new TransactionalTopologyBuilder("ttbId", "spoutId", new MyTxSpout(), 1);
		
		// one or more instances are allowed
		builder.setBolt("bolt1", new MyTransactionBolt(), 1).shuffleGrouping("spoutId");
		
		// only one instance
		builder.setBolt("commiter", new MyCommitter(), 1).shuffleGrouping("bolt1");
		
		Config conf = new Config();
//		conf.setDebug(true);
		
		if (args.length > 0)
		{
			try {
				StormSubmitter.submitTopology(args[0], conf, builder.buildTopology());
			} catch (AlreadyAliveException e) {
				e.printStackTrace();
			} catch (InvalidTopologyException e) {
				e.printStackTrace();
			}
		} else {
			LocalCluster localCluster = new LocalCluster();
			localCluster.submitTopology("myTopology", conf, builder.buildTopology());
		}
		
		
	}
	
}

8. Run the main program and watch the logs printed to the console.

The spout sends 15 rows of data in total, one row per tuple.

Only after the Coordinator's isReady method returns true is initializeTransaction called to generate this transaction's metadata and start the transaction.

The Emitter's emitBatch method receives the TransactionAttempt (containing the txid and attemptId) and the metadata (how many tuples to send and the start offset of this batch). Each batch emits 5 tuples; every transaction has a different txid and a different attemptId.

For each batch, MyTransactionBolt's prepare is called once, execute is called once per tuple, and after all tuples of the batch have been processed, finishBatch is called once. (A fresh copy of the bolt handles each batch, which is why count is 0 again at prepare time.)

For each batch, the committer's prepare is called once, execute is called once (there is a single upstream MyTransactionBolt task, so one count tuple arrives per batch), and finishBatch is called once; here finishBatch is the commit.
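As a quick sanity check of the numbers to expect (a standalone sketch, independent of Storm; BatchMath is a hypothetical helper): 15 rows with BATCH_NUM = 5 give three data-carrying batches and a final committed total of 15, after which later batches find no rows and emit nothing. The console output of one run follows the sketch.

package transaction1;

// Hypothetical demo: the batch arithmetic for the 15 demo rows with a batch size of 5.
public class BatchMath {
	public static void main(String[] args) {
		int rows = 15, batchSize = 5, total = 0;
		for (int begin = 0; begin < rows; begin += batchSize) {
			int count = Math.min(batchSize, rows - begin);
			total += count;
			System.out.println("rows [" + begin + ", " + (begin + count) + ") -> batch count " + count + ", running total " + total);
		}
		// Later transactions find no rows (dbMap.get(i) == null), so the committed total stays at 15.
	}
}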

MyCoordinator - isReady
MyCoordinator - initializeTransaction, starting a transaction: MyMeta [beginPoint=0, num=5]
MyEmitter - emitBatch
MyEmitter - emitBatch: transactionId = 1, attemptId = 8609859021409715612
MyEmitter - emitBatch: transactionId = 1, attemptId = 8609859021409715612
MyEmitter - emitBatch: transactionId = 1, attemptId = 8609859021409715612
MyEmitter - emitBatch: transactionId = 1, attemptId = 8609859021409715612
MyTransactionBolt - prepare: transactionId = 1, attemptId = 8609859021409715612
MyEmitter - emitBatch: transactionId = 1, attemptId = 8609859021409715612
MyTransactionBolt - prepare: count = 0
MyTransactionBolt - execute: transactionId = 1, attemptId = 8609859021409715612
MyTransactionBolt - execute: count = 1
MyTransactionBolt - execute: transactionId = 1, attemptId = 8609859021409715612
MyTransactionBolt - execute: count = 2
MyTransactionBolt - execute: transactionId = 1, attemptId = 8609859021409715612
MyTransactionBolt - execute: count = 3
MyTransactionBolt - execute: transactionId = 1, attemptId = 8609859021409715612
MyTransactionBolt - execute: count = 4
MyTransactionBolt - execute: transactionId = 1, attemptId = 8609859021409715612
MyTransactionBolt - execute: count = 5
MyCommitter - prepare: sum = 0
MyCommitter - execute: sum = 5
MyCommitter - finishBatch, transactionId = 1, attemptId = 8609859021409715612
MyCommitter - finishBatch, total ====================================== 5

MyCoordinator - isReady
MyCoordinator - initializeTransaction, starting a transaction: MyMeta [beginPoint=5, num=5]
MyE