一.项目需求
实时分析某电商网站产生的订单数据。
分析订单的有效订单量、订单总金额、优惠后总金额、下单用户数。
二.使用主要技术
Kafka,Storm,Zookeeper,Redis,MySql
三.项目架构
架构解析:从web端获取订单数据,存入kafka中;storm从kafka中读取订单数据并进行处理,将处理完成的结果写入redis和mysql中
四.项目开发:
模拟产生数据类:
package com.xnmzdx.order;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Properties;
import java.util.Random;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
/**
* 产生模拟数据发送到kafka
* @author Administrator
*
*/
/**
 * Generates simulated order messages and publishes them to the Kafka topic
 * {@code orders}.
 *
 * <p>Message format (tab-separated):
 * {@code orderId \t memberId \t totalPrice \t discount \t channel \t createDate}
 * e.g. {@code 70926\t96510\t1082\t51\t1\t2019-11-16 11:38:59}
 */
public class SendMessage {
    public static void main(String[] args) {
        // Producer configuration.
        Properties props = new Properties();
        // Broker to bootstrap from.
        props.put("bootstrap.servers", "storm01:9092");
        // Require leader acknowledgement only.
        props.put("acks", "1");
        // Serialize both key and value as Strings.
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
        // Hoisted out of the loop: the original allocated a new Random and a new
        // SimpleDateFormat on every one of the 100,000 iterations.
        Random rd = new Random();
        SimpleDateFormat slf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            for (int i = 0; i < 100000; i++) {
                int id = rd.nextInt(1000000);            // order id
                int memberid = rd.nextInt(100000);       // member (user) id
                int totalPrice = rd.nextInt(1000) + 100; // order amount
                int youhui = rd.nextInt(100);            // discount amount
                int sendpay = rd.nextInt(2) + 1;         // channel: 1 = PC, 2 = mobile
                int year = 2019;
                int month = rd.nextInt(12);              // 0-11; Calendar months are 0-based
                int day = rd.nextInt(28) + 1;
                int hour = rd.nextInt(24);
                int minute = rd.nextInt(60);
                int second = rd.nextInt(60);
                // Build a random timestamp: yyyy-MM-dd HH:mm:ss
                Calendar cl = Calendar.getInstance();
                cl.set(year, month, day, hour, minute, second);
                Date date = cl.getTime();
                String createdate = slf.format(date);    // order creation time
                String message = id + "\t" + memberid + "\t" + totalPrice + "\t" + youhui
                        + "\t" + sendpay + "\t" + createdate;
                // Publish to the "orders" topic.
                producer.send(new ProducerRecord<String, String>("orders", message));
            }
        } finally {
            // Flush pending records and release client resources even if send() throws.
            producer.close();
        }
    }
}
Bolt开发:
第一个bolt,检查数据有效性
package com.xnmzdx.order.bolt;
import org.apache.commons.lang.StringUtils;
import com.xnmzdx.order.utils.DataUtils;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
* 检查数据有效性
* @author Administrator
*
*/
/**
 * Validates raw order messages and forwards only well-formed records.
 *
 * <p>Expected input (tab-separated, 6 fields):
 * {@code orderId \t memberId \t totalPrice \t discount \t channel \t createDate}
 * e.g. {@code 70926\t96510\t1082\t51\t1\t2019-11-16 11:38:59}
 *
 * <p>A tuple is emitted downstream only when every field is non-empty and the
 * order date is after 2019-10-01; everything else is silently dropped.
 */
public class CheckDataBolt extends BaseBasicBolt {
    private static final long serialVersionUID = 2644592192189091510L;

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String raw = input.getString(0);
        // Drop null/empty payloads.
        if (raw == null || raw.length() == 0) {
            return;
        }
        String[] fields = raw.split("\t");
        // Drop records that do not have exactly the 6 expected fields.
        if (fields.length != 6) {
            return;
        }
        // Drop records containing any empty field.
        for (String field : fields) {
            if (StringUtils.isEmpty(field)) {
                return;
            }
        }
        // Drop records dated on or before the cutoff date.
        if (!DataUtils.isGreaterDate(fields[5], "2019-10-01")) {
            return;
        }
        // Valid record: forward orderid, memberid, totalPrice, youhui, sendpay, createdate.
        collector.emit(new Values(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5]));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Field names for the tuple emitted above.
        declarer.declare(new Fields("orderid", "memberid", "totalPrice", "youhui", "sendpay", "createdate"));
    }
}
第二个bolt,统计订单数据、订单总金额、优惠后总金额、下单人数,将结果写入到redis
package com.xnmzdx.order.bolt;
import java.math.BigDecimal;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import com.xnmzdx.order.utils.RedisUtils;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import redis.clients.jedis.Jedis;
/**
 * Aggregates per-channel order statistics into Redis and forwards the tuple
 * unchanged to the next bolt.
 *
 * <p>Redis key = {@code sendpay} (channel), value = CSV of:
 * order count, total amount, total amount after discount, distinct member count.
 *
 * <p>NOTE(review): member dedup is per-task and in-memory only; if this bolt's
 * parallelism is raised above 1, upstream should use fieldsGrouping on
 * {@code memberid} so the same member always lands on the same task — confirm.
 */
public class SaveRedisBolt extends BaseRichBolt {
    private static final long serialVersionUID = -6494066215714415933L;
    private OutputCollector collector;
    // Dedup map for counting distinct members: key = sendpay + "_" + memberid.
    // Instance field (was static): a static map is shared by all tasks in the
    // worker JVM and re-assigned by each task's prepare(), clobbering siblings.
    private Map<String, String> memberMap;
    private Jedis jedis; // Redis client used for the counters

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.memberMap = new HashMap<String, String>();
        this.jedis = RedisUtils.getJedis(); // obtain a client from the shared pool
    }

    @Override
    public void execute(Tuple input) {
        String orderid = input.getStringByField("orderid");
        String memberid = input.getStringByField("memberid");
        String totalPrice = input.getStringByField("totalPrice");
        String youhui = input.getStringByField("youhui");
        String sendpay = input.getStringByField("sendpay");
        String createdate = input.getStringByField("createdate");
        // Update the Redis counters (with member dedup).
        saveCounterMember(sendpay, memberid, orderid, totalPrice, youhui);
        collector.emit(new Values(orderid, memberid, totalPrice, youhui, sendpay, createdate));
        // Acknowledge so the KafkaSpout marks the tuple as processed.
        collector.ack(input);
    }

    /**
     * Determines whether this member has been seen before on this channel,
     * then updates the Redis counters accordingly.
     */
    private void saveCounterMember(String sendpay, String memberid, String orderid,
            String totalPrice, String youhui) {
        String key = sendpay + "_" + memberid;
        boolean isMember = memberMap.containsKey(key);
        if (!isMember) {
            // BUG FIX: the original did memberMap.put(k, v) where v was the
            // just-looked-up null/empty value, so the map never held a non-empty
            // entry and every order was counted as a new member.
            memberMap.put(key, memberid);
        }
        saveCounterToRedis(sendpay, isMember, orderid, totalPrice, youhui);
    }

    /**
     * Reads the current CSV counters for the channel from Redis, increments
     * them, and writes them back.
     * value = COUNT(id),SUM(totalPrice),SUM(totalPrice - youhui),COUNT(DISTINCT memberid)
     */
    private void saveCounterToRedis(String sendpay, boolean isMember, String orderid,
            String totalPrice, String youhui) {
        String value = jedis.get(sendpay);
        if (value != null) {
            String[] split = value.split(",");
            int orderCount = Integer.parseInt(split[0]) + 1; // total order count
            BigDecimal totalSum = new BigDecimal(split[1]).add(new BigDecimal(totalPrice)); // total amount
            BigDecimal afterDiscountSum = new BigDecimal(split[2]).add(new BigDecimal(youhui));
            int memberCount = Integer.parseInt(split[3]) + (isMember ? 0 : 1); // distinct members
            value = orderCount + "," + totalSum + "," + afterDiscountSum + "," + memberCount;
        } else {
            // First record for this channel: 1 order, this price, price-minus-discount, 1 member.
            value = 1 + "," + totalPrice + ","
                    + (new BigDecimal(totalPrice).subtract(new BigDecimal(youhui))) + "," + 1;
        }
        jedis.set(sendpay, value);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Field names for the tuple forwarded to the MySQL bolt.
        declarer.declare(new Fields("orderid", "memberid", "totalPrice", "youhui", "sendpay", "createdate"));
    }
}
第三个bolt,将订单原始数据写入到MySql
package com.xnmzdx.order.bolt;
import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.common.JdbcClient;
import com.xnmzdx.order.utils.DBUtils;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
/**
 * Terminal bolt: persists each validated order row into the MySQL table
 * {@code realtime_orders}, then acknowledges the tuple.
 */
public class SaveMysqlBolt extends BaseRichBolt {
    private static final long serialVersionUID = 544229142026056862L;
    private JdbcClient jdbcClient;
    private OutputCollector collector;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.jdbcClient = DBUtils.getJdbcClient();
    }

    @Override
    public void execute(Tuple input) {
        String orderid = input.getStringByField("orderid");
        String memberid = input.getStringByField("memberid");
        String totalPrice = input.getStringByField("totalPrice");
        String youhui = input.getStringByField("youhui");
        String sendpay = input.getStringByField("sendpay");
        String createdate = input.getStringByField("createdate");
        // SECURITY FIX: the original concatenated tuple values straight into the
        // SQL string (SQL injection risk and broken statements on quote
        // characters). Bind the values as parameters instead.
        List<Column> row = new ArrayList<Column>();
        row.add(new Column<String>("orderid", orderid, Types.VARCHAR));
        row.add(new Column<String>("totalPrice", totalPrice, Types.VARCHAR));
        row.add(new Column<String>("youhui", youhui, Types.VARCHAR));
        row.add(new Column<String>("memberid", memberid, Types.VARCHAR));
        row.add(new Column<String>("sendpay", sendpay, Types.VARCHAR));
        row.add(new Column<String>("createdate", createdate, Types.VARCHAR));
        List<List<Column>> rows = new ArrayList<List<Column>>();
        rows.add(row);
        jdbcClient.executeInsertQuery(
                "insert into realtime_orders "
                        + "(orderid,totalPrice,youhui,memberid,sendpay,createdate) "
                        + "values (?,?,?,?,?,?)",
                rows);
        // Acknowledge so the spout does not replay this tuple (the original
        // never acked, which stalls/replays with acking enabled).
        collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: nothing is emitted downstream.
    }
}
4.工具类开发:
- 日期工具类
package com.xnmzdx.order.utils;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
* 日期工具类
* @author dwei
*
*/
/**
 * Date comparison helpers for order-record validation.
 */
public class DataUtils {

    /** Utility class; not instantiable. */
    private DataUtils() {}

    /**
     * Returns whether the calendar date of {@code createDate} is strictly after
     * {@code targetDate}.
     *
     * <p>Both inputs are parsed with the lenient pattern {@code yyyy-MM-dd}, so a
     * full timestamp such as {@code 2019-11-16 11:38:59} is accepted — the time
     * portion is ignored and only the date part is compared.
     *
     * @param createDate order date, e.g. {@code 2019-11-16 11:38:59}
     * @param targetDate cutoff date, e.g. {@code 2019-10-01}
     * @return {@code true} if createDate's date is after targetDate;
     *         {@code false} otherwise, or if either input cannot be parsed
     */
    public static boolean isGreaterDate(String createDate, String targetDate) {
        // SimpleDateFormat is not thread-safe, so a local instance is used.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            Date created = sdf.parse(createDate);
            Date target = sdf.parse(targetDate);
            return created.after(target);
        } catch (ParseException e) {
            // Malformed dates are treated as "not greater" rather than failing the bolt.
            e.printStackTrace();
            return false;
        }
    }
}
- Redis工具类
package com.xnmzdx.order.utils;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
/**
* redis工具类
*
*/
/**
 * Shared Jedis connection-pool helper for the topology.
 */
public class RedisUtils {
    private static JedisPool jedisPool;

    static {
        // Single-node Redis connection pool, initialized once per JVM.
        // (The original guarded with `if (jedisPool == null)`, which is always
        // true inside a static initializer — removed as dead code.)
        JedisPoolConfig poolConfig = new JedisPoolConfig();
        // Maximum total jedis instances in the pool.
        poolConfig.setMaxTotal(100);
        // Maximum idle jedis instances kept in the pool.
        poolConfig.setMaxIdle(10);
        // Maximum wait for a free connection before JedisConnectionException.
        poolConfig.setMaxWaitMillis(100000);
        // Skip validation on borrow (faster; borrowed connections may be stale).
        poolConfig.setTestOnBorrow(false);
        jedisPool = new JedisPool(poolConfig, "192.168.204.128", 6379);
    }

    /**
     * Borrows a connection from the pool.
     *
     * @return an authenticated Jedis client
     */
    public static Jedis getJedis() {
        Jedis jedis = jedisPool.getResource();
        // NOTE(review): consider passing the password to the JedisPool
        // constructor so auth happens on connection creation, not every borrow.
        jedis.auth("123456");
        return jedis;
    }

    /**
     * Returns a connection to the pool.
     *
     * @param jedis the client to return
     */
    public static void returnResource(Jedis jedis) {
        jedisPool.returnResource(jedis);
    }

    /**
     * @deprecated misspelled name kept for source compatibility;
     *             use {@link #returnResource(Jedis)} instead.
     */
    @Deprecated
    public static void ruturnResource(Jedis jedis) {
        returnResource(jedis);
    }
}
- Mysql工具类
package com.xnmzdx.order.utils;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.jdbc.common.ConnectionProvider;
import org.apache.storm.jdbc.common.HikariCPConnectionProvider;
import org.apache.storm.jdbc.common.JdbcClient;
/**
* 数据库工具类
*
*/
/**
 * MySQL connection helper built on storm-jdbc's HikariCP provider.
 */
public class DBUtils {
    // Initialized exactly once by the static block below; final prevents reassignment.
    private static final ConnectionProvider connectionProvider;

    static {
        // (The original guarded with `if (connectionProvider == null)`, which is
        // always true inside a static initializer — removed as dead code.)
        Map<String, Object> configMap = new HashMap<String, Object>();
        configMap.put("dataSourceClassName", "com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
        configMap.put("dataSource.url", "jdbc:mysql://localhost/taobao");
        configMap.put("dataSource.user", "root");
        configMap.put("dataSource.password", "123456");
        // Create and initialize the HikariCP-backed connection pool.
        ConnectionProvider provider = new HikariCPConnectionProvider(configMap);
        provider.prepare();
        connectionProvider = provider;
    }

    /**
     * Creates a JdbcClient backed by the shared connection pool.
     *
     * @return a new client with a 30-second query timeout
     */
    public static JdbcClient getJdbcClient() {
        return new JdbcClient(connectionProvider, 30);
    }
}
5.Topology开发
package com.xnmzdx.order.topology;
import java.util.UUID;
import com.xnmzdx.order.bolt.CheckDataBolt;
import com.xnmzdx.order.bolt.SaveMysqlBolt;
import com.xnmzdx.order.bolt.SaveRedisBolt;
import backtype.storm.Config;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
public class OrderTopology {
public static void main(String[] args) {
// kafka使用的zk地址,kafkaspout会根据zk找到kafka的地址
ZkHosts zkHosts = new ZkHosts("storm01:2181,storm02:2181,storm03:2181");
String topic = "orders";
String zkRoot = "/orders_spout"; // 在zk上创建一个节点,用于存储kafkaspout读取数据的一些基本信息
String id = UUID.randomUUID().toString();
SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, topic, zkRoot, id);
// 指定kafkaz消息使用StringScheme,用于解析读取数据的解析类型
kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
// 从头开始消费,从Kafka中的第一条数据开始读
kafkaConfig.forceFromStart = false;
// 创建拓扑
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("kafkaSpout", new KafkaSpout(kafkaConfig), 2);
builder.setBolt("checkBolt", new CheckDataBolt()).shuffleGrouping("kafkaSpout");
builder.setBolt("saveRedis", new SaveRedisBolt()).shuffleGrouping("checkBolt");
builder.setBolt("saveMysql", new SaveMysqlBolt()).shuffleGrouping("saveRedis");
// 创建本地集群对象,提交topology
Config conf = new Config();
conf.setNumWorkers(1);
conf.setDebug(false);
// LocalCluster cluster = new LocalCluster(); //本地提交
// cluster.submitTopology("CounterTopology", conf, builder.createTopology());
// try {
// Thread.sleep(50000);
// } catch (InterruptedException e) {
// e.printStackTrace();
// }
// cluster.killTopology("CounterTopology");
// cluster.shutdown();
//集群提交topology
try {
StormSubmitter.submitTopology("order_topology", conf, builder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
}
}
}
注释:源码已经打包上传到优快云的上传资源中,可自行下载;pom文件也在源码包中,可下载查看。
网盘地址:https://pan.baidu.com/s/1AVi9MGOuZuJv2I-sxjmGyg
提取码:pps9