Storm: Real-Time Data Analysis with Storm + Kafka + Flume + Zookeeper + MySQL (Program Example)

This post presents a program example of real-time data analysis built with Storm, Kafka, Flume, Zookeeper and MySQL, covering environment startup, project creation, dependency configuration, bolt implementation, and database access.


Published 2018-03-04 23:44:59 by 冰河


Copyright notice: This is an original post by the author and may not be reproduced without permission. When reprinting, please cite the source: http://blog.youkuaiyun.com/l1028386804/article/details/79441007

I. Preface

This post builds on "Storm: Real-Time Data Analysis with Storm + Kafka + Flume + Zookeeper + MySQL (Environment Setup)"; please read that post first.

Before starting, bring up Storm, Kafka, Flume, Zookeeper and MySQL on the servers. For the details, see the environment-setup post mentioned above.

II. Overview

To keep things simple, we just append words to /home/flume/log.log, one word per line. Storm receives each word and updates its count in the database: if the word is not yet present, a new row is inserted; if it already exists, its count is incremented. This is done with a single INSERT ... ON DUPLICATE KEY UPDATE statement; for the SQL background, see the post "MySQL: Insert When Absent, Update When Present".

III. Implementation

1. Create the project

Create a Maven project; its structure is shown below.
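The original post showed a screenshot of the project structure here. A plausible layout, inferred from the package and class names used in the code below, looks like this:

storm-kafka-mysql
├── pom.xml
└── src/main/java/com/lyz/storm
    ├── StormTopologyDriver.java
    ├── bolt
    │   ├── MySplitBolt.java
    │   └── MyWordCountAndPrintBolt.java
    └── db
        └── DBProvider.java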

2. Configure pom.xml

 
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.lyz</groupId>
    <artifactId>storm-kafka-mysql</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka</artifactId>
            <version>1.1.0</version>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.28</version>
        </dependency>
        <dependency>
            <groupId>c3p0</groupId>
            <artifactId>c3p0</artifactId>
            <version>0.9.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.12</artifactId>
            <version>1.0.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.zookeeper</groupId>
                    <artifactId>zookeeper</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>1.0.0</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- The main class to run; adjust the package name to match your own project -->
                            <mainClass>com.lyz.storm.StormTopologyDriver</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
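With the assembly plugin bound to the package phase as configured above, running mvn clean package produces both the regular jar and storm-kafka-mysql-1.0-SNAPSHOT-jar-with-dependencies.jar; the latter is the one to submit to a Storm cluster.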

3. Implement the MySplitBolt class that splits lines into words

 
package com.lyz.storm.bolt;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * This bolt receives data from the KafkaSpout and forwards each word to MyWordCountAndPrintBolt.
 * @author liuyazhuang
 */
public class MySplitBolt extends BaseBasicBolt {

    private static final long serialVersionUID = 4482101012916443908L;

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        // 1. Fetch the data.
        // If the spout configured in StormTopologyDriver were MyLocalFileSpout, we would read
        // the "juzi" field it declares in declareOutputFields:
        // byte[] juzi = (byte[]) input.getValueByField("juzi");
        // Here StormTopologyDriver uses a KafkaSpout, whose declareOutputFields declares "bytes",
        // so we read the "bytes" field; this is how the data arrives from Kafka.
        byte[] juzi = (byte[]) input.getValueByField("bytes");
        // 2. Split the line into words.
        String[] strings = new String(juzi).split(" ");
        // 3. Emit each word with an initial count of 1 (Values builds the output list for us).
        for (String word : strings) {
            collector.emit(new Values(word, 1));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "num"));
    }
}
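The comments above mention a MyLocalFileSpout that can replace the KafkaSpout for local testing, but the original post does not show it. The following is only a minimal sketch of what such a spout might look like; the package com.lyz.storm.spout and the implementation details are assumptions. It reads /home/flume/log.log line by line and emits each line as bytes under the "juzi" field referenced in the commented-out branch of MySplitBolt.

package com.lyz.storm.spout;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Map;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

/**
 * Hypothetical local-file spout for testing without Kafka (a sketch, not the author's original class).
 */
public class MyLocalFileSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;
    private SpoutOutputCollector collector;
    private BufferedReader reader;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        try {
            // Read the same file that Flume tails in the environment-setup post.
            this.reader = new BufferedReader(new FileReader("/home/flume/log.log"));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void nextTuple() {
        try {
            String line = reader.readLine();
            if (line != null) {
                // Emit the raw line as bytes under the "juzi" field expected by MySplitBolt's alternative branch.
                collector.emit(new Values(line.getBytes()));
            } else {
                // No new data yet; back off briefly.
                Utils.sleep(100);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("juzi"));
    }
}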

4. Implement the MyWordCountAndPrintBolt class that writes counts to the database

 
package com.lyz.storm.bolt;

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;

import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;

import com.lyz.storm.db.DBProvider;

/**
 * Aggregates the word counts and persists the results to MySQL.
 * @author liuyazhuang
 */
public class MyWordCountAndPrintBolt extends BaseBasicBolt {

    private static final long serialVersionUID = 5564341843792874197L;
    private DBProvider provider;

    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        // Initialize the database connection provider once per bolt instance.
        // The same hook could be used to connect to Redis or any other store.
        provider = new DBProvider();
        super.prepare(stormConf, context);
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String word = (String) input.getValueByField("word");
        Integer num = (Integer) input.getValueByField("num");
        Connection conn = null;
        Statement stmt = null;
        try {
            conn = provider.getConnection();
            stmt = conn.createStatement();
            // Insert the word if it does not exist yet, otherwise add to its count.
            stmt.executeUpdate("INSERT INTO word_count (word, count) VALUES ('" + word + "', " + num
                    + ") ON DUPLICATE KEY UPDATE count = count + " + num);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            if (stmt != null) {
                try {
                    stmt.close();
                    stmt = null;
                } catch (Exception e2) {
                    e2.printStackTrace();
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                    conn = null;
                } catch (Exception e2) {
                    e2.printStackTrace();
                }
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        // This is the last bolt in the topology, so no output fields are declared.
    }
}
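The executeUpdate call above builds the SQL by string concatenation, which works for simple whitespace-separated words but is fragile with respect to quoting and SQL injection. As an alternative, a PreparedStatement version of the same upsert could look like the sketch below. It is not the original author's code; it is a method that could be added inside MyWordCountAndPrintBolt, assuming an extra import of java.sql.PreparedStatement.

    // Sketch: the same upsert written with a PreparedStatement instead of string concatenation.
    private void upsertWordCount(String word, int num) {
        Connection conn = null;
        PreparedStatement ps = null;
        try {
            conn = provider.getConnection();
            ps = conn.prepareStatement(
                    "INSERT INTO word_count (word, count) VALUES (?, ?) "
                    + "ON DUPLICATE KEY UPDATE count = count + ?");
            ps.setString(1, word);
            ps.setInt(2, num);
            ps.setInt(3, num);
            ps.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // Return the statement and connection to the pool.
            DBProvider.closePreparedStatement(ps);
            DBProvider.closeConnection(conn);
        }
    }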

5. Implement the DBProvider class for database access

 
package com.lyz.storm.db;

import java.beans.PropertyVetoException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import com.mchange.v2.c3p0.ComboPooledDataSource;

/**
 * JDBC access to MySQL backed by a c3p0 connection pool.
 * @author liuyazhuang
 */
public class DBProvider {

    private static ComboPooledDataSource source;
    private static final String DB_DRIVER = "com.mysql.jdbc.Driver";
    private static final String DB_URL = "jdbc:mysql://127.0.0.1:3306/sharding_0?useUnicode=true&characterEncoding=UTF-8&useOldAliasMetadataBehavior=true";
    private static final String USER = "root";
    private static final String PASSWORD = "root";

    static {
        try {
            source = new ComboPooledDataSource();
            source.setDriverClass(DB_DRIVER);
            source.setJdbcUrl(DB_URL);
            source.setUser(USER);
            source.setPassword(PASSWORD);
            source.setInitialPoolSize(10);
            source.setMaxPoolSize(20);
            source.setMinPoolSize(5);
            source.setAcquireIncrement(1);
            source.setMaxIdleTime(3);
            source.setMaxStatements(3000);
            source.setCheckoutTimeout(2000);
        } catch (PropertyVetoException e) {
            e.printStackTrace();
        }
    }

    /**
     * Get a connection from the pool. The connection is returned directly rather than
     * cached in a static field, so concurrent bolt executors do not share or overwrite it.
     *
     * @return a pooled database connection
     */
    public Connection getConnection() throws SQLException {
        return source.getConnection();
    }

    /**
     * Close a connection, returning it to the pool.
     */
    public static void closeConnection(Connection con) {
        if (con != null) {
            try {
                con.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    public static void closeResultSet(ResultSet rs) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    public static void closePreparedStatement(PreparedStatement ps) {
        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
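To verify that the topology is actually writing counts, a small throwaway check like the following can be run against the same pool. The class name WordCountCheck is hypothetical and not part of the original project; it simply queries the word_count table through DBProvider.

package com.lyz.storm.db;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

/**
 * Optional sanity check: prints the current contents of word_count so you can
 * confirm the topology is updating counts while it runs.
 */
public class WordCountCheck {

    public static void main(String[] args) throws Exception {
        DBProvider provider = new DBProvider();
        Connection conn = provider.getConnection();
        PreparedStatement ps = conn.prepareStatement(
                "SELECT word, count FROM word_count ORDER BY count DESC");
        ResultSet rs = ps.executeQuery();
        while (rs.next()) {
            System.out.println(rs.getString("word") + " -> " + rs.getInt("count"));
        }
        DBProvider.closeResultSet(rs);
        DBProvider.closePreparedStatement(ps);
        DBProvider.closeConnection(conn);
    }
}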

6. Implement the entry class StormTopologyDriver

 
package com.lyz.storm;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.topology.TopologyBuilder;

import com.lyz.storm.bolt.MySplitBolt;
import com.lyz.storm.bolt.MyWordCountAndPrintBolt;

/**
 * This driver wires Kafka, Storm and MySQL together.
 *
 * Before running it, create the Kafka topic:
 * [root@liuyazhuang kafka]# bin/kafka-topics.sh --create --zookeeper liuyazhuang1:2181 --replication-factor 1 --partitions 3 --topic wordCount
 *
 * Then feed data into Kafka by opening a console producer and typing words:
 * [root@liuyazhuang kafka]# bin/kafka-console-producer.sh --broker-list liuyazhuang:9092 --topic wordCount
 *
 * @author liuyazhuang
 */
public class StormTopologyDriver {

    public static void main(String[] args) throws Exception {
        // 1. Build the topology: KafkaSpout -> MySplitBolt -> MyWordCountAndPrintBolt.
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts("192.168.209.121:2181"), "wordCount", "/wordCount", "wordCount");
        topologyBuilder.setSpout("KafkaSpout", new KafkaSpout(spoutConfig), 2);
        topologyBuilder.setBolt("bolt1", new MySplitBolt(), 4).shuffleGrouping("KafkaSpout");
        topologyBuilder.setBolt("bolt2", new MyWordCountAndPrintBolt(), 2).shuffleGrouping("bolt1");

        // 2. Submit the topology.
        Config config = new Config();
        config.setNumWorkers(2);
        StormTopology stormTopology = topologyBuilder.createTopology();

        if (args != null && args.length > 0) {
            // Cluster mode: the first argument is the topology name.
            StormSubmitter.submitTopology(args[0], config, stormTopology);
        } else {
            // Local mode for development and debugging.
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("wordcount", config, stormTopology);
        }
    }
}
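To run locally, execute StormTopologyDriver.main with no arguments; it starts a LocalCluster. To run on a cluster, package the project with mvn clean package and submit the assembled jar with something like storm jar storm-kafka-mysql-1.0-SNAPSHOT-jar-with-dependencies.jar com.lyz.storm.StormTopologyDriver wordcountTopology, where the first program argument (wordcountTopology here is just an example) becomes the topology name.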

7. Create the database

Run the following script to create the database and the word_count table:

 
CREATE DATABASE sharding_0;
USE sharding_0;
CREATE TABLE `word_count` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `word` varchar(255) DEFAULT '',
  `count` int(11) DEFAULT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `word` (`word`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

At this point, the program example is complete.

IV. Note

The complete source code for this example (Storm + Kafka + Flume + Zookeeper + MySQL real-time data analysis) can be downloaded from http://download.youkuaiyun.com/download/l1028386804/10269075.
