Storm 1.1.0 <Review the Old, Learn the New: Integrating HDFS and Storm>

1. Environment

apache-storm-1.1.0
Hadoop 2.8.0

Maven dependencies used:

<dependencies>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>1.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-hdfs</artifactId>
        <version>1.1.0</version>
    </dependency>
</dependencies>

Requirement: read MapReduce log files stored on HDFS and count the number of log lines at each of the INFO, WARN, DEBUG, and ERROR levels.

The idea is simple: the Spout reads the file from HDFS, and the Bolts do a word count on the log level, which is first extracted from each line with a regular expression.
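
To make the regular-expression step concrete, here is a small standalone sketch. The sample log line is hypothetical (default Hadoop log4j layout); the pattern requires at least 23 characters, roughly the timestamp prefix, before the level keyword, and captures the level in group 1.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RegexDemo {
    public static void main(String[] args) {
        // Same pattern as in the topology: at least 23 leading characters, then the level.
        Pattern levelPattern = Pattern.compile(".{23}(INFO|DEBUG|WARN|ERROR)");

        // Hypothetical sample line in the default Hadoop log4j layout.
        String line = "2017-06-01 10:15:23,123 INFO org.apache.hadoop.mapreduce.Job: Running job";

        Matcher m = levelPattern.matcher(line);
        if (m.find()) {
            System.out.println(m.group(1)); // prints: INFO
        }
    }
}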

2. Implementation

LogLevelCountTopology

Parameters used (passed on the command line and fed to HdfsSpout): HdfsUri, SourceDir, ArchiveDir, and BadFilesDir.


import neu.bolt.CountBolt;
import neu.bolt.ExtractBolt;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.hdfs.spout.HdfsSpout;
import org.apache.storm.hdfs.spout.TextFileReader;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

import java.util.HashMap;


public class LogLevelCountTopology {


    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException, InterruptedException {
        System.setProperty("HADOOP_USER_NAME", "root");
        if (args.length != 4) {
            System.out.println("Usage <HdfsUri SourceDir ArchiveDir BadFilesDir>");
            System.exit(1);
        }
        TopologyBuilder builder = new TopologyBuilder();
        HdfsSpout hdfsSpout = new HdfsSpout()
                .setReaderType("text")
                .withOutputFields(TextFileReader.defaultFields)
                .setHdfsUri(args[0])
                .setSourceDir(args[1])
                .setArchiveDir(args[2])
                .setBadFilesDir(args[3]);


        HashMap<String, Object> hashMap = new HashMap<>();
        // regular expression used to extract the log level from each line
        hashMap.put(ExtractBolt.REGEX, ".{23}(INFO|DEBUG|WARN|ERROR)");
        hashMap.put(ExtractBolt.FIELD, "line");
        builder.setSpout("hdfsSpout", hdfsSpout, 1);
        builder.setBolt("extractbolt", new ExtractBolt(), 1)
                .addConfigurations(hashMap).shuffleGrouping("hdfsSpout");
        builder.setBolt("countBolt", new CountBolt(), 1)
                .fieldsGrouping("extractbolt", new Fields("level"));

        Config conf = new Config();
        conf.setDebug(true);


        conf.setMaxTaskParallelism(1);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("hdfsLogLevelCountTopology", conf, builder.createTopology());
        Thread.sleep(90000);
        cluster.shutdown();
    }
}
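
The code above runs the topology in local mode and shuts it down after 90 seconds. For reference, submitting the same topology to a running Storm cluster would normally go through StormSubmitter instead of LocalCluster; a minimal sketch, assuming the jar is packaged with its dependencies and launched via the storm jar command (the worker count is an arbitrary example):

import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;

public class ClusterSubmitSketch {
    // Sketch only: submit an already-built topology to a real cluster instead of LocalCluster.
    public static void submit(TopologyBuilder builder) throws Exception {
        Config conf = new Config();
        conf.setNumWorkers(2); // hypothetical worker count
        StormSubmitter.submitTopology("hdfsLogLevelCountTopology", conf, builder.createTopology());
    }
}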

ExtractBolt


import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExtractBolt implements IRichBolt {
    public static final String REGEX = "regex";
    public static final String FIELD = "field";
    String field;
    Pattern regex;
    OutputCollector collector;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        String regexString = (String) stormConf.get(REGEX);
        this.collector = collector;
        this.field = (String) stormConf.get(FIELD);
        this.regex = Pattern.compile(regexString);
    }

    public void execute(Tuple input) {
        String log = input.getStringByField(field);
        if (log != null) {
            Matcher matcher = regex.matcher(log);
            if (matcher.find()) {
                String level = matcher.group(1);
                collector.emit(new Values(level));
            } else {
                System.err.println("不包含INFO|DEBUG|ERROR|WARN 日志:" + log);
            }
        }
        collector.ack(input);
    }

    public void cleanup() {
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("level"));
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
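
As a side note, the same bolt can be written with a little less boilerplate by extending BaseRichBolt, which supplies empty cleanup() and getComponentConfiguration() implementations. A sketch of an equivalent version (behavior unchanged; the class name is made up):

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExtractBoltLite extends BaseRichBolt {
    public static final String REGEX = "regex";
    public static final String FIELD = "field";
    private String field;
    private Pattern regex;
    private OutputCollector collector;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.field = (String) stormConf.get(FIELD);
        this.regex = Pattern.compile((String) stormConf.get(REGEX));
    }

    public void execute(Tuple input) {
        String log = input.getStringByField(field);
        if (log != null) {
            Matcher matcher = regex.matcher(log);
            if (matcher.find()) {
                // Emit only the captured log level.
                collector.emit(new Values(matcher.group(1)));
            }
        }
        collector.ack(input);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("level"));
    }
}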

CountBolt


import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.HashMap;
import java.util.Map;


public class CountBolt extends BaseBasicBolt {
    private Map<String, Integer> counts = new HashMap<>();

    public void execute(Tuple input, BasicOutputCollector collector) {
        String level = input.getStringByField("level");
        Integer count = counts.get(level);
        if (count == null)
            count = 0;
        count++;
        counts.put(level, count);
        System.out.println(level + " :  " + count);
        collector.emit(new Values(level, count));
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("level", "count"));
    }
}
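
CountBolt only prints the running count as each tuple arrives, so the final totals are easy to lose in the debug output. One option, not in the original code, is to override cleanup(), which BaseBasicBolt leaves empty, and print the accumulated map when the topology is killed (cleanup() is only reliably invoked in local mode):

    // Hypothetical addition inside CountBolt: dump the final totals on shutdown.
    @Override
    public void cleanup() {
        System.out.println("Final log level counts: " + counts);
    }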

Example program arguments for the main method: hdfs://172.17.11.85:9000 /log /ArchiveDir /BadFilesDir

The IDE console output shows lines such as "INFO :  123" printed by CountBolt as tuples are processed (screenshot omitted).
