Flink CEP 是 Flink 的复杂事件处理(Complex Event Processing)库。它允许用户快速检测无尽数据流中的复杂事件模式。不过 Flink CEP 仅可通过 DataStream API 使用。参考《细说 Flink CEP》可知,Flink 的每个模式包含多个状态,模式匹配的过程就是状态转换的过程;每个状态(state)可以理解成由 Pattern 构成。为了从当前状态转换到下一个状态,用户可以在 Pattern 上指定条件,用于事件的过滤和转换。
flink cep demo:
场景: 获取用户实时登录的信息, 检测出在 3 秒内连续三次登录失败的用户, 并推送一条告警信息
用户登录的数据:
用户, IP, Type
new LoginEvent("小明","192.168.0.1","fail"),
new LoginEvent("小明","192.168.0.2","fail"),
new LoginEvent("小王","192.168.10,11","fail"),
new LoginEvent("小王","192.168.10,12","fail"),
new LoginEvent("小明","192.168.0.3","fail"),
new LoginEvent("小明","192.168.0.4","fail"),
new LoginEvent("小王","192.168.10,10","success")
预期产生告警的信息:
new LoginEvent("小明","192.168.0.3","fail"),
new LoginEvent("小明","192.168.0.4","fail"),
FlinkLoginFail.java
package com.flink.UserLoginAttack;
import com.flink.UserLoginAttack.entity.LoginEvent;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternSelectFunction;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.IterativeCondition;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
 * Flink CEP demo: detect users that fail to log in three times in a row
 * within 3 seconds, and emit one alert line per matched sequence.
 *
 * <p>The pattern is keyed by user id, so the three consecutive failures must
 * belong to the same user. For every match, the event bound to the last
 * pattern stage ("three") is formatted as "userId, ip, type" and printed.
 */
public class FlinkLoginFail {

    private static final Logger LOGGER = LoggerFactory.getLogger(FlinkLoginFail.class);

    /**
     * Builds the condition shared by all three pattern stages: the event must be
     * a login failure. The stage name is printed for tracing which stage
     * evaluated which event (matches the sample output in the article).
     *
     * @param stage pattern stage name used as the trace prefix ("first"/"second"/"three")
     * @return condition accepting only events whose type is "fail"
     */
    private static IterativeCondition<LoginEvent> loginFailed(final String stage) {
        return new IterativeCondition<LoginEvent>() {
            @Override
            public boolean filter(LoginEvent loginEvent, Context context) throws Exception {
                System.out.println(stage + ": " + loginEvent);
                // "fail".equals(...) is null-safe, unlike getType().equals("fail")
                return "fail".equals(loginEvent.getType());
            }
        };
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Sample login events: (userId, ip, type). NOTE(review): some IPs use a
        // comma ("192.168.10,11") — kept as-is to match the article's recorded output.
        DataStream<LoginEvent> loginEventStream = env.fromCollection(Arrays.asList(
                new LoginEvent("小明","192.168.0.1","fail"),
                new LoginEvent("小明","192.168.0.2","fail"),
                new LoginEvent("小王","192.168.10,11","fail"),
                new LoginEvent("小王","192.168.10,12","fail"),
                new LoginEvent("小明","192.168.0.3","fail"),
                new LoginEvent("小明","192.168.0.4","fail"),
                new LoginEvent("小王","192.168.10,10","success")
        ));

        // Three strictly consecutive ("next") failures within 3 seconds trigger an alert.
        Pattern<LoginEvent, LoginEvent> loginFailPattern = Pattern.<LoginEvent>
                begin("first")
                .where(loginFailed("first"))
                .next("second")
                .where(loginFailed("second"))
                .next("three")
                .where(loginFailed("three"))
                .within(Time.seconds(3));

        // Key by user id so matching runs per user, then apply the CEP pattern.
        PatternStream<LoginEvent> patternStream = CEP.pattern(
                loginEventStream.keyBy(LoginEvent::getUserId),
                loginFailPattern);

        // For each match, report the third (last) failure of the sequence.
        DataStream<String> loginFailDataStream = patternStream.select(
                new PatternSelectFunction<LoginEvent, String>() {
                    @Override
                    public String select(Map<String, List<LoginEvent>> pattern) throws Exception {
                        // Events bound to the last stage; with plain next() there is exactly one.
                        LoginEvent third = pattern.get("three").get(0);
                        return third.getUserId() + ", " + third.getIp() + ", " + third.getType();
                    }
                }
        );

        // Print alerts to stdout.
        loginFailDataStream.print();
        env.execute();
        LOGGER.info("finish");
    }
}
LoginEvent.java
package com.flink.UserLoginAttack.entity;
/**
 * Immutable value object describing one login attempt.
 *
 * <p>Carries the user id, the client IP the attempt came from, and the
 * outcome type (e.g. "fail" or "success").
 */
public class LoginEvent {

    // All state is set once in the constructor and never mutated.
    private final String userId;
    private final String ip;
    private final String type;

    public LoginEvent(String userId, String ip, String type) {
        this.userId = userId;
        this.ip = ip;
        this.type = type;
    }

    /** @return the user identifier */
    public String getUserId() { return userId; }

    /** @return the client IP address */
    public String getIp() { return ip; }

    /** @return the login outcome type, e.g. "fail" or "success" */
    public String getType() { return type; }

    @Override
    public String toString() {
        // Same rendering as before: LoginEvent{userId='..', type='..', ip='..'}
        return "LoginEvent{userId='" + userId
                + "', type='" + type
                + "', ip='" + ip + "'}";
    }
}
结果显示
first: LoginEvent{userId='小明', type='fail', ip='192.168.0.1'}
second: LoginEvent{userId='小明', type='fail', ip='192.168.0.2'}
first: LoginEvent{userId='小明', type='fail', ip='192.168.0.2'}
first: LoginEvent{userId='小王', type='fail', ip='192.168.10,11'}
second: LoginEvent{userId='小王', type='fail', ip='192.168.10,12'}
first: LoginEvent{userId='小王', type='fail', ip='192.168.10,12'}
three: LoginEvent{userId='小明', type='fail', ip='192.168.0.3'}
second: LoginEvent{userId='小明', type='fail', ip='192.168.0.3'}
first: LoginEvent{userId='小明', type='fail', ip='192.168.0.3'}
1> 小明, 192.168.0.3, fail
three: LoginEvent{userId='小明', type='fail', ip='192.168.0.4'}
second: LoginEvent{userId='小明', type='fail', ip='192.168.0.4'}
first: LoginEvent{userId='小明', type='fail', ip='192.168.0.4'}
1> 小明, 192.168.0.4, fail
three: LoginEvent{userId='小王', type='success', ip='192.168.10,10'}
second: LoginEvent{userId='小王', type='success', ip='192.168.10,10'}
first: LoginEvent{userId='小王', type='success', ip='192.168.10,10'}
参考:
https://www.jianshu.com/p/4aef81468b6d
https://www.jianshu.com/p/d4d7edc86497?utm_source=oschina-app
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>FlinkDataHandle</groupId>
<artifactId>FlinkDataHandle</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>FlinkDataHandle</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scala.version>2.11.11</scala.version>
<scala.compat.version>2.11</scala.compat.version>
<spark.version>2.2.0</spark.version>
<c3p0.version>0.9.1.1</c3p0.version>
<mysql.version>5.1.26</mysql.version>
<fastjson.version>1.1.41</fastjson.version>
<hbase.version>1.2.0</hbase.version>
<flink.version>1.4.2</flink.version>
</properties>
<repositories>
<!-- 指定该项目可以从哪些地方下载依赖包 -->
<repository>
<id>aliyun</id>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>jboss</id>
<url>http://repository.jboss.org/nexus/content/groups/public</url>
</repository>
</repositories>
<dependencies>
<!-- This dependency provides the implementation of compiler "jdt": -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-cep_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-twitter_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.10_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-hbase_${scala.compat.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.1.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-pool2</artifactId>
<version>2.4.2</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20180130</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.25</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.3.0</version>
</dependency>
<dependency>
<groupId>org.eclipse.paho</groupId>
<artifactId>org.eclipse.paho.client.mqttv3</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-rabbitmq_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.fusesource.mqtt-client</groupId>
<artifactId>mqtt-client</artifactId>
<version>1.12</version>
</dependency>
<!-- NOTE(review): flink-table is already declared above as
     flink-table_${scala.compat.version} with version ${flink.version} (1.4.2);
     this second declaration pins 1.7.2 and conflicts with it. Remove one of
     the two (and align all Flink artifacts on a single version). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.35</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<version>2.1.1</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
<testSourceDirectory>src/test/java</testSourceDirectory>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<!-- NOTE(review): mainClass is empty, so the jar-with-dependencies is not
     directly runnable. Set it to the entry point, e.g.
     com.flink.UserLoginAttack.FlinkLoginFail -->
<mainClass></mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2.1</version>
<executions>
<execution>
<goals>
<goal>exec</goal>
</goals>
</execution>
</executions>
<configuration>
<executable>java</executable>
<includeProjectDependencies>true</includeProjectDependencies>
<includePluginDependencies>false</includePluginDependencies>
<classpathScope>compile</classpathScope>
<mainClass>com.stars</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>