最近有个抢购黑名单的需求,产品为了增加对黄牛党的限制,想要在抢购活动期间能对潜在黄牛党进行干扰,从而保障普通用户的权益。于是我想到了适合实时风控场景的Flink CEP技术。
通过对Flink官方文档的学习,我自己根据黑名单需求写了个Demo。
黑名单需求简化后如下:
在抢购活动前后,实时向后端系统反馈异常访问/购买的用户名单信息。具体实现逻辑是:按渠道、用户和指定的URL分组,统计用户访问指定URL的次数,选取访问次数超过设定阈值的用户,并统计出这些用户访问指定URL时的IP切换次数。
主要Demo代码实现如下:
package com.cmbchina.bigdata.flink.streaming.job.cep.learn
import java.util
import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.scala.{CEP, PatternStream}
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import scala.collection.mutable
// 用户抢购事件流的数据模型
/**
 * A single user access record in the flash-sale event stream.
 *
 * @param channelId     numeric identifier of the channel the request came through
 * @param clientIp      client IP address as text (the demo data uses dotted IPv4 strings)
 * @param requestUrl    URL that was requested
 * @param channelUserId identifier of the user within the channel
 * @param actionType    kind of action; the demo data uses "visit" and "buy"
 * @param timestamp     event time; the demo data carries epoch seconds and scales them
 *                      to milliseconds when timestamps are assigned to the stream
 */
case class UserAccessEvent(
  channelId: Int,
  clientIp: String,
  requestUrl: String,
  channelUserId: String,
  actionType: String,
  timestamp: Long
)
/**
 * Flink CEP demo for a flash-sale blacklist.
 *
 * Events are keyed by (channel, user, URL). A key is reported when it produces a given
 * number of "visit" (6) or "buy" (5) events within a 10 second event-time window. For
 * every match the job prints the matched events and emits one summary line that
 * includes how many times the client IP switched between consecutive matched events.
 */
object BlackListCepTest {
  // Local imports keep this object self-contained; both names are only used here.
  import scala.collection.JavaConverters._
  import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor

  /** Name of the single pattern stage; also the key used to read the matched events. */
  private val StartPatternName = "start"

  /** Event-time span within which all occurrences of a pattern must fall. */
  private val DetectionWindow: Time = Time.seconds(10)

  /**
   * How far behind the highest seen timestamp an event may arrive and still be accepted.
   * The demo data is at most 2 seconds out of order; 5 seconds leaves some headroom.
   */
  private val MaxOutOfOrderness: Time = Time.seconds(5)

  /**
   * Counts how many times the IP differs between two consecutive entries.
   *
   * @param ips client IPs in the order the events were matched
   * @return number of adjacent pairs with different IPs; 0 for empty or single-element input
   */
  private def countIpSwitches(ips: Seq[String]): Int =
    ips.zip(ips.drop(1)).count { case (previous, current) => previous != current }

  /**
   * Turns an in-memory list of events into a keyed event-time stream.
   *
   * The sample data is not strictly ordered by timestamp, so a bounded-out-of-orderness
   * assigner is used instead of an ascending-timestamp one; otherwise events that arrive
   * behind the watermark could be dropped by the CEP operator as late.
   *
   * @param env    execution environment the source is registered on
   * @param events simulated input; `timestamp` is expected in epoch seconds
   * @return the stream keyed by (channelId, channelUserId, requestUrl)
   */
  private def keyedEvents(
      env: StreamExecutionEnvironment,
      events: List[UserAccessEvent]): DataStream[UserAccessEvent] =
    env
      .fromCollection(events)
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[UserAccessEvent](MaxOutOfOrderness) {
          // Flink expects event time in epoch milliseconds; the demo data is in seconds.
          override def extractTimestamp(element: UserAccessEvent): Long =
            element.timestamp * 1000L
        })
      .keyBy(r => (r.channelId, r.channelUserId, r.requestUrl))

  /**
   * Builds a pattern that matches `occurrences` events of the given action type inside
   * [[DetectionWindow]].
   *
   * To emit only non-overlapping matches, pass
   * `AfterMatchSkipStrategy.skipPastLastEvent()` as the second argument of `Pattern.begin`.
   *
   * @param actionType  value of `UserAccessEvent.actionType` to match
   * @param occurrences number of matching events required
   */
  private def repeatedActionPattern(
      actionType: String,
      occurrences: Int): Pattern[UserAccessEvent, UserAccessEvent] =
    Pattern
      .begin[UserAccessEvent](StartPatternName)
      .where(_.actionType == actionType) // `==` is null-safe, unlike `.equals`
      .times(occurrences)
      .within(DetectionWindow)

  /**
   * Prints every event of a match and emits a one-line summary for it.
   *
   * The summary is built from the last matched event and ends with the number of IP
   * switches observed across the matched events.
   */
  private class MatchReporter extends PatternProcessFunction[UserAccessEvent, String] {
    override def processMatch(
        pattern: util.Map[String, util.List[UserAccessEvent]],
        ctx: PatternProcessFunction.Context,
        out: Collector[String]): Unit = {
      // Guard against a missing stage name rather than dereferencing a null list.
      val events: List[UserAccessEvent] =
        Option(pattern.get(StartPatternName)).map(_.asScala.toList).getOrElse(Nil)

      events.foreach { e =>
        println(s"""{${e.channelId},${e.channelUserId},${e.actionType}}{${e.clientIp},${e.requestUrl},${e.timestamp}}""")
      }

      val resultString = events.lastOption.fold("") { e =>
        s"{${e.channelId},${e.channelUserId},${e.actionType},${e.requestUrl}} match finished!"
      }
      val ipSwitches = countIpSwitches(events.map(_.clientIp))
      out.collect(s"""$resultString ip change times=$ipSwitches""")
    }
  }

  /**
   * Wires the two simulated sources to their patterns, prints the results and runs the job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Simulate the real-time streams with Scala collections. Timestamps here are epoch
    // seconds; they are converted to the milliseconds Flink requires in `keyedEvents`.
    val eventSource1 = keyedEvents(env, List(
      UserAccessEvent(1, "172.16.0.1", "/url/SI_ORD0016", "张三", "visit", 1577080457),
      UserAccessEvent(1, "172.16.0.2", "/url/SI_ORD0016", "张三", "visit", 1577080458),
      UserAccessEvent(1, "172.16.0.3", "/url/SI_ORD0016", "张三", "visit", 1577080460),
      UserAccessEvent(2, "172.16.0.5", "/url/SI_ORD0045", "李四", "visit", 1577080458),
      UserAccessEvent(2, "172.16.0.6", "/url/SI_ORD0045", "李四", "visit", 1577080460),
      UserAccessEvent(2, "172.16.0.7", "/url/SI_ORD0045", "李四", "visit", 1577080462),
      UserAccessEvent(2, "172.16.0.8", "/url/SI_ORD0045", "李四", "visit", 1577080466),
      UserAccessEvent(1, "172.16.0.4", "/url/SI_ORD0016", "张三", "visit", 1577080465),
      UserAccessEvent(1, "172.16.0.9", "/url/SI_ORD0016", "张三", "visit", 1577080466),
      UserAccessEvent(2, "172.16.0.9", "/url/SI_ORD0045", "李四", "visit", 1577080467),
      UserAccessEvent(1, "172.16.0.10", "/url/SI_ORD0016", "张三", "visit", 1577080467),
      UserAccessEvent(2, "172.16.0.8", "/url/SI_ORD0045", "李四", "visit", 1577080467)
    ))
    val eventSource2 = keyedEvents(env, List(
      UserAccessEvent(1, "172.16.0.1", "/url/SI_ORD0016", "张三", "buy", 1577080457),
      UserAccessEvent(1, "172.16.0.2", "/url/SI_ORD0046", "张三", "buy", 1577080458),
      UserAccessEvent(1, "172.16.0.3", "/url/SI_ORD0016", "张三", "buy", 1577080460),
      UserAccessEvent(2, "172.16.0.5", "/url/SI_ORD0045", "李四", "buy", 1577080458),
      UserAccessEvent(2, "172.16.0.6", "/url/SI_ORD0045", "李四", "buy", 1577080460),
      UserAccessEvent(2, "172.16.0.7", "/url/SI_ORD0046", "李四", "buy", 1577080462),
      UserAccessEvent(2, "172.16.0.8", "/url/SI_ORD0045", "李四", "buy", 1577080467),
      UserAccessEvent(1, "172.16.0.4", "/url/SI_ORD0016", "张三", "buy", 1577080466),
      UserAccessEvent(1, "172.16.0.4", "/url/SI_ORD0016", "张三", "buy", 1577080466),
      UserAccessEvent(2, "172.16.0.4", "/url/SI_ORD0016", "李四", "buy", 1577080467)
    ))

    // Abnormal visiting: 6 "visit" events for one key within the detection window.
    val detectExceptVisitPattern = repeatedActionPattern("visit", 6)
    val exceptVisitPatternStream: PatternStream[UserAccessEvent] =
      CEP.pattern(eventSource1, detectExceptVisitPattern)
    val exceptVisitResult: DataStream[String] =
      exceptVisitPatternStream.process(new MatchReporter)
    exceptVisitResult.print()

    // Abnormal buying: 5 "buy" events for one key within the detection window.
    val detectExceptBuyPattern = repeatedActionPattern("buy", 5)
    val exceptBuyPatternStream: PatternStream[UserAccessEvent] =
      CEP.pattern(eventSource2, detectExceptBuyPattern)
    val exceptBuyResult: DataStream[String] =
      exceptBuyPatternStream.process(new MatchReporter)
    exceptBuyResult.print()

    env.execute("Demo")
  }
}