flink求topN

原创已于 2022-06-05 09:03:00 修改 · 272 阅读

CC 4.0 BY-SA版权

文章标签：

于 2022-05-31 11:48:36 首次发布

/**
 * @author jiasongfan
 * @date 2022/5/31
 * @apiNote
 */

import org.apache.flink.api.common.state.{ListState, ListStateDescriptor, ValueState, ValueStateDescriptor}
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.util.Collector
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation}
import org.apache.flink.streaming.api.scala.function.WindowFunction

import scala.collection.JavaConverters.iterableAsScalaIterableConverter
/**
 * Streaming job that reads comma-separated score records from a socket, averages the
 * score per `id` over 5-second tumbling event-time windows, and prints the three highest
 * averages of every window.
 */
object Test06 {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Each input line carries four comma-separated fields; the third is parsed as Int and
    // the fourth as Long multiplied by 1000 (presumably epoch seconds -> millis; confirm
    // against the StuScore definition).
    val text: DataStream[String] = env.socketTextStream("hdp1", 9999)
    val mapDS: DataStream[StuScore] = text.map(line => {
      val li: Array[String] = line.split(",")
      StuScore(li(0), li(1), li(2).trim.toInt, li(3).trim.toLong * 1000)
    })

    // Use the record's `ts` field as the event time; timestamps are assumed to be ascending.
    val timeDS: DataStream[StuScore] = mapDS.assignAscendingTimestamps(_.ts)

    // Average score per id within each 5-second tumbling window.
    val keyS: KeyedStream[StuScore, String] = timeDS.keyBy(_.id)
    val winDS: WindowedStream[StuScore, String, TimeWindow] = keyS.window(TumblingEventTimeWindows.of(Time.seconds(5)))
    val avgDS: DataStream[Ws] = winDS.aggregate(new MyAvg2, new MyAvgFunc2)

    // Re-key by window end so that all averages of the same window are ranked together.
    val keyS2: KeyedStream[Ws, Long] = avgDS.keyBy(_.end)
    val top3: DataStream[List[Ws]] = keyS2.process(new TopNProcess)

    // Without a sink the ranking is computed and then discarded; print it to stdout.
    top3.print()

    env.execute()
  }
}
/**
 * Incremental average of `StuScore.score`.
 *
 * The accumulator is a pair `(sum of scores, number of records)`.
 */
class MyAvg2 extends AggregateFunction[StuScore,(Int,Int),Double] {
  /** Empty accumulator: zero sum, zero records. */
  override def createAccumulator(): (Int, Int) = (0, 0)

  /** Folds one record into the accumulator. */
  override def add(in: StuScore, acc: (Int, Int)): (Int, Int) = (acc._1 + in.score, acc._2 + 1)

  /** Combines two partial accumulators. */
  override def merge(acc: (Int, Int), acc1: (Int, Int)): (Int, Int) = (acc._1 + acc1._1, acc._2 + acc1._2)

  /**
   * Final average. The sum is widened to Double before dividing; dividing the two Ints
   * directly would truncate the fractional part (e.g. 7 / 2 == 3 instead of 3.5).
   */
  override def getResult(acc: (Int, Int)): Double = acc._1.toDouble / acc._2
}

/**
 * One averaged result for a single key within a single window.
 *
 * @param start    window start value
 * @param end      window end value
 * @param stuid    key the average belongs to
 * @param avgscore average score computed for that key in the window
 */
case class Ws(
  start: Long,
  end: Long,
  stuid: String,
  avgscore: Double
)
// WindowFunction type parameters, in order: [IN, OUT, KEY, W <: Window]
// i.e. input type, output type, key type, window type.
/**
 * Wraps each pre-aggregated average of a window into a `Ws` record that carries the
 * window's start, end and the key.
 */
class MyAvgFunc2 extends WindowFunction[Double,Ws,String,TimeWindow] {
  override def apply(key: String, window: TimeWindow, input: Iterable[Double], out: Collector[Ws]): Unit = {
    val windowStart = window.getStart
    val windowEnd = window.getEnd
    input.foreach(avg => out.collect(Ws(windowStart, windowEnd, key, avg)))
  }
}
// KeyedProcessFunction type parameters: <K, I, O> (key, input, output)
/**
 * Buffers every `Ws` record of the current key in list state and, when the event-time
 * timer registered at the record's `end` fires, emits the three records with the highest
 * `avgscore` in descending order.
 */
class TopNProcess extends KeyedProcessFunction[Long,Ws,List[Ws]]{
  val descriptor = new ListStateDescriptor[Ws](
    "buffered-elements",
    TypeInformation.of(new TypeHint[Ws]() {})
  )

  // Lazy because the runtime context is not available while the function is being constructed.
  lazy val liststate: ListState[Ws] = getRuntimeContext.getListState(descriptor)

  /** Buffers the record and registers an event-time timer at the record's `end`. */
  override def processElement(i: Ws, context: KeyedProcessFunction[Long, Ws, List[Ws]]#Context, collector: Collector[List[Ws]]): Unit = {
    liststate.add(i)
    context.timerService.registerEventTimeTimer(i.end)
  }

  /** Emits the top 3 buffered records by `avgscore` and releases the buffered state. */
  override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Long, Ws, List[Ws]]#OnTimerContext, out: Collector[List[Ws]]): Unit = {
    // Guard against a null result from get() on empty state.
    val buffered: List[Ws] = Option(liststate.get()).map(_.asScala.toList).getOrElse(Nil)
    // The ranking for this key is final once the timer fires; clear the state so it does
    // not accumulate for every window end ever seen.
    liststate.clear()
    out.collect(buffered.sortBy(-_.avgscore).take(3))
  }
}

需要注意

1.先按学生求出每个窗口的平均值，之后使用结果中的窗口结束时间（end）进行 keyBy，这样同一窗口的结果会进入同一个分组

2.然后在process

3.agg的输出记得写一个样例类在process里面使用

4.ListState 中缓存的数据在定时器触发时取出，转成 List 后按平均分降序排序并取前 N 条